source: titan/mediathek/localhoster/lib/helpers.py @ 39352

Last change on this file since 39352 was 39352, checked in by obi, 7 years ago

fix flashx.py

File size: 7.8 KB
Line 
1"""
2    URLResolver Addon for Kodi
3    Copyright (C) 2016 t0mm0, tknorris
4
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17"""
18import re
19import urllib
20#import xbmcgui
21import jsunpack
22from urlparse import urlparse
23#from urlresolver import common
24#from urlresolver.resolver import ResolverError
25import common
26
def get_hidden(html, form_id=None, index=None, include_submit=True):
    """Collect the hidden <input> fields of a <form> as a name->value dict.

    html           -- page markup to scan
    form_id        -- when given, only forms whose id attribute equals it
    index          -- when given, only the index-th matching form
    include_submit -- also capture the form's submit button name/value
    Returns a dict of field name -> field value.
    """
    if form_id:
        form_pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
    else:
        form_pattern = '''<form[^>]*>(.*?)</form>'''

    # Strip HTML comments so commented-out inputs are not harvested, but
    # keep comments ending in '//' (JS-in-comment hack some hosters use).
    for comment in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
        if comment.group(1) != '//':
            html = html.replace(comment.group(0), '')

    fields = {}
    for form_index, form_match in enumerate(re.finditer(form_pattern, html, re.DOTALL | re.I)):
        if index is not None and form_index != index:
            continue

        form_body = form_match.group(1)
        for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form_body):
            name = re.search('''name\s*=\s*['"]([^'"]+)''', field.group(0))
            value = re.search('''value\s*=\s*['"]([^'"]*)''', field.group(0))
            if name and value:
                fields[name.group(1)] = value.group(1)

        if include_submit:
            submit = re.search('''<input [^>]*type=['"]?submit['"]?[^>]*>''', form_body)
            if submit:
                name = re.search('''name\s*=\s*['"]([^'"]+)''', submit.group(0))
                value = re.search('''value\s*=\s*['"]([^'"]*)''', submit.group(0))
                if name and value:
                    fields[name.group(1)] = value.group(1)

    return fields
55
def pick_source(sources, auto_pick=None):
    """Choose one stream URL from a list of (label, url) tuples.

    sources   -- list of (label, url) tuples, best source first
    auto_pick -- kept for interface compatibility with the upstream
                 urlresolver helper; the interactive xbmcgui dialog is
                 disabled in this port, so the first source is always used.
    Returns the url of the chosen source.
    Raises Exception when the list is empty.
    """
    if not sources:
        # BUG FIX: the original raised ResolverError here, but that import
        # is commented out above, so an empty list died with a NameError.
        raise Exception('No Video Link Found')
    # BUG FIX: with the selection dialog commented out, a multi-source list
    # with a falsy auto_pick fell off the end and returned None, which made
    # get_media_url() crash on 'None + append_headers(...)'. Always take the
    # first (highest-ranked after scrape_sources' sort) entry instead.
    return sources[0][1]
73
def append_headers(headers):
    """Serialize a headers dict into Kodi's '|Key=Value&Key2=Value2' URL
    suffix, percent-encoding each value.

    headers -- dict of HTTP header name -> value
    Returns the '|'-prefixed, '&'-joined string to append to a stream URL.
    """
    # BUG FIX (compat): quote_plus moved to urllib.parse in Python 3;
    # fall back so the helper works on both interpreters.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus
    return '|%s' % '&'.join(['%s=%s' % (key, quote_plus(headers[key])) for key in headers])
76
def add_packed_data(html):
    """Unpack any 'eval(function(p,a,c,k,e,...' packed JavaScript found in
    *html* and append the decoded script text to the returned markup, so the
    regex scrapers further down can see stream URLs hidden inside packed JS.

    html -- page markup, possibly containing packed scripts
    Returns the original html with each successfully unpacked payload appended.
    """
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        try:
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data
        # BUG FIX: was a bare 'except:' that also swallowed SystemExit and
        # KeyboardInterrupt; limit it to ordinary unpacking failures and
        # keep the deliberate best-effort behavior (skip bad payloads).
        except Exception:
            pass

    return html
87
def parse_sources_list(html):
    """Extract stream entries from a jwplayer-style "sources: [...]" array.

    html -- page markup containing the player setup
    Returns a list of (label, url) tuples; escaped '\/' in urls is unescaped.
    """
    block = re.search('''['"]?sources['"]?\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if not block:
        return []
    entry_pattern = '''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)'''
    return [(label, url.replace('\/', '/'))
            for url, label in re.findall(entry_pattern, block.group(1), re.DOTALL)]
94
def parse_html5_source_list(html):
    """Extract stream entries from HTML5 <source> tags.

    The quality label is read from the 'data-res' attribute when the page
    uses self-closing <source ... data-res=... /> tags, otherwise from the
    'type' attribute (with any leading 'video/' stripped).

    html -- page markup to scan
    Returns a list of (label, url) tuples; escaped '\/' in urls is unescaped.
    """
    if re.search('''<source\s+src\s*=.*?data-res\s*=.*?/\s*>''', html):
        label_attrib = 'data-res'
    else:
        label_attrib = 'type'
    pattern = ('''<source\s+src\s*=\s*['"]([^'"]+)['"](?:.*?'''
               + label_attrib +
               '''\s*=\s*['"](?:video/)?([^'"]+)['"])''')
    return [(label, url.replace('\/', '/'))
            for url, label in re.findall(pattern, html, re.DOTALL)]
99
def parse_smil_source_list(smil):
    """Extract RTMP stream entries from a SMIL document.

    smil -- SMIL markup with a base="..." attribute and src="..." entries
    Returns a list of (label, '<base> playpath=<src>') tuples; the label is
    taken from a width/height attribute when present, else 'Unknown'.
    Raises AttributeError when no base attribute exists (as the original did).
    """
    base = re.search('base\s*=\s*"([^"]+)', smil).group(1)
    sources = []
    for entry in re.finditer('src\s*=\s*"([^"]+)(?:"\s*(?:width|height)\s*=\s*"([^"]+))?', smil):
        label = entry.group(2) if entry.group(2) is not None else 'Unknown'
        sources.append((label, '%s playpath=%s' % (base, entry.group(1))))
    return sources
109
def scrape_sources(html, result_blacklist=None):
    """Scan a page for candidate video stream URLs.

    Runs several regexes over the page (jwplayer 'file:' entries,
    <video>/<source> tags, generic 'url:' keys, <param name="src"> values)
    after first exposing any packed-JS payloads via add_packed_data().

    html             -- page markup to scan
    result_blacklist -- extra substring(s) to reject in a stream's file
                        name; a single string or a list of strings
    Returns a list of (label, stream_url) tuples, sorted best-first when
    the labels parse as integers, otherwise in discovery order.
    """
    def __parse_to_list(_html, regex):
        # Static junk-file extensions, merged with the caller's blacklist.
        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
        _blacklist = set(_blacklist + result_blacklist)
        streams = []
        labels = []
        for r in re.finditer(regex, _html, re.DOTALL):
            match = r.groupdict()
            stream_url = match['url']
            file_name = urlparse(stream_url).path.split('/')[-1]
            # Reject empty file names and names containing a blacklisted token.
            blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
            # Also skip non-absolute URLs and duplicates — both within this
            # pass (streams) and against sources already collected by earlier
            # regex passes (the enclosing source_list, still being built).
            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                continue
   
            # Fall back to the bare file name when the regex has no 'label'
            # group or its optional part did not match (group is None).
            label = match.get('label', file_name)
            if label is None: label = file_name
            labels.append(label)
            streams.append(stream_url)
           
        matches = zip(labels, streams)
#        if matches:
#            common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
        return matches

    # Normalise the blacklist argument to a list.
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]
       
    # Append unpacked JS so the regexes below can see obfuscated URLs.
    html = add_packed_data(html)

    source_list = []
    source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
    source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
    source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
    source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
    source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')

    # Prefer the highest resolution: sort numerically by label (e.g. '1080'),
    # then retry with the last char dropped for labels like '720p'; give up
    # and keep discovery order when neither form is numeric.
    if len(source_list) > 1:
        try: source_list.sort(key=lambda x: int(x[0]), reverse=True)
        except:
            common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
            try: source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
            except:
                common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')

    return source_list
157
158
def get_media_url(url, result_blacklist=None):
    """Fetch *url*, scrape stream sources out of the response, and return
    the chosen stream URL with Kodi request headers appended.

    url              -- hoster page to resolve
    result_blacklist -- extra substring(s) to reject in scraped file names;
                        a single string or a list of strings
    Returns '<stream_url>|<urlencoded headers>' suitable for Kodi playback.
    """
    if result_blacklist is None:
        blacklist = []
    elif isinstance(result_blacklist, str):
        blacklist = [result_blacklist]
    else:
        blacklist = result_blacklist
    blacklist = list(set(blacklist + ['.smil']))  # smil(not playable) contains potential sources, only blacklist when called from here

    parsed_url = urlparse(url)
    headers = {'User-Agent': common.FF_USER_AGENT,
               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}

    net = common.Net()
    response = net.http_GET(url, headers=headers)
    response_headers = response.get_headers(as_dict=True)

    # Stream requests should carry the page itself as referer, plus any
    # session cookie the server handed out with the page.
    headers['Referer'] = url
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        headers['Cookie'] = cookie

    source_list = scrape_sources(response.content, blacklist)
    return pick_source(source_list) + append_headers(headers)
Note: See TracBrowser for help on using the repository browser.