source: titan/mediathek/localhoster/lib/helpers.py @ 39354

Last change on this file since 39354 was 39354, checked in by obi, 7 years ago: fix vidzi

1"""
2    URLResolver Addon for Kodi
3    Copyright (C) 2016 t0mm0, tknorris
4
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17"""
import re
import urllib
#import xbmcgui
import jsunpack
from urlparse import urlparse
#from urlresolver import common
#from urlresolver.resolver import ResolverError
import common
from net import Net

# ResolverError is raised below, but its urlresolver import is commented out
# above; define a minimal local stand-in so the module stays importable.
class ResolverError(Exception):
    pass

def get_hidden(html, form_id=None, index=None, include_submit=True):
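    """Collect the hidden <input> fields of the forms in ``html``.

    If ``form_id`` is given only that form is searched, otherwise every form
    (or only the ``index``-th one) is scanned; the submit button is included
    unless ``include_submit`` is False.  Example with hypothetical markup:
        get_hidden('<form><input type="hidden" name="op" value="download1"></form>')
        returns {'op': 'download1'}
    """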
    hidden = {}
    if form_id:
        pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
    else:
        pattern = '''<form[^>]*>(.*?)</form>'''

    for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
        if match.group(1) != '//': html = html.replace(match.group(0), '')

    for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
        if index is None or i == index:
            for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form.group(1)):
                match = re.search('''name\s*=\s*['"]([^'"]+)''', field.group(0))
                match1 = re.search('''value\s*=\s*['"]([^'"]*)''', field.group(0))
                if match and match1:
                    hidden[match.group(1)] = match1.group(1)

            if include_submit:
                match = re.search('''<input [^>]*type=['"]?submit['"]?[^>]*>''', form.group(1))
                if match:
                    name = re.search('''name\s*=\s*['"]([^'"]+)''', match.group(0))
                    value = re.search('''value\s*=\s*['"]([^'"]*)''', match.group(0))
                    if name and value:
                        hidden[name.group(1)] = value.group(1)

#    common.log_utils.log_debug('Hidden fields are: %s' % (hidden))
    return hidden

def pick_source(sources, auto_pick=None):
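    """Return the URL of one source from a list of (label, url) tuples.

    Example: pick_source([('720p', 'http://host/video.mp4')]) returns
    'http://host/video.mp4'.
    """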
#    if auto_pick is None:
#        auto_pick = common.get_setting('auto_pick') == 'true'

    if len(sources) == 1:
        return sources[0][1]
    elif len(sources) > 1:
        if auto_pick:
            return sources[0][1]
        else:
            # the xbmcgui selection dialog below is disabled in this port;
            # fall back to the first (highest ranked) source instead of
            # falling through and returning None
            return sources[0][1]
#            result = xbmcgui.Dialog().select('Choose the link', [source[0] if source[0] else 'Unknown' for source in sources])
#            if result == -1:
#                raise ResolverError('No link selected')
#            else:
#                return sources[result][1]
    else:
        raise ResolverError('No Video Link Found')

def append_headers(headers):
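    """Serialize ``headers`` into the '|key=value&...' suffix that Kodi
    accepts on playback URLs.

    Example: append_headers({'Referer': 'http://x/'}) returns
    '|Referer=http%3A%2F%2Fx%2F'.
    """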
    return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(headers[key])) for key in headers])

def add_packed_data(html):
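    """Unpack any 'eval(function(...))' packed javascript found in ``html``
    and append the unpacked source to it, so that the regexes in
    scrape_sources() can see URLs hidden inside packed scripts."""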
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        try:
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data
        except:
            pass

    return html

def parse_sources_list(html):
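    """Parse a jwplayer-style "sources: [{file: ..., label: ...}]" block into
    a list of (label, url) tuples."""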
    sources = []
    match = re.search('''['"]?sources['"]?\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if match:
        sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)]
    return sources

def parse_html5_source_list(html):
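    """Parse HTML5 <source src=...> tags into a list of (label, url) tuples;
    the label comes from the data-res attribute when present, otherwise from
    the type attribute with any 'video/' prefix stripped."""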
    label_attrib = 'type' if not re.search('''<source\s+src\s*=.*?data-res\s*=.*?/\s*>''', html) else 'data-res'
    sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''<source\s+src\s*=\s*['"]([^'"]+)['"](?:.*?''' + label_attrib + '''\s*=\s*['"](?:video/)?([^'"]+)['"])''', html, re.DOTALL)]
    return sources

def parse_smil_source_list(smil):
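    """Parse a SMIL playlist into (label, 'base playpath=src') tuples suitable
    for rtmp playback; the width/height attribute is used as the label."""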
    sources = []
    # guard against a playlist without a base attribute (the original
    # .groups()[0] call would raise on a failed search)
    match = re.search('base\s*=\s*"([^"]+)', smil)
    base = match.group(1) if match else ''
    for i in re.finditer('src\s*=\s*"([^"]+)(?:"\s*(?:width|height)\s*=\s*"([^"]+))?', smil):
        label = 'Unknown'
        if (len(i.groups()) > 1) and (i.group(2) is not None):
            label = i.group(2)
        sources += [(label, '%s playpath=%s' % (base, i.group(1)))]
    return sources

def scrape_sources(html, result_blacklist=None):
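    """Scrape video source URLs out of ``html`` with a series of regexes and
    return them as a list of (label, url) tuples, best quality first.

    ``result_blacklist`` may be a string or a list of substrings; file names
    matching it are skipped, as are non-media extensions such as images and
    subtitles."""
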
    def __parse_to_list(_html, regex):
        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
        _blacklist = set(_blacklist + result_blacklist)
        streams = []
        labels = []
        for r in re.finditer(regex, _html, re.DOTALL):
            match = r.groupdict()
            stream_url = match['url']
            file_name = urlparse(stream_url).path.split('/')[-1]
            blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                continue

            label = match.get('label', file_name)
            if label is None: label = file_name
            labels.append(label)
            streams.append(stream_url)

        matches = zip(labels, streams)
#        if matches:
#            common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
        return matches

    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    html = add_packed_data(html)

    source_list = []
    source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
    source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
    source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
    source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
    source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')

    if len(source_list) > 1:
        # sort best quality first: first try the label as a plain number
        # (e.g. '720'), then with a trailing character stripped (e.g. '720p');
        # leave the order untouched if neither form is numeric
        try: source_list.sort(key=lambda x: int(x[0]), reverse=True)
        except:
#            common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
            try: source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
            except:
                pass
#                common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')

    return source_list


def get_media_url(url, result_blacklist=None):
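    """Fetch ``url``, scrape the page for video sources and return the URL of
    the best one.  A Firefox user agent and a referer derived from the page
    are sent with the request."""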
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    result_blacklist = list(set(result_blacklist + ['.smil']))  # smil (not playable) contains potential sources, only blacklist when called from here
    net = Net()
    parsed_url = urlparse(url)
    headers = {'User-Agent': common.FF_USER_AGENT,
               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}

    response = net.http_GET(url, headers=headers)
#    response_headers = response.get_headers(as_dict=True)
    response_headers = response.get_headers()

    headers.update({'Referer': url})
#    cookie = response_headers.get('Set-Cookie', None)
#    if cookie:
#        headers.update({'Cookie': cookie})
    html = response.content

    source_list = scrape_sources(html, result_blacklist)
    source = pick_source(source_list)
    return source  # + append_headers(headers)