source: titan/mediathek/localhoster/lib/helpers.py @ 39358

Last change on this file since 39358 was 39358, checked in by obi, 7 years ago

add vidto.py

File size: 8.0 KB
1"""
2    URLResolver Addon for Kodi
3    Copyright (C) 2016 t0mm0, tknorris
4
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17"""
import re
import urllib
#import xbmcgui
import jsunpack
from urlparse import urlparse
#from urlresolver import common
#from urlresolver.resolver import ResolverError
import common
from net import Net

def get_hidden(html, form_id=None, index=None, include_submit=True):
    hidden = {}
    if form_id:
        pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
    else:
        pattern = '''<form[^>]*>(.*?)</form>'''

    # strip HTML comments (unless the last two chars before --> are '//',
    # i.e. likely commented-out JS) so commented-out forms are not matched
    for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
        if match.group(1) != '//': html = html.replace(match.group(0), '')

    for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
        if index is None or i == index:
            for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form.group(1)):
                match = re.search('''name\s*=\s*['"]([^'"]+)''', field.group(0))
                match1 = re.search('''value\s*=\s*['"]([^'"]*)''', field.group(0))
                if match and match1:
                    hidden[match.group(1)] = match1.group(1)

            if include_submit:
                match = re.search('''<input [^>]*type=['"]?submit['"]?[^>]*>''', form.group(1))
                if match:
                    name = re.search('''name\s*=\s*['"]([^'"]+)''', match.group(0))
                    value = re.search('''value\s*=\s*['"]([^'"]*)''', match.group(0))
                    if name and value:
                        hidden[name.group(1)] = value.group(1)

#    common.log_utils.log_debug('Hidden fields are: %s' % (hidden))
    return hidden

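# Illustrative usage of get_hidden (hypothetical HTML, not from the original
# source; dict ordering may vary under Python 2):
#     html = '<form id="f1"><input type="hidden" name="op" value="download1">' \
#            '<input type="submit" name="method_free" value="Free"></form>'
#     get_hidden(html, form_id='f1')
#     => {'op': 'download1', 'method_free': 'Free'}
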
def pick_source(sources, auto_pick=None):
#    if auto_pick is None:
#        auto_pick = common.get_setting('auto_pick') == 'true'

    if len(sources) == 1:
        return sources[0][1]
    elif len(sources) > 1:
        # with the xbmcgui selection dialog commented out, both branches
        # fall back to the first (highest sorted) source
        if auto_pick:
            return sources[0][1]
        else:
            return sources[0][1]

#            result = xbmcgui.Dialog().select('Choose the link', [source[0] if source[0] else 'Unknown' for source in sources])
#            if result == -1:
#                raise ResolverError('No link selected')
#            else:
#                return sources[result][1]
#    else:
#        raise ResolverError('No Video Link Found')

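# Illustrative usage of pick_source (hypothetical URLs): with the dialog
# disabled, the first entry of the (already sorted) source list is returned:
#     pick_source([('720', 'http://a/hd.mp4'), ('360', 'http://a/sd.mp4')])
#     => 'http://a/hd.mp4'
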
def append_headers(headers):
    return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(headers[key])) for key in headers])

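# Illustrative usage of append_headers: builds the Kodi-style '|key=value'
# suffix with URL-quoted values (hypothetical values; dict order may vary):
#     append_headers({'User-Agent': 'Mozilla/5.0', 'Referer': 'http://host/'})
#     => '|User-Agent=Mozilla%2F5.0&Referer=http%3A%2F%2Fhost%2F'
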
def add_packed_data(html):
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        try:
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data
        except:
            pass

    return html

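# Illustrative input for add_packed_data: a typical packed block looks like
#     <script>eval(function(p,a,c,k,e,d){...}('...',36,42,'...'.split('|')))</script>
# the unpacked JS (which often contains the real file/label data) is simply
# appended to the html so the parsers below can see it.
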
def parse_sources_list(html):
    sources = []
    match = re.search('''['"]?sources['"]?\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if match:
        sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)]
    return sources

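# Illustrative input for parse_sources_list (jwplayer-style setup, hypothetical
# values):
#     sources: [{file: "http://host/video_720.mp4", label: "720p"},
#               {file: "http://host/video_360.mp4", label: "360p"}]
# would return [('720p', 'http://host/video_720.mp4'),
#               ('360p', 'http://host/video_360.mp4')]
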
def parse_html5_source_list(html):
    label_attrib = 'type' if not re.search('''<source\s+src\s*=.*?data-res\s*=.*?/\s*>''', html) else 'data-res'
    sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''<source\s+src\s*=\s*['"]([^'"]+)['"](?:.*?''' + label_attrib + '''\s*=\s*['"](?:video/)?([^'"]+)['"])''', html, re.DOTALL)]
    return sources

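# Illustrative input for parse_html5_source_list (hypothetical values): the
# label comes from data-res when present, otherwise from the MIME subtype:
#     <source src="http://host/v.mp4" type="video/mp4" />
#     => [('mp4', 'http://host/v.mp4')]
#     <source src="http://host/v.mp4" data-res="720" />
#     => [('720', 'http://host/v.mp4')]
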
def parse_smil_source_list(smil):
    sources = []
    base = re.search('base\s*=\s*"([^"]+)', smil).group(1)
    for i in re.finditer('src\s*=\s*"([^"]+)(?:"\s*(?:width|height)\s*=\s*"([^"]+))?', smil):
        label = 'Unknown'
        if (len(i.groups()) > 1) and (i.group(2) is not None):
            label = i.group(2)
        sources += [(label, '%s playpath=%s' % (base, i.group(1)))]
    return sources

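# Illustrative SMIL input (hypothetical values):
#     <meta base="rtmp://host/app" />
#     <video src="mp4:video_720" height="720" />
# parse_smil_source_list would return
#     [('720', 'rtmp://host/app playpath=mp4:video_720')]
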
def scrape_sources(html, result_blacklist=None):
    def __parse_to_list(_html, regex):
        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
        _blacklist = set(_blacklist + result_blacklist)
        streams = []
        labels = []
        for r in re.finditer(regex, _html, re.DOTALL):
            match = r.groupdict()
            stream_url = match['url']
            file_name = urlparse(stream_url).path.split('/')[-1]
            blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                continue

            label = match.get('label', file_name)
            if label is None: label = file_name
            labels.append(label)
            streams.append(stream_url)

        matches = zip(labels, streams)
#        if matches:
#            common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
        return matches

    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    html = add_packed_data(html)

    source_list = []
    source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
    source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
    source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
    source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
    source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')

    if len(source_list) > 1:
        # sort by numeric label, falling back to a label with a trailing unit
        # character (e.g. '720p'); leave the order unchanged otherwise
        try:
            source_list.sort(key=lambda x: int(x[0]), reverse=True)
        except:
#            common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
            try:
                source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
            except:
                pass
#                common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')

    return source_list

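# Illustrative result of scrape_sources (hypothetical page): all five patterns
# are tried and the de-duplicated candidates are sorted highest quality first:
#     scrape_sources(html)
#     => [('720p', 'http://host/hd.mp4'), ('360p', 'http://host/sd.mp4')]
# the int(x[0][:-1]) fallback is what makes labels like '720p' sortable.
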
def get_media_url(url, result_blacklist=None):
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    result_blacklist = list(set(result_blacklist + ['.smil']))  # smil (not playable) contains potential sources, only blacklist when called from here
    net = Net()
    parsed_url = urlparse(url)
    headers = {'User-Agent': common.FF_USER_AGENT,
               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}

    response = net.http_GET(url, headers=headers)
    response_headers = response.get_headers(as_dict=True)

    headers.update({'Referer': url})
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        headers.update({'Cookie': cookie})
    html = response.content

    source_list = scrape_sources(html, result_blacklist)
    source = pick_source(source_list)
    return source + append_headers(headers)
#    return source
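
# Illustrative end-to-end usage (hypothetical host): the resolved stream URL is
# returned with the request headers appended in Kodi's '|' notation, e.g.
#     get_media_url('http://somehost.example/embed-abc123.html')
#     => 'http://somehost.example/video.mp4|User-Agent=...&Referer=...'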