1 | """
|
---|
2 | URLResolver Addon for Kodi
|
---|
3 | Copyright (C) 2016 t0mm0, tknorris
|
---|
4 |
|
---|
5 | This program is free software: you can redistribute it and/or modify
|
---|
6 | it under the terms of the GNU General Public License as published by
|
---|
7 | the Free Software Foundation, either version 3 of the License, or
|
---|
8 | (at your option) any later version.
|
---|
9 |
|
---|
10 | This program is distributed in the hope that it will be useful,
|
---|
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | GNU General Public License for more details.
|
---|
14 |
|
---|
15 | You should have received a copy of the GNU General Public License
|
---|
16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
17 | """
|
---|
18 | import re
|
---|
19 | import urllib
|
---|
20 | #import xbmcgui
|
---|
21 | import jsunpack
|
---|
22 | from urlparse import urlparse
|
---|
23 | #from urlresolver import common
|
---|
24 | #from urlresolver.resolver import ResolverError
|
---|
25 | import common
|
---|
26 |
|
---|
def get_hidden(html, form_id=None, index=None, include_submit=True):
    """Collect hidden (and optionally submit) input fields from HTML forms.

    HTML comments are removed from the page before the forms are searched,
    except comments whose last two characters before ``-->`` are ``//``.

    :param html: page source to search.
    :param form_id: if given, only forms whose ``id`` attribute equals this
        value are considered. The value is matched literally.
    :param index: if given, only the form at this zero-based position among
        the matching forms is considered; otherwise all matching forms are.
    :param include_submit: when true, the first submit input of each
        considered form is added as well.
    :return: dict mapping input names to values. Inputs lacking either a
        quoted ``name`` or a quoted ``value`` are skipped; a later field
        overwrites an earlier one with the same name.
    """
    hidden = {}
    if form_id:
        # Escape the id so regex metacharacters in it ('.', '+', '[' ...) are
        # matched literally instead of being interpreted as pattern syntax.
        pattern = r'''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % re.escape(form_id)
    else:
        pattern = r'''<form[^>]*>(.*?)</form>'''

    # Drop commented-out markup so inputs inside comments are not reported.
    for match in re.finditer(r'<!--.*?(..)-->', html, re.DOTALL):
        if match.group(1) != '//':
            html = html.replace(match.group(0), '')

    name_re = r'''name\s*=\s*['"]([^'"]+)'''
    value_re = r'''value\s*=\s*['"]([^'"]*)'''

    for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
        if index is not None and i != index:
            continue

        form_body = form.group(1)
        for field in re.finditer(r'''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form_body):
            name = re.search(name_re, field.group(0))
            value = re.search(value_re, field.group(0))
            if name and value:
                hidden[name.group(1)] = value.group(1)

        if include_submit:
            submit = re.search(r'''<input [^>]*type=['"]?submit['"]?[^>]*>''', form_body)
            if submit:
                name = re.search(name_re, submit.group(0))
                value = re.search(value_re, submit.group(0))
                if name and value:
                    hidden[name.group(1)] = value.group(1)

    return hidden
|
---|
55 |
|
---|
class ResolverError(Exception):
    """Raised when no playable link can be resolved.

    Defined locally because the import of the urlresolver exception is
    disabled in this module, which left the name undefined.
    """


def pick_source(sources, auto_pick=None):
    """Return the URL of the source to use.

    :param sources: sequence of ``(label, url)`` pairs.
    :param auto_pick: accepted for backward compatibility. The interactive
        chooser is disabled in this module, so the first source is returned
        whenever more than one is available, whatever this flag is set to.
    :return: the url of the first entry in ``sources``.
    :raises ResolverError: if ``sources`` is empty.
    """
    if not sources:
        raise ResolverError('No Video Link Found')

    # Without a selection dialog there is nothing to choose with; returning
    # None here made callers fail later when concatenating the result.
    return sources[0][1]
|
---|
73 |
|
---|
def append_headers(headers):
    """Serialize request headers into a ``|key=value&key=value`` URL suffix.

    :param headers: dict mapping header names to string values.
    :return: ``'|'`` followed by ``name=value`` pairs joined with ``&``;
        each value is encoded with ``quote_plus``, names are left as-is.
    """
    try:
        quote_plus = urllib.quote_plus  # Python 2 location
    except AttributeError:
        from urllib.parse import quote_plus  # Python 3 location
    return '|%s' % '&'.join(['%s=%s' % (key, quote_plus(value)) for key, value in headers.items()])
|
---|
76 |
|
---|
def add_packed_data(html):
    """Append the unpacked form of packed javascript found in the page.

    Every ``eval(function ... </script>`` span is handed to
    ``jsunpack.unpack``; the result has its backslashes removed and is
    appended to the page text. Unpacking is best effort: a span that fails
    to unpack is skipped.

    :param html: page source.
    :return: ``html`` followed by the text of every span that unpacked.
    """
    for match in re.finditer(r'(eval\(function.*?)</script>', html, re.DOTALL):
        try:
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data
        except Exception:
            # Deliberately ignored so one bad script does not stop scraping;
            # unlike a bare except this lets KeyboardInterrupt/SystemExit through.
            pass

    return html
|
---|
87 |
|
---|
def parse_sources_list(html):
    """Extract ``(label, url)`` pairs from a javascript ``sources: [...]`` list.

    Only the first ``sources`` array in the page is inspected, and only
    entries that carry both a ``file`` and a ``label`` are returned.
    Escaped slashes (``\\/``) in the urls are unescaped.

    :param html: page source.
    :return: list of ``(label, url)`` tuples; empty if there is no array.
    """
    array = re.search(r'''['"]?sources['"]?\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if not array:
        return []

    entry_re = r'''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)'''
    found = []
    for file_url, label in re.findall(entry_re, array.group(1), re.DOTALL):
        found.append((label, file_url.replace('\\/', '/')))
    return found
|
---|
94 |
|
---|
def parse_html5_source_list(html):
    """Extract ``(label, url)`` pairs from HTML5 ``<source src=...>`` tags.

    The label is taken from the ``data-res`` attribute when the page has a
    self-closed source tag carrying one, otherwise from the ``type``
    attribute with a leading ``video/`` dropped.

    :param html: page source.
    :return: list of ``(label, url)`` tuples, in page order.
    """
    has_data_res = re.search(r'''<source\s+src\s*=.*?data-res\s*=.*?/\s*>''', html)
    if has_data_res:
        label_attrib = 'data-res'
    else:
        label_attrib = 'type'

    tag_re = (r'''<source\s+src\s*=\s*['"]([^'"]+)['"](?:.*?'''
              + label_attrib
              + r'''\s*=\s*['"](?:video/)?([^'"]+)['"])''')
    return [(label, src.replace('\\/', '/')) for src, label in re.findall(tag_re, html, re.DOTALL)]
|
---|
99 |
|
---|
def parse_smil_source_list(smil):
    """Extract ``(label, stream)`` pairs from a SMIL document.

    Each stream is formatted as ``'<base> playpath=<src>'``. The label is
    the value of a ``width`` or ``height`` attribute that directly follows
    the ``src`` attribute, or ``'Unknown'`` when there is none.

    :param smil: SMIL document text.
    :return: list of ``(label, stream)`` tuples; empty when the document has
        no ``base`` attribute, since no stream can be built without it.
    """
    base_match = re.search(r'base\s*=\s*"([^"]+)', smil)
    if base_match is None:
        # Previously this dereferenced None and raised AttributeError.
        return []
    base = base_match.group(1)

    sources = []
    for item in re.finditer(r'src\s*=\s*"([^"]+)(?:"\s*(?:width|height)\s*=\s*"([^"]+))?', smil):
        label = item.group(2) if item.group(2) is not None else 'Unknown'
        sources.append((label, '%s playpath=%s' % (base, item.group(1))))
    return sources
|
---|
109 |
|
---|
def scrape_sources(html, result_blacklist=None):
    """Scrape candidate media stream urls out of a page.

    The page (extended with any javascript that ``add_packed_data`` manages
    to unpack) is searched with a fixed series of patterns. A url is kept
    only if it contains ``://``, its path ends in a non-empty file name that
    contains none of the blacklisted fragments, and it has not been found
    already.

    :param html: page source.
    :param result_blacklist: extra file-name fragments to reject, given as a
        single string or an iterable of strings.
    :return: list of ``(label, url)`` tuples. When there is more than one
        result they are sorted in descending order by the integer value of
        the label, or of the label without its last character; if neither
        conversion works the discovery order is kept.
    """
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str

    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, string_types):
        result_blacklist = [result_blacklist]

    # Built once here instead of on every helper call.
    blacklist = set(['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt'])
    blacklist.update(result_blacklist)

    def _parse_to_list(_html, regex):
        # Return the new (label, url) pairs that regex finds in _html.
        streams = []
        labels = []
        for r in re.finditer(regex, _html, re.DOTALL):
            match = r.groupdict()
            stream_url = match['url']
            file_name = urlparse(stream_url).path.split('/')[-1]
            blocked = not file_name or any(item in file_name.lower() for item in blacklist)
            # source_list is the enclosing accumulator, so urls found by an
            # earlier pattern are skipped as well.
            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                continue

            # Patterns without a label group, or whose optional label group
            # did not take part in the match, fall back to the file name.
            label = match.get('label', file_name)
            if label is None:
                label = file_name
            labels.append(label)
            streams.append(stream_url)

        return list(zip(labels, streams))

    html = add_packed_data(html)

    patterns = (
        r'''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''',
        r'''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''',
        r'''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''',
        r'''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''',
        r'''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''',
    )

    source_list = []
    for regex in patterns:
        # Extend after every pattern so the helper's duplicate check sees
        # everything collected so far.
        source_list += _parse_to_list(html, regex)

    if len(source_list) > 1:
        try:
            source_list.sort(key=lambda x: int(x[0]), reverse=True)
        except (ValueError, TypeError):
            common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
            # Retry with the last character of the label removed.
            try:
                source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
            except (ValueError, TypeError):
                common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')

    return source_list
|
---|
157 |
|
---|
158 |
|
---|
def get_media_url(url, result_blacklist=None):
    """Fetch a page and return a stream url found on it, with headers appended.

    The page is requested with ``common.FF_USER_AGENT`` as the user agent and
    ``scheme://hostname`` of ``url`` as the referer. The headers appended to
    the result use ``url`` itself as the referer and include the value of
    the response's ``Set-Cookie`` header as ``Cookie`` when one was sent.

    :param url: address of the page to scrape.
    :param result_blacklist: extra file-name fragments to reject, given as a
        single string or a list of strings; ``.smil`` is always added.
    :return: the chosen stream url followed by the ``append_headers`` suffix.
    """
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    # smil (not playable) contains potential sources, so it is only
    # blacklisted when scraping is started from here.
    result_blacklist = list(set(result_blacklist + ['.smil']))
    net = common.Net()
    parsed_url = urlparse(url)
    headers = {'User-Agent': common.FF_USER_AGENT,
               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}

    response = net.http_GET(url, headers=headers)
    response_headers = response.get_headers(as_dict=True)
    headers.update({'Referer': url})
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        headers.update({'Cookie': cookie})
    html = response.content

    source_list = scrape_sources(html, result_blacklist)
    # The interactive chooser is disabled in this module; without auto_pick
    # pick_source returned None for several sources and the concatenation
    # below raised TypeError.
    source = pick_source(source_list, auto_pick=True)
    return source + append_headers(headers)
|
---|