1 | """
|
---|
2 | URLResolver Addon for Kodi
|
---|
3 | Copyright (C) 2016 t0mm0, tknorris
|
---|
4 |
|
---|
5 | This program is free software: you can redistribute it and/or modify
|
---|
6 | it under the terms of the GNU General Public License as published by
|
---|
7 | the Free Software Foundation, either version 3 of the License, or
|
---|
8 | (at your option) any later version.
|
---|
9 |
|
---|
10 | This program is distributed in the hope that it will be useful,
|
---|
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | GNU General Public License for more details.
|
---|
14 |
|
---|
15 | You should have received a copy of the GNU General Public License
|
---|
16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
17 | """
|
---|
import re
import urllib
#import xbmcgui
import jsunpack
from urlparse import urlparse
#from urlresolver import common
#from urlresolver.resolver import ResolverError
import common
from net import Net

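# Collect hidden <input> fields (and optionally the submit button) from a form in
# the given HTML, returned as a {name: value} dict. If form_id is given only that
# form is scanned; if index is given only the form at that position is used.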
def get_hidden(html, form_id=None, index=None, include_submit=True):
    hidden = {}
    if form_id:
        pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
    else:
        pattern = '''<form[^>]*>(.*?)</form>'''

    for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
        if match.group(1) != '//': html = html.replace(match.group(0), '')

    for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
        if index is None or i == index:
            for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form.group(1)):
                match = re.search('''name\s*=\s*['"]([^'"]+)''', field.group(0))
                match1 = re.search('''value\s*=\s*['"]([^'"]*)''', field.group(0))
                if match and match1:
                    hidden[match.group(1)] = match1.group(1)

            if include_submit:
                match = re.search('''<input [^>]*type=['"]?submit['"]?[^>]*>''', form.group(1))
                if match:
                    name = re.search('''name\s*=\s*['"]([^'"]+)''', match.group(0))
                    value = re.search('''value\s*=\s*['"]([^'"]*)''', match.group(0))
                    if name and value:
                        hidden[name.group(1)] = value.group(1)

    # common.log_utils.log_debug('Hidden fields are: %s' % (hidden))
    return hidden

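# Pick a single URL from a list of (label, url) tuples. With the Kodi selection
# dialog disabled, this simply returns the first (highest ranked) source.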
def pick_source(sources, auto_pick=None):
    # if auto_pick is None:
    #     auto_pick = common.get_setting('auto_pick') == 'true'

    if len(sources) == 1:
        return sources[0][1]
    elif len(sources) > 1:
        if auto_pick:
            return sources[0][1]
        else:
            # selection dialog disabled (xbmcgui import is commented out); fall back to the first source
            return sources[0][1]

    #         result = xbmcgui.Dialog().select('Choose the link', [source[0] if source[0] else 'Unknown' for source in sources])
    #         if result == -1:
    #             raise ResolverError('No link selected')
    #         else:
    #             return sources[result][1]
    # else:
    #     raise ResolverError('No Video Link Found')

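# Serialize request headers into the '|key=value&key=value' suffix that Kodi
# accepts on playable URLs.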
def append_headers(headers):
    return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(headers[key])) for key in headers])

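# Unpack any 'eval(function(p,a,c,k,e,d)...)' packed script blocks found in the
# page and append the decoded JavaScript to the HTML so the source regexes below
# can match URLs hidden inside packed scripts.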
def add_packed_data(html):
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        try:
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data
        except:
            pass

    return html

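# Parse a jwplayer-style "sources: [{file: ..., label: ...}, ...]" block into a
# list of (label, url) tuples.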
def parse_sources_list(html):
    sources = []
    match = re.search('''['"]?sources['"]?\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if match:
        sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)]
    return sources

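# Parse HTML5 <source src=...> tags into (label, url) tuples, using the data-res
# attribute as the label when present and the type attribute otherwise.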
def parse_html5_source_list(html):
    label_attrib = 'type' if not re.search('''<source\s+src\s*=.*?data-res\s*=.*?/\s*>''', html) else 'data-res'
    sources = [(match[1], match[0].replace('\/', '/')) for match in re.findall('''<source\s+src\s*=\s*['"]([^'"]+)['"](?:.*?''' + label_attrib + '''\s*=\s*['"](?:video/)?([^'"]+)['"])''', html, re.DOTALL)]
    return sources

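# Parse an SMIL playlist into (label, '<base> playpath=<src>') tuples, using the
# width/height attribute as the label when available.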
def parse_smil_source_list(smil):
    sources = []
    base = re.search('base\s*=\s*"([^"]+)', smil).groups()[0]
    for i in re.finditer('src\s*=\s*"([^"]+)(?:"\s*(?:width|height)\s*=\s*"([^"]+))?', smil):
        label = 'Unknown'
        if (len(i.groups()) > 1) and (i.group(2) is not None):
            label = i.group(2)
        sources += [(label, '%s playpath=%s' % (base, i.group(1)))]
    return sources

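# Scan a page (including any unpacked JavaScript) with a set of generic regexes
# and return a list of (label, url) tuples, filtering out non-media file types
# and anything in result_blacklist.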
def scrape_sources(html, result_blacklist=None):
    def __parse_to_list(_html, regex):
        # skip obvious non-media file types in addition to any caller-supplied blacklist
        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
        _blacklist = set(_blacklist + result_blacklist)
        streams = []
        labels = []
        for r in re.finditer(regex, _html, re.DOTALL):
            match = r.groupdict()
            stream_url = match['url']
            file_name = urlparse(stream_url).path.split('/')[-1]
            blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
            # ignore relative URLs, blacklisted extensions and anything already collected
            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                continue

            label = match.get('label', file_name)
            if label is None: label = file_name
            labels.append(label)
            streams.append(stream_url)

        matches = zip(labels, streams)
        # if matches:
        #     common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
        return matches

    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    html = add_packed_data(html)

    # try progressively more generic patterns: jwplayer file/label pairs, <video>/<source>
    # tags, bare url: entries, and <param name="src"> values
    source_list = []
    source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
    source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
    source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
    source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
    source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')

    if len(source_list) > 1:
        # sort by numeric label (e.g. '1080', '720') so higher quality sorts first
        try: source_list.sort(key=lambda x: int(x[0]), reverse=True)
        except:
            # common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
            # fall back to labels with a trailing suffix (e.g. '1080p', '720p')
            try: source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
            except:
                # common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')
                pass

    return source_list


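# Resolve a hosted-video page: fetch the URL, scrape playable sources from the
# response, pick one, and return it with the request headers appended in Kodi's
# '|key=value' format.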
def get_media_url(url, result_blacklist=None):
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]

    result_blacklist = list(set(result_blacklist + ['.smil']))  # smil (not playable) contains potential sources, only blacklist when called from here
    net = Net()
    parsed_url = urlparse(url)
    headers = {'User-Agent': common.FF_USER_AGENT,
               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}

    response = net.http_GET(url, headers=headers)
    response_headers = response.get_headers(as_dict=True)  # dict form so Set-Cookie can be looked up below

    headers.update({'Referer': url})
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        headers.update({'Cookie': cookie})
    html = response.content

    source_list = scrape_sources(html, result_blacklist)
    source = pick_source(source_list)
    return source + append_headers(headers)
    # return source
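
# Example (hypothetical host page, assuming a direct stream is found on it):
#   get_media_url('http://somehost.example/embed/abc123')
#   -> 'http://cdn.somehost.example/v/abc123.mp4|User-Agent=...&Referer=...&Cookie=...'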