- Timestamp: 11/16/17 19:46:19
- Location: titan/mediathek/localhoster
- Files: 1 added, 2 edited
Legend:
- Unmodified: no prefix
- Added: lines prefixed with +
- Removed: lines prefixed with -
titan/mediathek/localhoster/hoster.sh
--- hoster.sh (r41176)
+++ hoster.sh (r41177)
@@ -227,4 +227,9 @@
 {
 $BIN $CMD/streamango.py $INPUT
+}
+
+vidlox()
+{
+$BIN $CMD/vidlox.py $INPUT
 }
 
@@ -287,4 +292,5 @@
 waaw|netu|hqq) waaw $INPUT;;
 streamango|streamcherry) streamango $INPUT;;
+vidlox) vidlox $INPUT;;
 redirector|googlevideo|vodcloud|google) directstream "$INPUT";;
 esac
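This follows hoster.sh's existing convention: each supported hoster gets a one-line wrapper function that forwards $INPUT to its Python resolver under $CMD, plus a matching branch in the central case dispatch on the hoster name. The one file added by this changeset is presumably the vidlox.py resolver the new wrapper calls; the changeset itself only shows the shell side.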
titan/mediathek/localhoster/lib/helpers.py
--- lib/helpers.py (r39680)
+++ lib/helpers.py (r41177)
@@ -29,12 +29,12 @@
     hidden = {}
     if form_id:
-        pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
+        pattern = '''<form [^>]*(?:id|name)\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
     else:
         pattern = '''<form[^>]*>(.*?)</form>'''
 
-    for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
-        if match.group(1) != '//': html = html.replace(match.group(0), '')
+    html = cleanse_html(html)
 
     for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
+        # common.logger.log(form.group(1))
         if index is None or i == index:
             for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form.group(1)):
@@ -52,4 +52,4 @@
                 hidden[name.group(1)] = value.group(1)
 
-    # common.log_utils.log_debug('Hidden fields are: %s' % (hidden))
+    # common.logger.log_debug('Hidden fields are: %s' % (hidden))
     return hidden
@@ -67,25 +67,27 @@
         return sources[0][1]
 
-#         result = xbmcgui.Dialog().select('Choose the link', [source[0] if source[0] else 'Uknown' for source in sources])
+#         result = xbmcgui.Dialog().select(common.i18n('choose_the_link'), [str(source[0]) if source[0] else 'Unknown' for source in sources])
 #         if result == -1:
-#             raise ResolverError('No link selected')
+#             raise ResolverError(common.i18n('no_link_selected'))
 #         else:
 #             return sources[result][1]
 #     else:
-#         raise ResolverError('No Video Link Found')
+#         raise ResolverError(common.i18n('no_video_link'))
 
+
 def append_headers(headers):
     return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(headers[key])) for key in headers])
 
-def add_packed_data(html):
-    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
+def get_packed_data(html):
+    packed_data = ''
+    for match in re.finditer('(eval\s*\(function.*?)</script>', html, re.DOTALL | re.I):
         try:
             js_data = jsunpack.unpack(match.group(1))
             js_data = js_data.replace('\\', '')
-            html += js_data
+            packed_data += js_data
         except:
             pass
 
-    return html
+    return packed_data
 
 def parse_sources_list(html):
@@ -111,15 +113,18 @@
     return sources
 
-def scrape_sources(html, result_blacklist=None):
+def scrape_sources(html, result_blacklist=None, scheme='http', patterns=None, generic_patterns=True):
+    if patterns is None: patterns = []
+
     def __parse_to_list(_html, regex):
-        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
+        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt', '.mpd']
         _blacklist = set(_blacklist + result_blacklist)
         streams = []
         labels = []
         for r in re.finditer(regex, _html, re.DOTALL):
             match = r.groupdict()
-            stream_url = match['url']
-            file_name = urlparse(stream_url).path.split('/')[-1]
+            stream_url = match['url'].replace('&amp;', '&')
+            file_name = urlparse(stream_url[:-1]).path.split('/')[-1] if stream_url.endswith("/") else urlparse(stream_url).path.split('/')[-1]
             blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
+            if stream_url.startswith('//'): stream_url = scheme + ':' + stream_url
             if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
                 continue
@@ -132,5 +137,5 @@
         matches = zip(labels, streams)
         # if matches:
-        #     common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
+        #     common.logger.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
         return matches
 
@@ -140,22 +145,30 @@
         result_blacklist = [result_blacklist]
 
-    html = add_packed_data(html)
+    html = html.replace("\/", "/")
+    html += get_packed_data(html)
 
     source_list = []
-    source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
-    source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
-    source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
-    source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
-    source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')
-
+    if generic_patterns or not patterns:
+        source_list += __parse_to_list(html, '''["']?label\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)["']?(?:[^}\]]+)["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)''')
+        source_list += __parse_to_list(html, '''["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\]]+)["']?\s*label\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)''')
+        source_list += __parse_to_list(html, '''video[^><]+src\s*[=:]\s*['"](?P<url>[^'"]+)''')
+        source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?res\s*=\s*['"](?P<label>[^'"]+))?''')
+        source_list += __parse_to_list(html, '''["'](?:file|url)["']\s*[:=]\s*["'](?P<url>[^"']+)''')
+        source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')
+    for regex in patterns:
+        source_list += __parse_to_list(html, regex)
+
+    source_list = list(set(source_list))
+
+    # common.logger.log(source_list)
     if len(source_list) > 1:
-        try: source_list.sort(key=lambda x: int(x[0]), reverse=True)
+        try: source_list.sort(key=lambda x: int(re.sub("\D", "", x[0])), reverse=True)
         except:
             test = 1
-            # common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
-            try: source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
+            # common.logger.log_debug('Scrape sources sort failed |int(re.sub("\D", "", x[0])|')
+            try: source_list.sort(key=lambda x: re.sub("[^a-zA-Z]", "", x[0]))
             except:
-                test = 2
-                # common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')
+                test = 2
+                common.logger.log_debug('Scrape sources sort failed |re.sub("[^a-zA-Z]", "", x[0])|')
 
     return source_list
@@ -162,5 +175,7 @@
 
 
-def get_media_url(url, result_blacklist=None):
+def get_media_url(url, result_blacklist=None, patterns=None, generic_patterns=True):
+    if patterns is None: patterns = []
+    scheme = urlparse(url).scheme
     if result_blacklist is None:
         result_blacklist = []
@@ -169,21 +184,54 @@
 
     result_blacklist = list(set(result_blacklist + ['.smil']))  # smil (not playable) contains potential sources, only blacklist when called from here
-    net = Net()
-    parsed_url = urlparse(url)
-    headers = {'User-Agent': common.FF_USER_AGENT,
-               'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}
+    net = common.Net()
+    headers = {'User-Agent': common.RAND_UA}
 
     response = net.http_GET(url, headers=headers)
-    # response_headers = response.get_headers(as_dict=True)
-    response_headers = response.get_headers()
-
+    response_headers = response.get_headers(as_dict=True)
     headers.update({'Referer': url})
-    #cookie = response_headers.get('Set-Cookie', None)
-    #if cookie:
-    #    headers.update({'Cookie': cookie})
+    cookie = response_headers.get('Set-Cookie', None)
+    if cookie:
+        headers.update({'Cookie': cookie})
    html = response.content
 
-    source_list = scrape_sources(html, result_blacklist)
+    source_list = scrape_sources(html, result_blacklist, scheme, patterns, generic_patterns)
     source = pick_source(source_list)
-    # return source + append_headers(headers)
-    return source
+    return source + append_headers(headers)
+
+def cleanse_html(html):
+    for match in re.finditer('<!--(.*?)-->', html, re.DOTALL):
+        if match.group(1)[-2:] != '//': html = html.replace(match.group(0), '')
+
+    html = re.sub('''<(div|span)[^>]+style=["'](visibility:\s*hidden|display:\s*none);?["']>.*?</\\1>''', '', html, re.I | re.DOTALL)
+    return html
+
+def get_dom(html, tag):
+    start_str = '<%s' % (tag.lower())
+    end_str = '</%s' % (tag.lower())
+
+    results = []
+    html = html.lower()
+    while html:
+        start = html.find(start_str)
+        end = html.find(end_str, start)
+        pos = html.find(start_str, start + 1)
+        while pos < end and pos != -1:
+            tend = html.find(end_str, end + len(end_str))
+            if tend != -1: end = tend
+            pos = html.find(start_str, pos + 1)
+
+        if start == -1 and end == -1:
+            break
+        elif start > -1 and end > -1:
+            result = html[start:end]
+        elif end > -1:
+            result = html[:end]
+        elif start > -1:
+            result = html[start:]
+        else:
+            break
+
+        results.append(result)
+        html = html[start + len(start_str):]
+
+    return results
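To make the new scraping flow concrete, here is a minimal, self-contained sketch of the core of scrape_sources after this change: match label/file pairs with a simplified generic pattern, then sort with the new numeric key int(re.sub("\D", "", label)) so a label like '720p' outranks '360p'. The sample HTML, the URLs, and the scrape() helper are illustrative, not code from the repository; the real function also appends jsunpack-decoded eval() blocks, prefixes scheme-relative // URLs with the page scheme, de-duplicates via list(set(...)), and drops URLs whose file name hits the extension blacklist.

import re

SAMPLE_HTML = '''
<script>
sources: [{file: "http://cdn.example.invalid/v/abc.mp4", label: "720p"},
          {file: "http://cdn.example.invalid/v/abc_lo.mp4", label: "360p"}]
</script>
'''

def scrape(html):
    # Simplified stand-in for one of the generic file/label patterns above.
    pattern = r'''file\s*:\s*["'](?P<url>[^"']+)["']\s*,\s*label\s*:\s*["'](?P<label>[^"']+)'''
    sources = []
    for m in re.finditer(pattern, html, re.DOTALL):
        if '://' in m.group('url'):  # same sanity check __parse_to_list applies
            sources.append((m.group('label'), m.group('url')))
    # r41177's first sort key: strip non-digits and sort numerically, best first.
    sources.sort(key=lambda x: int(re.sub(r'\D', '', x[0]) or 0), reverse=True)
    return sources

print(scrape(SAMPLE_HTML))
# [('720p', 'http://cdn.example.invalid/v/abc.mp4'),
#  ('360p', 'http://cdn.example.invalid/v/abc_lo.mp4')]

pick_source then returns the first (best-labelled) entry, and get_media_url now hands that URL back with the request headers attached in the '|key=value&key2=value2' form built by append_headers, instead of the bare URL as before. Note also that the new get_dom helper lowercases the whole document before scanning, so the fragments it returns are lowercased copies of the original markup.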