Changeset 40037
- Timestamp: 02/19/17 00:10:11 (7 years ago)
- Location: titan/mediathek/localhoster
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
titan/mediathek/localhoster/flashx.py
r39352 r40037 5 5 from lib.net import Net 6 6 import lib.common as common 7 import lib.fx_gmu as fx_gmu8 7 import lib.fx_gmu as fx_gmu 9 8 … … 17 16 18 17 def __init__(self): 18 # print "1111111111111" 19 19 self.net = Net() 20 # print "2222222222222" 21 20 22 url = str(sys.argv[1]) 23 # print "3333333333333" 24 21 25 host = self.get_host_and_id(url)[0] 26 # print "4444444444444" 27 22 28 media_id = self.get_host_and_id(url)[1] 29 # print "5555555555555" 23 30 24 31 return self.get_media_url(host, media_id) … … 65 72 # py_data = f.read() 66 73 # import fx_gmu 74 # print "aaaaaaaaaaaa" 75 67 76 web_url = self.get_url(host, media_id) 77 # print "bbbbbbbbbbbb", web_url 78 68 79 print fx_gmu.get_media_url(web_url) 80 # print "cccccccccccc" 81 69 82 # except Exception as e: 70 83 # print 'error' -
titan/mediathek/localhoster/lib/fx_gmu.py
r39352 r40037 19 19 import urlparse 20 20 import urllib 21 from lib import jsunpack 22 from lib import unwise 21 23 from lib import helpers 22 24 #from urlresolver import common … … 29 31 net = Net() 30 32 33 BAD_ETAGS = ["580eabc0-40ac3a", "582e7b99-9cd574b"] 34 MIN_CONTENT = 1024 * 1024 * 5 35 31 36 def get_media_url(url): 32 37 try: 38 # print "xxxxxxxx" 33 39 hostname = urlparse.urlparse(url).hostname 34 headers = {'User-Agent': common.FF_USER_AGENT} 40 media_id = re.search('embed.php\?c=(.*)', url).group(1) 41 headers = {'User-Agent': common.IE_USER_AGENT} 35 42 html = net.http_GET(url, headers=headers).content 43 adblock_check(html, headers) 36 44 headers.update({'Referer': url}) 37 for match in re.finditer('''<script[^>]*src=["']([^'"]+)''', html): 38 _html = get_js(match.group(1), headers, hostname) 39 40 match = re.search('''href=['"]([^'"]+)''', html) 41 if match: 45 # print "yyyyyyyy" 46 47 for js_url in get_js_url(html): 48 # print "xxxxxxxx111" 49 js = get_js(js_url, headers, hostname) 50 js = unwise.unwise_process(js) 51 adblock_check(js, headers) 52 xhr_check(js, headers) 53 54 html = cleanse_html(html) 55 for match in re.finditer('''href=['"]([^'"]+)''', html): 56 # print "xxxxxxxx2222" 42 57 playvid_url = match.group(1) 43 html = net.http_GET(playvid_url, headers=headers).content 44 headers.update({'Referer': playvid_url}) 45 for match in re.finditer('''<script[^>]*src=["']([^'"]+)''', html): 46 js = get_js(match.group(1), headers, hostname) 47 match = re.search('''!=\s*null.*?get\(['"]([^'"]+).*?\{([^:]+)''', js, re.DOTALL) 48 if match: 49 fx_url, fx_param = match.groups() 50 fx_url = resolve_url(urlparse.urljoin('http://www.flashx.tv', fx_url) + '?' 
+ urllib.urlencode({fx_param: 1})) 51 # common.log_utils.log('fxurl: %s' % (fx_url)) 52 _html = net.http_GET(fx_url, headers=headers).content 53 54 headers.update({'Referer': url}) 55 html = net.http_GET(playvid_url, headers=headers).content 56 html = helpers.add_packed_data(html) 57 58 # common.log_utils.log(html) 59 sources = helpers.parse_sources_list(html) 60 try: sources.sort(key=lambda x: SORT_KEY.get(x[0], 0), reverse=True) 61 except: pass 58 if '-%s.' % (media_id) in playvid_url: 59 headers.update({'Referer': url}) 60 html = net.http_GET(playvid_url, headers=headers).content 61 # common.log_utils.log(html) 62 html = cleanse_html(html) 63 headers['Referer'] = playvid_url 64 65 sources = [] 66 spans = get_span_ids(html, media_id) 67 for match in re.finditer('<script[^>]*>\s*(eval\(function.*?)</script>', html, re.DOTALL): 68 # print "xxxxxxxx3333" 69 js_data = jsunpack.unpack(match.group(1)) 70 if not spans or any(span_id in js_data for span_id in spans): 71 js_sources = helpers.parse_sources_list(js_data) 72 sources += js_sources 73 74 d = {} 75 for source in sources: d[source[1]] = d.setdefault(source[1], 0) + 1 76 # print "xxxxxxxx4444" 77 # common.log_utils.log(sources) 78 sources = [source for source in sources if d[source[1]] == 1] 79 # print "xxxxxxxx5555" 80 # common.log_utils.log(sources) 81 sources = [source for source in sources if not any([x in source[1].lower() for x in ('/movie.mp4', '/trailer.mp4', '://cdn.flashx.tv')])] 82 # print "xxxxxxxx6666" 83 # print "source", source 84 # print "headers", headers 85 86 87 # common.log_utils.log(sources) 88 # sources = [source for source in sources if check_headers(source, headers)] 89 # print "xxxxxxxx7777" 90 # common.log_utils.log(sources) 91 try: 92 sources.sort(key=lambda x: SORT_KEY.get(x[0], 0), reverse=True) 93 # print "xxxxxxxx8888" 94 except: 95 pass 96 # print "xxxxxxxx9999" 62 97 source = helpers.pick_source(sources) 98 # print "xxxxxxxxxxxxxxxxxxxxxxxxxx" 63 99 return source + 
helpers.append_headers(headers) 64 100 … … 69 105 raise ResolverError('Unable to resolve flashx link. Filelink not found.') 70 106 107 def cleanse_html(html): 108 for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL): 109 if match.group(1) != '//': html = html.replace(match.group(0), '') 110 111 html = re.sub('''<(div|span)[^>]+style=["'](visibility:\s*hidden|display:\s*none);?["']>.*?</\\1>''', '', html, re.I | re.DOTALL) 112 return html 113 114 def get_span_ids(html, media_id): 115 spans = [] 116 pattern = '''<img[^>]+src=['"][^"']+%s.jpe?g''' % (media_id) 117 for span in get_dom(html, 'span'): 118 match = re.search('''<span[^>]+id=['"]([^'"]+)[^>]+>(.*)''', span, re.I | re.DOTALL) 119 if match: 120 if re.search(pattern, match.group(2), re.I | re.DOTALL): 121 spans.append(match.group(1)) 122 123 return spans 124 125 def get_dom(html, tag): 126 start_str = '<%s' % (tag.lower()) 127 end_str = '</%s' % (tag.lower()) 128 129 results = [] 130 html = html.lower() 131 while html: 132 start = html.find(start_str) 133 end = html.find(end_str, start) 134 pos = html.find(start_str, start + 1) 135 while pos < end and pos != -1: 136 tend = html.find(end_str, end + len(end_str)) 137 if tend != -1: end = tend 138 pos = html.find(start_str, pos + 1) 139 140 if start == -1 and end == -1: 141 break 142 elif start > -1 and end > -1: 143 result = html[start:end] 144 elif end > -1: 145 result = html[:end] 146 elif start > -1: 147 result = html[start:] 148 else: 149 break 150 151 results.append(result) 152 html = html[start + len(start_str):] 153 154 return results 155 156 def adblock_check(js, headers): 157 match = re.search('''!=\s*null.*?get\(['"]([^'"]+).*?\{([^:]+)\s*:\s*['"]([^'"]+)''', js, re.DOTALL) 158 if match: 159 fx_url, fx_param, fx_value = match.groups() 160 fx_url = resolve_url(urlparse.urljoin('https://www.flashx.tv', fx_url) + '?' 
+ urllib.urlencode({fx_param: fx_value})) 161 # common.log_utils.log('fxurl: %s' % (fx_url)) 162 _html = net.http_GET(fx_url, headers=headers).content 163 164 def xhr_check(js, headers): 165 match = re.search('''request\.open\(\s*["']([^'"]+)"\s*,\s*['"]([^'"]+)''', js, re.DOTALL) 166 if match: 167 _method, xhr_url = match.groups() 168 # common.log_utils.log('xhr url: %s' % (xhr_url)) 169 _html = net.http_GET(xhr_url, headers=headers).content 170 171 def check_headers(source, headers): 172 try: 173 response = net.http_HEAD(source[1], headers=headers) 174 res_headers = response.get_headers(as_dict=True) 175 # common.log_utils.log(res_headers) 176 if res_headers.get('Etag', '').strip('"') not in BAD_ETAGS and int(res_headers.get('Content-Length', 0)) >= MIN_CONTENT: 177 return True 178 except Exception as e: 179 # common.log_utils.log('Adding failed source: %s' % (e), common.log_utils.LOGWARNING) 180 return True 181 return False 182 183 def get_js_url(html): 184 urls = [] 185 for match in re.finditer('''<script[^>]*src\s*=\s*(["'])(.*?)\\1''', html, re.I): 186 js_url = match.group(2).strip() 187 js_url = re.sub('''['"]''', '', js_url) 188 if '/' not in js_url: 189 js_url = js_url.strip('+') 190 pattern = '''var\s+%s\s*=\s*(['"])(.*?)\\1''' % (js_url) 191 match = re.search(pattern, html) 192 if match: 193 js_url = match.group(2) 194 urls.append(js_url) 195 return urls 196 71 197 def get_js(js_url, headers, hostname): 72 198 js = '' 73 if not js_url.startswith('http'): 74 base_url = 'http://' + hostname 199 if js_url.startswith('//'): 200 js_url = 'https:' + js_url 201 elif not js_url.startswith('http'): 202 base_url = 'https://' + hostname 75 203 js_url = urlparse.urljoin(base_url, js_url) 76 204 77 if hostname in js_url: 78 # common.log_utils.log('Getting JS: |%s| - |%s|' % (js_url, headers)) 79 js = net.http_GET(js_url, headers=headers).content205 # common.log_utils.log('Getting JS: |%s| - |%s|' % (js_url, headers)) 206 try: js = net.http_GET(js_url, 
headers=headers).content 207 except: js = '' 80 208 return js 81 209
Note: See TracChangeset for help on using the changeset viewer.