Changeset 41177 for titan/mediathek


Ignore:
Timestamp:
Nov 16, 2017, 7:46:19 PM (2 years ago)
Author:
obi
Message:

tithek add vidlox

Location:
titan/mediathek/localhoster
Files:
1 added
2 edited

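Legend:

Unmodified (both line numbers are shown, r41176 then r41177, run together with the code: e.g. `229234}` is old line 229, new line 234, content `}`)
Added (only the r41177 line number precedes the code)
Removed (only the r41176 line number precedes the code)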
  • titan/mediathek/localhoster/hoster.sh

    r41176 r41177  
    227227{
    228228        $BIN $CMD/streamango.py $INPUT
     229}
     230
     231vidlox()
     232{
     233        $BIN $CMD/vidlox.py $INPUT
    229234}
    230235
     
    287292                waaw|netu|hqq) waaw $INPUT;;
    288293                streamango|streamcherry) streamango $INPUT;;
     294                vidlox) vidlox $INPUT;;
    289295                redirector|googlevideo|vodcloud|google) directstream "$INPUT";;
    290296        esac
  • titan/mediathek/localhoster/lib/helpers.py

    r39680 r41177  
    2929    hidden = {}
    3030    if form_id:
    31         pattern = '''<form [^>]*id\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
     31        pattern = '''<form [^>]*(?:id|name)\s*=\s*['"]?%s['"]?[^>]*>(.*?)</form>''' % (form_id)
    3232    else:
    3333        pattern = '''<form[^>]*>(.*?)</form>'''
    3434   
    35     for match in re.finditer('<!--.*?(..)-->', html, re.DOTALL):
    36         if match.group(1) != '//': html = html.replace(match.group(0), '')
     35    html = cleanse_html(html)
    3736       
    3837    for i, form in enumerate(re.finditer(pattern, html, re.DOTALL | re.I)):
     38 #       common.logger.log(form.group(1))
    3939        if index is None or i == index:
    4040            for field in re.finditer('''<input [^>]*type=['"]?hidden['"]?[^>]*>''', form.group(1)):
     
    5252                        hidden[name.group(1)] = value.group(1)
    5353           
    54 #    common.log_utils.log_debug('Hidden fields are: %s' % (hidden))
     54 #   common.logger.log_debug('Hidden fields are: %s' % (hidden))
    5555    return hidden
    5656
     
    6767            return sources[0][1]
    6868
    69 #            result = xbmcgui.Dialog().select('Choose the link', [source[0] if source[0] else 'Uknown' for source in sources])
     69#            result = xbmcgui.Dialog().select(common.i18n('choose_the_link'), [str(source[0]) if source[0] else 'Unknown' for source in sources])
    7070#            if result == -1:
    71 #                raise ResolverError('No link selected')
     71#                raise ResolverError(common.i18n('no_link_selected'))
    7272#            else:
    7373#                return sources[result][1]
    7474#    else:
    75 #        raise ResolverError('No Video Link Found')
     75#        raise ResolverError(common.i18n('no_video_link'))
     76
    7677
    7778def append_headers(headers):
    7879    return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(headers[key])) for key in headers])
    7980
    80 def add_packed_data(html):
    81     for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
     81def get_packed_data(html):
     82    packed_data = ''
     83    for match in re.finditer('(eval\s*\(function.*?)</script>', html, re.DOTALL | re.I):
    8284        try:
    8385            js_data = jsunpack.unpack(match.group(1))
    8486            js_data = js_data.replace('\\', '')
    85             html += js_data
     87            packed_data += js_data
    8688        except:
    8789            pass
    8890       
    89     return html
     91    return packed_data
    9092
    9193def parse_sources_list(html):
     
    111113    return sources
    112114
    113 def scrape_sources(html, result_blacklist=None):
     115def scrape_sources(html, result_blacklist=None, scheme='http', patterns=None, generic_patterns=True):
     116    if patterns is None: patterns = []
     117   
    114118    def __parse_to_list(_html, regex):
    115         _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt']
     119        _blacklist = ['.jpg', '.jpeg', '.gif', '.png', '.js', '.css', '.htm', '.html', '.php', '.srt', '.sub', '.xml', '.swf', '.vtt', '.mpd']
    116120        _blacklist = set(_blacklist + result_blacklist)
    117121        streams = []
     
    119123        for r in re.finditer(regex, _html, re.DOTALL):
    120124            match = r.groupdict()
    121             stream_url = match['url']
    122             file_name = urlparse(stream_url).path.split('/')[-1]
     125            stream_url = match['url'].replace('&amp;', '&')
     126            file_name = urlparse(stream_url[:-1]).path.split('/')[-1] if stream_url.endswith("/") else urlparse(stream_url).path.split('/')[-1]
    123127            blocked = not file_name or any(item in file_name.lower() for item in _blacklist)
     128            if stream_url.startswith('//'): stream_url = scheme + ':' + stream_url
    124129            if '://' not in stream_url or blocked or (stream_url in streams) or any(stream_url == t[1] for t in source_list):
    125130                continue
     
    132137        matches = zip(labels, streams)
    133138#        if matches:
    134 #            common.log_utils.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
     139#            common.logger.log_debug('Scrape sources |%s| found |%s|' % (regex, matches))
    135140        return matches
    136141
     
    140145        result_blacklist = [result_blacklist]
    141146       
    142     html = add_packed_data(html)
     147    html = html.replace("\/", "/")
     148    html += get_packed_data(html)
    143149
    144150    source_list = []
    145     source_list += __parse_to_list(html, '''["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\],]?["',]?\s*label\s*["']?\s*[:=]?\s*["'](?P<label>[^"']+))?''')
    146     source_list += __parse_to_list(html, '''video[^><]+src\s*=\s*['"](?P<url>[^'"]+)''')
    147     source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?data-res\s*=\s*['"](?P<label>[^'"]+))?''')
    148     source_list += __parse_to_list(html, '''["']?\s*url\s*["']?\s*[:=]\s*["'](?P<url>[^"']+)''')
    149     source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')
    150 
     151    if generic_patterns or not patterns:
     152        source_list += __parse_to_list(html, '''["']?label\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)["']?(?:[^}\]]+)["']?\s*file\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)''')
     153        source_list += __parse_to_list(html, '''["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)(?:[^}>\]]+)["']?\s*label\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)''')
     154        source_list += __parse_to_list(html, '''video[^><]+src\s*[=:]\s*['"](?P<url>[^'"]+)''')
     155        source_list += __parse_to_list(html, '''source\s+src\s*=\s*['"](?P<url>[^'"]+)['"](?:.*?res\s*=\s*['"](?P<label>[^'"]+))?''')
     156        source_list += __parse_to_list(html, '''["'](?:file|url)["']\s*[:=]\s*["'](?P<url>[^"']+)''')
     157        source_list += __parse_to_list(html, '''param\s+name\s*=\s*"src"\s*value\s*=\s*"(?P<url>[^"]+)''')
     158    for regex in patterns:
     159        source_list += __parse_to_list(html, regex)
     160       
     161    source_list = list(set(source_list))
     162   
     163 #   common.logger.log(source_list)
    151164    if len(source_list) > 1:
    152         try: source_list.sort(key=lambda x: int(x[0]), reverse=True)
     165        try: source_list.sort(key=lambda x: int(re.sub("\D", "", x[0])), reverse=True)
    153166        except:
    154167            test = 1
    155 #            common.log_utils.log_debug('Scrape sources sort failed |int(x[0])|')
    156             try: source_list.sort(key=lambda x: int(x[0][:-1]), reverse=True)
     168 #           common.logger.log_debug('Scrape sources sort failed |int(re.sub("\D", "", x[0])|')
     169            try: source_list.sort(key=lambda x: re.sub("[^a-zA-Z]", "", x[0]))
    157170            except:
    158                  test = 2
    159 #                common.log_utils.log_debug('Scrape sources sort failed |int(x[0][:-1])|')
     171                test = 2
     172                common.logger.log_debug('Scrape sources sort failed |re.sub("[^a-zA-Z]", "", x[0])|')
    160173
    161174    return source_list
    162175
    163176
    164 def get_media_url(url, result_blacklist=None):
     177def get_media_url(url, result_blacklist=None, patterns=None, generic_patterns=True):
     178    if patterns is None: patterns = []
     179    scheme = urlparse(url).scheme
    165180    if result_blacklist is None:
    166181        result_blacklist = []
     
    169184
    170185    result_blacklist = list(set(result_blacklist + ['.smil']))  # smil(not playable) contains potential sources, only blacklist when called from here
    171     net = Net()
    172     parsed_url = urlparse(url)
    173     headers = {'User-Agent': common.FF_USER_AGENT,
    174                'Referer': '%s://%s' % (parsed_url.scheme, parsed_url.hostname)}
     186    net = common.Net()
     187    headers = {'User-Agent': common.RAND_UA}
    175188
    176189    response = net.http_GET(url, headers=headers)
    177 #    response_headers = response.get_headers(as_dict=True)
    178     response_headers = response.get_headers()
    179 
     190    response_headers = response.get_headers(as_dict=True)
    180191    headers.update({'Referer': url})
    181 #    cookie = response_headers.get('Set-Cookie', None)
    182 #    if cookie:
    183 #        headers.update({'Cookie': cookie})
     192    cookie = response_headers.get('Set-Cookie', None)
     193    if cookie:
     194        headers.update({'Cookie': cookie})
    184195    html = response.content
    185196
    186     source_list = scrape_sources(html, result_blacklist)
     197    source_list = scrape_sources(html, result_blacklist, scheme, patterns, generic_patterns)
    187198    source = pick_source(source_list)
    188 #    return source + append_headers(headers)
    189     return source
     199    return source + append_headers(headers)
     200
     201def cleanse_html(html):
     202    for match in re.finditer('<!--(.*?)-->', html, re.DOTALL):
     203        if match.group(1)[-2:] != '//': html = html.replace(match.group(0), '')
     204   
     205    html = re.sub('''<(div|span)[^>]+style=["'](visibility:\s*hidden|display:\s*none);?["']>.*?</\\1>''', '', html, re.I | re.DOTALL)
     206    return html
     207
     208def get_dom(html, tag):
     209    start_str = '<%s' % (tag.lower())
     210    end_str = '</%s' % (tag.lower())
     211   
     212    results = []
     213    html = html.lower()
     214    while html:
     215        start = html.find(start_str)
     216        end = html.find(end_str, start)
     217        pos = html.find(start_str, start + 1)
     218        while pos < end and pos != -1:
     219            tend = html.find(end_str, end + len(end_str))
     220            if tend != -1: end = tend
     221            pos = html.find(start_str, pos + 1)
     222       
     223        if start == -1 and end == -1:
     224            break
     225        elif start > -1 and end > -1:
     226            result = html[start:end]
     227        elif end > -1:
     228            result = html[:end]
     229        elif start > -1:
     230            result = html[start:]
     231        else:
     232            break
     233           
     234        results.append(result)
     235        html = html[start + len(start_str):]
     236   
     237    return results
     238
Note: See TracChangeset for help on using the changeset viewer.