source: titan/mediathek/localhoster/lib/net.py @ 40523

Last change on this file since 40523 was 39400, checked in by tt4sk, 7 years ago

First call of goldesel.to with goldesel.py / net.py, store cookie in /mnt/network/cookies, following calls with curl using same cookie and user agent.

File size: 22.9 KB
Line 
1'''
2    common XBMC Module
3    Copyright (C) 2011 t0mm0
4
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17'''
18
19import cookielib
20import gzip
21import re
22import StringIO
23import urllib
24import urllib2
25import socket
26from urlparse import urlparse
27from urlparse import urlunparse
28import time
29
class HeadRequest(urllib2.Request):
    '''A Request class that sends HEAD requests.

    urllib2 asks a Request object for its HTTP verb via get_method();
    overriding it is the standard urllib2 way to issue a HEAD request
    (response headers only, no body is transferred).
    '''
    def get_method(self):
        # Force the HEAD verb instead of urllib2's default GET/POST choice.
        return 'HEAD'
34
class Net:
    '''
    This class wraps :mod:`urllib2` and provides an easy way to make http
    requests while taking care of cookies, proxies, gzip compression and
    character encoding.

    Example::

        from addon.common.net import Net
        net = Net()
        response = net.http_GET('http://xbmc.org')
        print response.content
    '''
    IE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko'
    FF_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0'
    IOS_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25'
    ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'

    # NOTE: _cj is deliberately a class attribute, so every Net instance in
    # the process shares one main cookie jar (and the opener installed
    # process-wide via urllib2.install_opener()).
    _cj = cookielib.MozillaCookieJar()

    _proxy = None
    _user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36'
    _accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    _http_debug = False
    _socket_timeout = 60

    def __init__(self, cookie_file='', proxy='', user_agent='',
                 http_debug=False, accept=_accept, socket_timeout=_socket_timeout, cloudflare=False):
        '''
        Kwargs:
            cookie_file (str): Full path to a file to be used to load and save
            cookies to.

            proxy (str): Proxy setting (eg.
            ``'http://user:pass@example.com:1234'``)

            user_agent (str): String to use as the User Agent header. If not
            supplied the class will use a default user agent (chrome)

            http_debug (bool): Set ``True`` to have HTTP header info written to
            the XBMC log for all requests.

            accept (str) : String to use as HTTP Request Accept header.

            socket_timeout (int): time in seconds for socket connections to wait until time out

            cloudflare (bool): Set ``True`` to check all requests that raise HTTPError 503 for Cloudflare challenge and solve
            This can be changed per request as well, see http_GET, http_POST
        '''
        # Set socket timeout - useful for slow connections.
        socket.setdefaulttimeout(socket_timeout)

        # Fix: remember the values actually passed in. Previously the
        # 'accept' and 'socket_timeout' kwargs were accepted but never
        # stored, so http_HEAD() and url_with_headers() always used the
        # class-level defaults regardless of what the caller requested.
        self._accept = accept
        self._socket_timeout = socket_timeout

        # Empty cloudflare jar for each instance rather than scope of the
        # import; it is replaced again for every challenge (_update_opener).
        self._cloudflare_jar = cookielib.MozillaCookieJar()

        self.cloudflare = cloudflare
        if cookie_file:
            self.set_cookies(cookie_file)
        if proxy:
            self.set_proxy(proxy)
        if user_agent:
            self.set_user_agent(user_agent)
        self._http_debug = http_debug
        self._update_opener()

    def set_cookies(self, cookie_file):
        '''
        Set the cookie file and try to load cookies from it if it exists.

        Args:
            cookie_file (str): Full path to a file to be used to load and save
            cookies to.

        Returns:
            bool: ``True`` if the file was loaded, ``False`` otherwise.
        '''
        try:
            self._cj.load(cookie_file, ignore_discard=True)
            self._update_opener()
            return True
        except Exception:
            # Missing or malformed cookie file - keep the current jar.
            return False

    def get_cookies(self):
        '''Returns a dictionary containing all cookie information by domain.'''
        return self._cj._cookies

    def save_cookies(self, cookie_file):
        '''
        Saves cookies to a file.

        Args:
            cookie_file (str): Full path to a file to save cookies to.
        '''
        self._cj.save(cookie_file, ignore_discard=True)

    def set_proxy(self, proxy):
        '''
        Args:
            proxy (str): Proxy setting (eg.
            ``'http://user:pass@example.com:1234'``)
        '''
        self._proxy = proxy
        self._update_opener()

    def get_proxy(self):
        '''Returns string containing proxy details.'''
        return self._proxy

    def set_user_agent(self, user_agent):
        '''
        Args:
            user_agent (str): String to use as the User Agent header.
        '''
        self._user_agent = user_agent

    def get_user_agent(self):
        '''Returns user agent string.'''
        return self._user_agent

    def _update_opener(self, cloudflare_jar=False):
        '''
        Builds and installs a new opener to be used by all future calls to
        :func:`urllib2.urlopen`.

        Kwargs:
            cloudflare_jar (bool): If ``True``, install the opener with a
            fresh throw-away jar (used while solving a Cloudflare challenge)
            instead of the main jar.
        '''
        if self._http_debug:
            http = urllib2.HTTPHandler(debuglevel=1)
        else:
            http = urllib2.HTTPHandler()

        if cloudflare_jar:
            self._cloudflare_jar = cookielib.MozillaCookieJar()
            jar = self._cloudflare_jar
        else:
            jar = self._cj

        if self._proxy:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
                                          urllib2.ProxyHandler({'http':
                                                                self._proxy}),
                                          urllib2.HTTPBasicAuthHandler(),
                                          http)
        else:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
                                          urllib2.HTTPBasicAuthHandler(),
                                          http)
        urllib2.install_opener(opener)

    def _parseJSString(self, s):
        '''
        Evaluate one arithmetic term of a Cloudflare IUA JavaScript challenge.

        SECURITY NOTE: this eval()s a transformed version of remote content.
        Kept as-is because the substitutions reduce it to digit/paren
        arithmetic, but treat with care.

        From:
        lambda
        plugin.video.genesis\resources\lib\libraries\cloudflare.py
        https://offshoregit.com/lambda81/
        '''
        try:
            offset = 1 if s[0] == '+' else 0
            val = int(eval(s.replace('!+[]', '1').replace('!![]', '1').replace('[]', '0').replace('(', 'str(')[offset:]))
            return val
        except Exception:
            raise Exception('Cannot parse Cloudflare JS term: %r' % s)

    def _cloudflare_challenge(self, url, challenge, form_data={}, headers={}, compression=True):
        '''
        Solve a Cloudflare IUA challenge and perform the answer request with
        the throw-away cloudflare cookie jar installed.
        Use _set_cloudflare to call this, not intended to be called directly.

        Part from:
        lambda
        plugin.video.genesis\resources\lib\libraries\cloudflare.py
        https://offshoregit.com/lambda81/
        '''
        jschl = re.compile('name="jschl_vc" value="(.+?)"/>').findall(challenge)[0]
        init = re.compile('setTimeout\(function\(\){\s*.*?.*:(.*?)};').findall(challenge)[0]
        builder = re.compile(r"challenge-form\'\);\s*(.*)a.v").findall(challenge)[0]
        decrypt_val = self._parseJSString(init)
        lines = builder.split(';')

        # Apply each "a.value += <term>" style operation from the script.
        for line in lines:
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = self._parseJSString(sections[1])
                decrypt_val = int(eval(str(decrypt_val) + sections[0][-1] + str(line_val)))

        path = urlparse(url).path
        netloc = urlparse(url).netloc
        if not netloc:
            netloc = path

        # Cloudflare's expected answer is the computed value plus the length
        # of the host name.
        answer = decrypt_val + len(netloc)

        url = url.rstrip('/')
        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (url, jschl, answer)

        if 'type="hidden" name="pass"' in challenge:
            passval = re.compile('name="pass" value="(.*?)"').findall(challenge)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % \
                    (url, urllib.quote_plus(passval), jschl, answer)
            # Cloudflare rejects answers that arrive too quickly.
            time.sleep(9)

        # Install the opener with a fresh cloudflare jar so only the
        # challenge cookies land in it.
        self._update_opener(cloudflare_jar=True)
        req = urllib2.Request(query)
        if form_data:
            form_data = urllib.urlencode(form_data)
            req = urllib2.Request(query, form_data)
        req.add_header('User-Agent', self._user_agent)
        for k, v in headers.items():
            req.add_header(k, v)
        if compression:
            req.add_header('Accept-Encoding', 'gzip')
        try:
            urllib2.urlopen(req)
        except urllib2.HTTPError:
            # Even an error response sets the clearance cookie in the jar,
            # which is all this request is for.
            pass

    def _set_cloudflare(self, url, challenge, form_data={}, headers={}, compression=True):
        '''
        Entry point for _cloudflare_challenge.
        Calls _cloudflare_challenge on the netloc (not the full url w/ path)
        and copies any cloudflare cookies into the main cookie jar.

        Args:
            url (str): The URL to site of potential Cloudflare IUA.

            challenge (str): html contents of the page that raised 503, containing potential Cloudflare IUA Challenge
        Kwargs:
            form_data (dict): A dictionary of form data if pass-through from POST.

            headers (dict): A dictionary describing any headers you would like
            to add to the request. (eg. ``{'X-Test': 'testing'}``)

            compression (bool): If ``True`` (default), try to use gzip
            compression.
        '''
        netloc = urlparse(url).netloc
        if not netloc:
            netloc = urlparse(url).path
        cloudflare_url = urlunparse((urlparse(url).scheme, netloc, '', '', '', ''))
        try:
            self._cloudflare_challenge(cloudflare_url, challenge, form_data, headers, compression)
            for c in self._cloudflare_jar:
                self._cj.set_cookie(c)
            self._update_opener()
        except Exception:
            # Make sure the opener is switched back to the main jar before
            # the caller sees the failure.
            self._update_opener()
            raise

    def url_with_headers(self, url, referer=None, user_agent=None, cookies=None, proxy=None, connection_timeout=None,
                         encoding='', accept_charset='', sslcipherlist='', noshout='false', seekable='1'):
        '''
        Return url with Referer, User-Agent, Cookies, Proxy, Connection-Timeout, Encoding, Accept-Charset,
        SSLCipherList, NoShout and Seekable
        Based on: https://github.com/xbmc/xbmc/blob/master/xbmc/filesystem/CurlFile.cpp#L782
        Args:
            url (str): The URL to append headers to.

        Kwargs:
            referer (str): If None (default), urlunparse((urlparse(url).scheme, netloc, path, '', '', '')) is used and append if set

            user_agent (str): If None (default), self._user_agent is used and append if set

            cookies (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
            Append cookies to URL as well

            proxy (str): If None (default), self._proxy is used and append if set

            connection_timeout (str): If None (default), self._socket_timeout is used and append if set

            encoding (str): append if set

            accept_charset (str): append if set

            sslcipherlist (str): append if set

            noshout (str): 'true'/'false', skip shout, append if 'true' ('false' is kodi default)

            seekable (str): '0'/'1', append if 0 ('1' is kodi default)
        Returns:
            http://example.com/myimage.png|Referer=%%%%%&User-Agent=%%%%%...
        '''
        kodi_schemes = ('special', 'plugin', 'script', 'profile')
        if ('://' not in url) or (url.startswith(kodi_schemes)):
            # don't waste time and return url
            return url

        _tmp = re.search('(.+?)(?:\|.*|$)', url)
        if _tmp:
            # trim any headers that may already be attached to url
            url = _tmp.group(1)

        if referer is not None:
            try:
                referer = str(referer)
            except Exception:
                referer = None
        if referer is None:
            # Derive a sensible referer from the url itself (its directory
            # if the url already equals scheme://netloc/path).
            path = urlparse(url).path
            netloc = urlparse(url).netloc
            if not netloc:
                netloc = path
                path = ''
            referer = urlunparse((urlparse(url).scheme, netloc, path, '', '', ''))
            if referer == url:
                index = path.rfind('/')
                if index >= 0:
                    referer = urlunparse((urlparse(url).scheme, netloc, path[:index], '', '', ''))
        if user_agent is None:
            user_agent = self._user_agent
        else:
            try:
                user_agent = str(user_agent)
            except Exception:
                user_agent = self._user_agent
        if cookies is None:
            cookies = self.cloudflare
        if proxy is None:
            proxy = self._proxy
        if connection_timeout is None:
            connection_timeout = self._socket_timeout
        try:
            connection_timeout = str(connection_timeout)
        except Exception:
            connection_timeout = None
        # Only non-default values of seekable/noshout are appended below.
        try:
            if str(seekable) != '0':
                seekable = None
        except Exception:
            seekable = None
        try:
            if str(noshout).lower() != 'true':
                noshout = None
        except Exception:
            noshout = None

        url += '|Referer=' + urllib.quote_plus(referer) + '&User-Agent=' + urllib.quote_plus(user_agent)
        if proxy:
            try:
                url += '&HTTPProxy=' + urllib.quote_plus(str(proxy))
            except Exception:
                pass
        if connection_timeout:
            url += '&Connection-Timeout=' + urllib.quote_plus(connection_timeout)
        if encoding:
            try:
                url += '&Encoding=' + urllib.quote_plus(str(encoding))
            except Exception:
                pass
        if accept_charset:
            try:
                url += '&Accept-Charset=' + urllib.quote_plus(str(accept_charset))
            except Exception:
                pass
        if sslcipherlist:
            try:
                url += '&SSLCipherList=' + urllib.quote_plus(str(sslcipherlist))
            except Exception:
                pass
        if noshout:
            url += '&NoShout=' + urllib.quote_plus(str(noshout).lower())
        if seekable:
            url += '&Seekable=' + urllib.quote_plus(str(seekable))
        if cookies:
            # Append all jar cookies whose domain appears in the url.
            cookie_string = ''
            for c in self._cj:
                if c.domain and (c.domain.lstrip('.') in url):
                    cookie_string += '%s=%s;' % (c.name, c.value)
            if cookie_string:
                url += '&Cookie=' + urllib.quote_plus(cookie_string)
        return url

    def http_GET(self, url, headers={}, compression=True, cloudflare=None):
        '''
        Perform an HTTP GET request.

        Args:
            url (str): The URL to GET.

        Kwargs:
            headers (dict): A dictionary describing any headers you would like
            to add to the request. (eg. ``{'X-Test': 'testing'}``)

            compression (bool): If ``True`` (default), try to use gzip
            compression.

            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
            On HTTPError 503 check for Cloudflare challenge and solve
        Returns:
            An :class:`HttpResponse` object containing headers and other
            meta-information about the page and the page content.
        '''
        if cloudflare is None:
            cloudflare = self.cloudflare
        return self._fetch(url, headers=headers, compression=compression, cloudflare=cloudflare)

    def http_POST(self, url, form_data, headers={}, compression=True, cloudflare=None):
        '''
        Perform an HTTP POST request.

        Args:
            url (str): The URL to POST.

            form_data (dict): A dictionary of form data to POST.

        Kwargs:
            headers (dict): A dictionary describing any headers you would like
            to add to the request. (eg. ``{'X-Test': 'testing'}``)

            compression (bool): If ``True`` (default), try to use gzip
            compression.

            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
            On HTTPError 503 check for Cloudflare challenge and solve
        Returns:
            An :class:`HttpResponse` object containing headers and other
            meta-information about the page and the page content.
        '''
        if cloudflare is None:
            cloudflare = self.cloudflare
        return self._fetch(url, form_data, headers=headers,
                           compression=compression, cloudflare=cloudflare)

    def http_HEAD(self, url, headers={}):
        '''
        Perform an HTTP HEAD request.

        Args:
            url (str): The URL to GET.

        Kwargs:
            headers (dict): A dictionary describing any headers you would like
            to add to the request. (eg. ``{'X-Test': 'testing'}``)

        Returns:
            An :class:`HttpResponse` object containing headers and other
            meta-information about the page.
        '''
        req = HeadRequest(url)
        req.add_header('User-Agent', self._user_agent)
        req.add_header('Accept', self._accept)
        for k, v in headers.items():
            req.add_header(k, v)
        response = urllib2.urlopen(req)
        return HttpResponse(response)

    def _fetch(self, url, form_data={}, headers={}, compression=True, cloudflare=None):
        '''
        Perform an HTTP GET or POST request.

        Args:
            url (str): The URL to GET or POST.

            form_data (dict): A dictionary of form data to POST. If empty, the
            request will be a GET, if it contains form data it will be a POST.

        Kwargs:
            headers (dict): A dictionary describing any headers you would like
            to add to the request. (eg. ``{'X-Test': 'testing'}``)

            compression (bool): If ``True`` (default), try to use gzip
            compression.

            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
            On HTTPError 503 check for Cloudflare challenge and solve
        Returns:
            An :class:`HttpResponse` object containing headers and other
            meta-information about the page and the page content.
        '''
        if cloudflare is None:
            cloudflare = self.cloudflare

        def _build_request():
            # POST when form_data is non-empty, otherwise GET. The original
            # dict is kept intact (encoding happens here, into a fresh
            # Request) so the retry and the Cloudflare solver receive the
            # dict, not an already-urlencoded string - the previous code
            # rebound form_data to the encoded string and then urlencoded it
            # a second time, which raised TypeError and broke every
            # Cloudflare-challenged POST.
            if form_data:
                request = urllib2.Request(url, urllib.urlencode(form_data))
            else:
                request = urllib2.Request(url)
            request.add_header('User-Agent', self._user_agent)
            for k, v in headers.items():
                request.add_header(k, v)
            if compression:
                request.add_header('Accept-Encoding', 'gzip')
            return request

        if not cloudflare:
            return HttpResponse(urllib2.urlopen(_build_request()))

        try:
            return HttpResponse(urllib2.urlopen(_build_request()))
        except urllib2.HTTPError as e:
            if e.code != 503:
                raise
            try:
                # The 503 body may contain a Cloudflare IUA challenge;
                # solving it puts the clearance cookies in the main jar.
                self._set_cloudflare(url, e.read(), form_data, headers, compression)
            except Exception:
                # Not a solvable challenge - surface the original 503.
                raise e
            # Challenge solved; retry the original request once.
            return HttpResponse(urllib2.urlopen(_build_request()))
554
555
class HttpResponse:
    '''
    This class represents a response from an HTTP request.

    The body is gunzipped if the server flagged it as gzip, and every
    attempt is made to properly decode it to Unicode using the charset
    declared in the HTTP headers or in a <meta http-equiv> tag.

    .. seealso::
        :meth:`Net.http_GET`, :meth:`Net.http_HEAD` and :meth:`Net.http_POST`
    '''

    # Unicode (best effort) string containing the body of the response.
    content = ''

    def __init__(self, response):
        '''
        Args:
            response (:class:`mimetools.Message`): The object returned by a call
            to :func:`urllib2.urlopen`.
        '''
        self._response = response
        html = response.read()
        # Fix: encoding could previously be referenced while unbound when
        # neither the header nor a meta tag declared a charset (the NameError
        # was silently swallowed by a bare except). Default it explicitly.
        encoding = ''
        try:
            if response.headers['content-encoding'].lower() == 'gzip':
                html = gzip.GzipFile(fileobj=StringIO.StringIO(html)).read()
        except Exception:
            # No content-encoding header, or the body is not actually gzip.
            pass

        try:
            content_type = response.headers['content-type']
            if 'charset=' in content_type:
                encoding = content_type.split('charset=')[-1]
        except Exception:
            pass

        # A charset declared in a <meta http-equiv> tag wins over the header.
        r = re.search('<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);' +
                      '\s+charset=(.+?)"', html, re.IGNORECASE)
        if r:
            encoding = r.group(1)

        if encoding:
            try:
                html = unicode(html, encoding)
            except Exception:
                # Unknown or bogus charset - keep the raw bytes.
                pass

        self.content = html

    def get_headers(self):
        '''Returns a List of headers returned by the server.'''
        return self._response.info().headers

    def get_url(self):
        '''
        Return the URL of the resource retrieved, commonly used to determine if
        a redirect was followed.
        '''
        return self._response.geturl()
Note: See TracBrowser for help on using the repository browser.