Changeset 42561


Ignore:
Timestamp:
Jul 27, 2018, 10:34:00 PM (17 months ago)
Author:
obi
Message:

fix net.py, we need Cloudflare

File:
1 edited

Legend:

Unmodified
Added
Removed
  • titan/mediathek/localhoster/lib/net.py

    r42560 r42561  
    1616    along with this program.  If not, see <http://www.gnu.org/licenses/>.
    1717'''
     18
    1819import random
    1920import cookielib
     
    2425import urllib2
    2526import socket
     27from urlparse import urlparse
     28from urlparse import urlunparse
    2629import time
    27 #import kodi
    28 
    29 # Set Global timeout - Useful for slow connections and Putlocker.
    30 socket.setdefaulttimeout(10)
    3130
    3231BR_VERS = [
     
    4847    except: last_gen = 0
    4948#    if not kodi.get_setting('current_ua') or last_gen < (time.time() - (7 * 24 * 60 * 60)):
    50 #    if not last_gen < (time.time() - (7 * 24 * 60 * 60)):
    5149    index = random.randrange(len(RAND_UAS))
    5250    versions = {'win_ver': random.choice(WIN_VERS), 'feature': random.choice(FEATURES), 'br_ver': random.choice(BR_VERS[index])}
     
    5957    return user_agent
    6058
     59class HeadRequest(urllib2.Request):
     60    '''A Request class that sends HEAD requests'''
     61    def get_method(self):
     62        return 'HEAD'
     63
    6164class Net:
    6265    '''
    6366    This class wraps :mod:`urllib2` and provides an easy way to make http
    64     requests while taking care of cookies, proxies, gzip compression and
     67    requests while taking care of cookies, proxies, gzip compression and 
    6568    character encoding.
    66 
     69   
    6770    Example::
    68 
     71   
    6972        from addon.common.net import Net
    7073        net = Net()
     
    7275        print response.content
    7376    '''
    74 
    75     _cj = cookielib.LWPCookieJar()
     77    IE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko'
     78    FF_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0'
     79    IOS_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25'
     80    ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
     81
     82    _cj = cookielib.MozillaCookieJar()
     83
    7684    _proxy = None
    77     _user_agent = 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0'
     85    _user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36'
     86    _accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    7887    _http_debug = False
    79 
    80     def __init__(self, cookie_file='', proxy='', user_agent='', http_debug=False):
     88    _socket_timeout = 60
     89
     90    def __init__(self, cookie_file='', proxy='', user_agent='',
     91                 http_debug=False, accept=_accept, socket_timeout=_socket_timeout, cloudflare=False):
    8192        '''
    8293        Kwargs:
    8394            cookie_file (str): Full path to a file to be used to load and save
    8495            cookies to.
    85 
    86             proxy (str): Proxy setting (eg.
     96           
     97            proxy (str): Proxy setting (eg. 
    8798            ``'http://user:pass@example.com:1234'``)
    88 
    89             user_agent (str): String to use as the User Agent header. If not
     99           
     100            user_agent (str): String to use as the User Agent header. If not 
    90101            supplied the class will use a default user agent (chrome)
    91 
     102           
    92103            http_debug (bool): Set ``True`` to have HTTP header info written to
    93104            the XBMC log for all requests.
    94         '''
     105           
     106            accept (str) : String to use as HTTP Request Accept header.
     107           
     108            socket_timeout (int): time in seconds for socket connections to wait until time out
     109
     110            cloudflare (bool): Set ``True`` to check all requests that raise HTTPError 503 for Cloudflare challenge and solve
     111            This can be changed per request as well, see http_GET, http_POST
     112        '''
     113   
     114        #Set socket timeout - Useful for slow connections
     115        socket.setdefaulttimeout(socket_timeout)
     116
     117        # empty jar for each instance rather than scope of the import
     118        self._cloudflare_jar = cookielib.MozillaCookieJar()
     119
     120        self.cloudflare = cloudflare
    95121        if cookie_file:
    96122            self.set_cookies(cookie_file)
     
    101127        self._http_debug = http_debug
    102128        self._update_opener()
    103 
     129       
     130   
    104131    def set_cookies(self, cookie_file):
    105132        '''
    106133        Set the cookie file and try to load cookies from it if it exists.
    107 
     134       
    108135        Args:
    109136            cookie_file (str): Full path to a file to be used to load and save
     
    116143        except:
    117144            return False
    118 
    119     def get_cookies(self, as_dict=False):
     145       
     146   
     147    def get_cookies(self):
    120148        '''Returns A dictionary containing all cookie information by domain.'''
    121         if as_dict:
    122             return dict((cookie.name, cookie.value) for cookie in self._cj)
    123         else:
    124             return self._cj._cookies
     149        return self._cj._cookies
     150
    125151
    126152    def save_cookies(self, cookie_file):
    127153        '''
    128154        Saves cookies to a file.
    129 
     155       
    130156        Args:
    131157            cookie_file (str): Full path to a file to save cookies to.
    132158        '''
    133         self._cj.save(cookie_file, ignore_discard=True)
    134 
     159        self._cj.save(cookie_file, ignore_discard=True)       
     160
     161       
    135162    def set_proxy(self, proxy):
    136163        '''
    137164        Args:
    138             proxy (str): Proxy setting (eg.
     165            proxy (str): Proxy setting (eg. 
    139166            ``'http://user:pass@example.com:1234'``)
    140167        '''
     
    142169        self._update_opener()
    143170
     171       
    144172    def get_proxy(self):
    145173        '''Returns string containing proxy details.'''
    146174        return self._proxy
    147 
     175       
     176       
    148177    def set_user_agent(self, user_agent):
    149178        '''
     
    153182        self._user_agent = user_agent
    154183
     184       
    155185    def get_user_agent(self):
    156186        '''Returns user agent string.'''
    157187        return self._user_agent
    158188
    159     def _update_opener(self):
    160         '''
     189
     190    def _update_opener(self, cloudflare_jar=False):
     191        """
    161192        Builds and installs a new opener to be used by all future calls to
    162193        :func:`urllib2.urlopen`.
    163         '''
     194        """
    164195        if self._http_debug:
    165196            http = urllib2.HTTPHandler(debuglevel=1)
     
    167198            http = urllib2.HTTPHandler()
    168199
     200        if cloudflare_jar:
     201            self._cloudflare_jar = cookielib.MozillaCookieJar()
     202            jar = self._cloudflare_jar
     203        else:
     204            jar = self._cj
     205
    169206        if self._proxy:
    170             opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cj),
     207            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
    171208                                          urllib2.ProxyHandler({'http':
    172209                                                                self._proxy}),
     
    175212
    176213        else:
    177             opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cj),
     214            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
    178215                                          urllib2.HTTPBasicAuthHandler(),
    179216                                          http)
    180217        urllib2.install_opener(opener)
    181218
    182     def http_GET(self, url, headers={}, compression=True):
     219
     220    def _parseJSString(self, s):
     221        """
     222        lambda
     223        plugin.video.genesis\resources\lib\libraries\cloudflare.py
     224        https://offshoregit.com/lambda81/
     225        """
     226        try:
     227            offset=1 if s[0]=='+' else 0
     228            val = int(eval(s.replace('!+[]','1').replace('!![]','1').replace('[]','0').replace('(','str(')[offset:]))
     229            return val
     230        except:
     231            raise Exception
     232
     233
     234    def _cloudflare_challenge(self, url, challenge, form_data={}, headers={}, compression=True):
     235        """
     236        Use _set_cloudflare to call this, not intended to be called directly.
     237        Solve challenge and make request with cloudflare cookie jar
     238
     239        Part from:
     240        lambda
     241        plugin.video.genesis\resources\lib\libraries\cloudflare.py
     242        https://offshoregit.com/lambda81/
     243        """
     244        jschl = re.compile('name="jschl_vc" value="(.+?)"/>').findall(challenge)[0]
     245        init = re.compile('setTimeout\(function\(\){\s*.*?.*:(.*?)};').findall(challenge)[0]
     246        builder = re.compile(r"challenge-form\'\);\s*(.*)a.v").findall(challenge)[0]
     247        decrypt_val = self._parseJSString(init)
     248        lines = builder.split(';')
     249
     250        for line in lines:
     251            if len(line)>0 and '=' in line:
     252                sections=line.split('=')
     253                line_val = self._parseJSString(sections[1])
     254                decrypt_val = int(eval(str(decrypt_val)+sections[0][-1]+str(line_val)))
     255
     256        path = urlparse(url).path
     257        netloc = urlparse(url).netloc
     258        if not netloc:
     259            netloc = path
     260
     261        answer = decrypt_val + len(netloc)
     262
     263        url = url.rstrip('/')
     264        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (url, jschl, answer)
     265
     266        if 'type="hidden" name="pass"' in challenge:
     267            passval = re.compile('name="pass" value="(.*?)"').findall(challenge)[0]
     268            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % \
     269                    (url, urllib.quote_plus(passval), jschl, answer)
     270            time.sleep(9)
     271
     272        self._update_opener(cloudflare_jar=True)
     273        req = urllib2.Request(query)
     274        if form_data:
     275            form_data = urllib.urlencode(form_data)
     276            req = urllib2.Request(query, form_data)
     277        req.add_header('User-Agent', self._user_agent)
     278        for k, v in headers.items():
     279            req.add_header(k, v)
     280        if compression:
     281            req.add_header('Accept-Encoding', 'gzip')
     282        try:
     283            response = urllib2.urlopen(req)
     284        except urllib2.HTTPError as e:
     285            pass
     286
     287
     288    def _set_cloudflare(self, url, challenge, form_data={}, headers={}, compression=True):
     289        """
     290        Entry Point for _cloudflare_challenge
     291        Calls cloudflare_challenge on netloc, not full url w/ path
     292        Puts any cloudflare cookies in the main cookie jar
     293        Args:
     294            url (str): The URL to site of potential Cloudflare IUA.
     295
     296            challenge (str): html contents of the page that raised 503, containing potential Cloudflare IUA Challenge
     297        Kwargs:
     298            form_data (dict): A dictionary of form data if pass-through from POST.
     299
     300            headers (dict): A dictionary describing any headers you would like
     301            to add to the request. (eg. ``{'X-Test': 'testing'}``)
     302
     303            compression (bool): If ``True`` (default), try to use gzip
     304            compression.
     305        """
     306        netloc = urlparse(url).netloc
     307        if not netloc:
     308            netloc = urlparse(url).path
     309        cloudflare_url = urlunparse((urlparse(url).scheme, netloc, '', '', '', ''))
     310        try:
     311            self._cloudflare_challenge(cloudflare_url, challenge, form_data, headers, compression)
     312            for c in self._cloudflare_jar:
     313                self._cj.set_cookie(c)
     314            self._update_opener()
     315        except:
     316            # make sure we update to main jar
     317            self._update_opener()
     318            raise Exception
     319
     320
     321    def url_with_headers(self, url, referer=None, user_agent=None, cookies=None, proxy=None, connection_timeout=None,
     322                         encoding='', accept_charset='', sslcipherlist='', noshout='false', seekable='1'):
     323        '''
     324        Return url with Referer, User-Agent, Cookies, Proxy, Connection-Timeout, Encoding, Accept-Charset,
     325        SSLCipherList, NoShout and Seekable
     326        Based on: https://github.com/xbmc/xbmc/blob/master/xbmc/filesystem/CurlFile.cpp#L782
     327        Args:
     328            url (str): The URL to append headers to.
     329
     330        Kwargs:
     331            referer (str): If None (default), urlunparse((urlparse(url).scheme, netloc, path, '', '', '')) is used and append if set
     332
     333            user_agent (str): If None (default), self._user_agent is used and append if set
     334
     335            cookies (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
     336            Append cookies to URL as well
     337
     338            proxy (str): If None (default), self.proxy is used and append if set
     339
     340            connection_timeout (str): If None (default), self._socket_timeout is used and append if set
     341
     342            encoding (str): append if set
     343
     344            accept_charset (str): append if set
     345
     346            sslcipherlist (str): append if set
     347
     348            noshout (str): 'true'/'false', skip shout, append if 'true' ('false' is kodi default)
     349
     350            seekable (str): '0'/'1', append if 0 ('1' is kodi default)
     351        Returns:
     352            http://example.com/myimage.png|Referer=%%%%%&User-Agent=%%%%%...
     353        '''
     354        kodi_schemes = ('special', 'plugin', 'script', 'profile')
     355        if ('://' not in url) or (url.startswith(kodi_schemes)):
     356            # don't waste time and return url
     357            return url
     358
     359        _tmp = re.search('(.+?)(?:\|.*|$)', url)
     360        if _tmp:
     361            # trim any headers that may already be attached to url
     362            url = _tmp.group(1)
     363
     364        if referer is not None:
     365            try:
     366                referer = str(referer)
     367            except:
     368                referer = None
     369        if referer is None:
     370            path = urlparse(url).path
     371            netloc = urlparse(url).netloc
     372            if not netloc:
     373                netloc = path
     374                path = ''
     375            referer = urlunparse((urlparse(url).scheme, netloc, path, '', '', ''))
     376            if referer == url:
     377                index = path.rfind('/')
     378                if index >= 0:
     379                    referer = urlunparse((urlparse(url).scheme, netloc, path[:index], '', '', ''))
     380        if user_agent is None:
     381            user_agent = self._user_agent
     382        else:
     383            try:
     384                user_agent = str(user_agent)
     385            except:
     386                user_agent = self._user_agent
     387        if cookies is None:
     388            cookies = self.cloudflare
     389        if proxy is None:
     390            proxy = self._proxy
     391        if connection_timeout is None:
     392            connection_timeout = self._socket_timeout
     393        try:
     394            connection_timeout = str(connection_timeout)
     395        except:
     396            connection_timeout = None
     397        try:
     398            if str(seekable) != '0':
     399                seekable = None
     400        except:
     401            seekable = None
     402        try:
     403            if str(noshout).lower() != 'true':
     404                noshout = None
     405        except:
     406            noshout = None
     407
     408        url += '|Referer=' + urllib.quote_plus(referer) + '&User-Agent=' + urllib.quote_plus(user_agent)
     409        if proxy:
     410            try:
     411                url += '&HTTPProxy=' + urllib.quote_plus(str(proxy))
     412            except:
     413                pass
     414        if connection_timeout:
     415            url += '&Connection-Timeout=' + urllib.quote_plus(connection_timeout)
     416        if encoding:
     417            try:
     418                url += '&Encoding=' + urllib.quote_plus(str(encoding))
     419            except:
     420                pass
     421        if accept_charset:
     422            try:
     423                url += '&Accept-Charset=' + urllib.quote_plus(str(accept_charset))
     424            except:
     425                pass
     426        if sslcipherlist:
     427            try:
     428                url += '&SSLCipherList=' + urllib.quote_plus(str(sslcipherlist))
     429            except:
     430                pass
     431        if noshout:
     432            url += '&NoShout=' + urllib.quote_plus(str(noshout).lower())
     433        if seekable:
     434            url += '&Seekable=' + urllib.quote_plus(str(seekable))
     435        if cookies:
     436            cookie_string = ''
     437            for c in self._cj:
     438                if c.domain and (c.domain.lstrip('.') in url):
     439                    cookie_string += '%s=%s;' % (c.name, c.value)
     440            if cookie_string:
     441                url += '&Cookie=' + urllib.quote_plus(cookie_string)
     442        return url
     443
     444
     445    def http_GET(self, url, headers={}, compression=True, cloudflare=None):
    183446        '''
    184447        Perform an HTTP GET request.
    185 
     448       
    186449        Args:
    187450            url (str): The URL to GET.
    188 
     451           
    189452        Kwargs:
    190453            headers (dict): A dictionary describing any headers you would like
    191454            to add to the request. (eg. ``{'X-Test': 'testing'}``)
    192455
    193             compression (bool): If ``True`` (default), try to use gzip
     456            compression (bool): If ``True`` (default), try to use gzip 
    194457            compression.
    195458
     459            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
     460            On HTTPError 503 check for Cloudflare challenge and solve
    196461        Returns:
    197             An :class:`HttpResponse` object containing headers and other
     462            An :class:`HttpResponse` object containing headers and other 
    198463            meta-information about the page and the page content.
    199464        '''
    200         return self._fetch(url, headers=headers, compression=compression)
    201 
    202     def http_POST(self, url, form_data, headers={}, compression=True):
     465        if cloudflare is None:
     466            cloudflare = self.cloudflare
     467        return self._fetch(url, headers=headers, compression=compression, cloudflare=cloudflare)
     468       
     469
     470    def http_POST(self, url, form_data, headers={}, compression=True, cloudflare=None):
    203471        '''
    204472        Perform an HTTP POST request.
    205 
     473       
    206474        Args:
    207475            url (str): The URL to POST.
    208 
     476           
    209477            form_data (dict): A dictionary of form data to POST.
    210 
     478           
    211479        Kwargs:
    212480            headers (dict): A dictionary describing any headers you would like
    213481            to add to the request. (eg. ``{'X-Test': 'testing'}``)
    214482
    215             compression (bool): If ``True`` (default), try to use gzip
     483            compression (bool): If ``True`` (default), try to use gzip 
    216484            compression.
    217485
     486            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
     487            On HTTPError 503 check for Cloudflare challenge and solve
    218488        Returns:
    219             An :class:`HttpResponse` object containing headers and other
     489            An :class:`HttpResponse` object containing headers and other 
    220490            meta-information about the page and the page content.
    221491        '''
    222         return self._fetch(url, form_data, headers=headers, compression=compression)
    223 
     492        if cloudflare is None:
     493            cloudflare = self.cloudflare
     494        return self._fetch(url, form_data, headers=headers,
     495                           compression=compression, cloudflare=cloudflare)
     496
     497   
    224498    def http_HEAD(self, url, headers={}):
    225499        '''
    226500        Perform an HTTP HEAD request.
    227 
     501       
    228502        Args:
    229503            url (str): The URL to GET.
    230 
     504       
    231505        Kwargs:
    232506            headers (dict): A dictionary describing any headers you would like
    233507            to add to the request. (eg. ``{'X-Test': 'testing'}``)
    234 
     508       
    235509        Returns:
    236             An :class:`HttpResponse` object containing headers and other
     510            An :class:`HttpResponse` object containing headers and other 
    237511            meta-information about the page.
    238512        '''
    239         request = urllib2.Request(url)
    240         request.get_method = lambda: 'HEAD'
    241         request.add_header('User-Agent', self._user_agent)
    242         for key in headers:
    243             request.add_header(key, headers[key])
    244         response = urllib2.urlopen(request)
     513        req = HeadRequest(url)
     514        req.add_header('User-Agent', self._user_agent)
     515        req.add_header('Accept', self._accept)
     516        for k, v in headers.items():
     517            req.add_header(k, v)
     518        response = urllib2.urlopen(req)
    245519        return HttpResponse(response)
    246520
    247     def _fetch(self, url, form_data={}, headers={}, compression=True):
     521
     522    def _fetch(self, url, form_data={}, headers={}, compression=True, cloudflare=None):
    248523        '''
    249524        Perform an HTTP GET or POST request.
    250 
     525       
    251526        Args:
    252527            url (str): The URL to GET or POST.
    253 
    254             form_data (dict): A dictionary of form data to POST. If empty, the
     528           
     529            form_data (dict): A dictionary of form data to POST. If empty, the 
    255530            request will be a GET, if it contains form data it will be a POST.
    256 
     531           
    257532        Kwargs:
    258533            headers (dict): A dictionary describing any headers you would like
    259534            to add to the request. (eg. ``{'X-Test': 'testing'}``)
    260535
    261             compression (bool): If ``True`` (default), try to use gzip
     536            compression (bool): If ``True`` (default), try to use gzip 
    262537            compression.
    263538
     539            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
     540            On HTTPError 503 check for Cloudflare challenge and solve
    264541        Returns:
    265             An :class:`HttpResponse` object containing headers and other
     542            An :class:`HttpResponse` object containing headers and other 
    266543            meta-information about the page and the page content.
    267544        '''
     545        if cloudflare is None:
     546            cloudflare = self.cloudflare
     547        encoding = ''
    268548        req = urllib2.Request(url)
    269549        if form_data:
    270             if isinstance(form_data, basestring):
    271                 form_data = form_data
    272             else:
    273                 form_data = urllib.urlencode(form_data, True)
     550            form_data = urllib.urlencode(form_data)
    274551            req = urllib2.Request(url, form_data)
    275552        req.add_header('User-Agent', self._user_agent)
    276         for key in headers:
    277             req.add_header(key, headers[key])
     553        for k, v in headers.items():
     554            req.add_header(k, v)
    278555        if compression:
    279556            req.add_header('Accept-Encoding', 'gzip')
    280         req.add_unredirected_header('Host', req.get_host())
    281         response = urllib2.urlopen(req)
    282         return HttpResponse(response)
     557        if not cloudflare:
     558            response = urllib2.urlopen(req)
     559            return HttpResponse(response)
     560        else:
     561            try:
     562                response = urllib2.urlopen(req)
     563                return HttpResponse(response)
     564            except urllib2.HTTPError as e:
     565                if e.code == 503:
     566                    try:
     567                        self._set_cloudflare(url, e.read(), form_data, headers, compression)
     568                    except:
     569                        raise urllib2.HTTPError, e
     570                    req = urllib2.Request(url)
     571                    if form_data:
     572                        form_data = urllib.urlencode(form_data)
     573                        req = urllib2.Request(url, form_data)
     574                    req.add_header('User-Agent', self._user_agent)
     575                    for k, v in headers.items():
     576                        req.add_header(k, v)
     577                    if compression:
     578                        req.add_header('Accept-Encoding', 'gzip')
     579                    response = urllib2.urlopen(req)
     580                    return HttpResponse(response)
     581                else:
     582                    raise urllib2.HTTPError, e
     583
    283584
    284585class HttpResponse:
    285586    '''
    286     This class represents a resoponse from an HTTP request.
    287 
     587    This class represents a response from an HTTP request.
     588   
    288589    The content is examined and every attempt is made to properly encode it to
    289590    Unicode.
    290 
     591   
    291592    .. seealso::
    292         :meth:`Net.http_GET`, :meth:`Net.http_HEAD` and :meth:`Net.http_POST`
     593        :meth:`Net.http_GET`, :meth:`Net.http_HEAD` and :meth:`Net.http_POST` 
    293594    '''
    294 
     595   
    295596    content = ''
    296     '''Unicode encoded string containing the body of the reposne.'''
    297 
     597    '''Unicode encoded string containing the body of the response.'''
     598   
     599   
    298600    def __init__(self, response):
    299601        '''
     
    303605        '''
    304606        self._response = response
    305 
    306     @property
    307     def content(self):
    308         html = self._response.read()
    309         encoding = None
    310         try:
    311             if self._response.headers['content-encoding'].lower() == 'gzip':
     607        html = response.read()
     608        try:
     609            if response.headers['content-encoding'].lower() == 'gzip':
    312610                html = gzip.GzipFile(fileobj=StringIO.StringIO(html)).read()
    313611        except:
    314612            pass
    315 
    316         try:
    317             content_type = self._response.headers['content-type']
     613       
     614        try:
     615            content_type = response.headers['content-type']
    318616            if 'charset=' in content_type:
    319617                encoding = content_type.split('charset=')[-1]
     
    321619            pass
    322620
    323         r = re.search('<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);\s+charset=(.+?)"', html, re.IGNORECASE)
     621        r = re.search('<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);' +
     622                      '\s+charset=(.+?)"', html, re.IGNORECASE)
    324623        if r:
    325             encoding = r.group(1)
    326        
    327         if encoding is not None:
    328             try: html = html.decode(encoding)
    329             except: pass
    330         return html
    331        
    332     def get_headers(self, as_dict=False):
    333         '''Returns headers returned by the server.
    334         If as_dict is True, headers are returned as a dictionary otherwise a list'''
    335         if as_dict:
    336             return dict([(item[0].title(), item[1]) for item in self._response.info().items()])
    337         else:
    338             return self._response.info().headers
    339 
     624            encoding = r.group(1)
     625                   
     626        try:
     627            html = unicode(html, encoding)
     628        except:
     629            pass
     630       
     631        #try:
     632        #    if response.headers['content-encoding'].lower() == 'gzip':
     633        #        r = re.search('<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);' + '\s+charset=(.+?)"', html, re.IGNORECASE)
     634        #        if r:
     635        #               encoding = r.group(1)
     636        #               try:
     637        #                       html = unicode(html, encoding)
     638        #               except:
     639        #                       pass
     640        #except:
     641        #    pass
     642           
     643        self.content = html
     644   
     645   
     646    def get_headers(self):
     647        '''Returns a List of headers returned by the server.'''
     648        return self._response.info().headers
     649   
     650       
    340651    def get_url(self):
    341652        '''
    342         Return the URL of the resource retrieved, commonly used to determine if
     653        Return the URL of the resource retrieved, commonly used to determine if 
    343654        a redirect was followed.
    344655        '''
Note: See TracChangeset for help on using the changeset viewer.