1 | '''
|
---|
2 | common XBMC Module
|
---|
3 | Copyright (C) 2011 t0mm0
|
---|
4 |
|
---|
5 | This program is free software: you can redistribute it and/or modify
|
---|
6 | it under the terms of the GNU General Public License as published by
|
---|
7 | the Free Software Foundation, either version 3 of the License, or
|
---|
8 | (at your option) any later version.
|
---|
9 |
|
---|
10 | This program is distributed in the hope that it will be useful,
|
---|
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | GNU General Public License for more details.
|
---|
14 |
|
---|
15 | You should have received a copy of the GNU General Public License
|
---|
16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
17 | '''
|
---|
18 |
|
---|
19 | import cookielib
|
---|
20 | import gzip
|
---|
21 | import re
|
---|
22 | import StringIO
|
---|
23 | import urllib
|
---|
24 | import urllib2
|
---|
25 | import socket
|
---|
26 | from urlparse import urlparse
|
---|
27 | from urlparse import urlunparse
|
---|
28 | import time
|
---|
29 |
|
---|
class HeadRequest(urllib2.Request):
    '''Request subclass whose HTTP verb is HEAD instead of GET/POST.'''

    def get_method(self):
        '''Tell urllib2 to issue a HEAD request.'''
        return 'HEAD'
|
---|
34 |
|
---|
class Net:
    '''
    This class wraps :mod:`urllib2` and provides an easy way to make http
    requests while taking care of cookies, proxies, gzip compression and
    character encoding.

    Example::

        from addon.common.net import Net
        net = Net()
        response = net.http_GET('http://xbmc.org')
        print response.content
    '''
    # Ready-made User-Agent strings callers can pass to set_user_agent()
    # to impersonate common browsers/devices.
    IE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko'
    FF_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0'
    IOS_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25'
    ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'

    # Main cookie jar. NOTE(review): declared at class level, so it is shared
    # by every Net instance in the process until set_cookies() is called —
    # confirm that sharing is intended.
    _cj = cookielib.MozillaCookieJar()

    # Class-level defaults; __init__ and the setters shadow these on the
    # instance when custom values are supplied.
    _proxy = None
    _user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36'
    _accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    _http_debug = False
    _socket_timeout = 60
|
---|
60 |
|
---|
61 | def __init__(self, cookie_file='', proxy='', user_agent='',
|
---|
62 | http_debug=False, accept=_accept, socket_timeout=_socket_timeout, cloudflare=False):
|
---|
63 | '''
|
---|
64 | Kwargs:
|
---|
65 | cookie_file (str): Full path to a file to be used to load and save
|
---|
66 | cookies to.
|
---|
67 |
|
---|
68 | proxy (str): Proxy setting (eg.
|
---|
69 | ``'http://user:pass@example.com:1234'``)
|
---|
70 |
|
---|
71 | user_agent (str): String to use as the User Agent header. If not
|
---|
72 | supplied the class will use a default user agent (chrome)
|
---|
73 |
|
---|
74 | http_debug (bool): Set ``True`` to have HTTP header info written to
|
---|
75 | the XBMC log for all requests.
|
---|
76 |
|
---|
77 | accept (str) : String to use as HTTP Request Accept header.
|
---|
78 |
|
---|
79 | socket_timeout (int): time in seconds for socket connections to wait until time out
|
---|
80 |
|
---|
81 | cloudflare (bool): Set ``True`` to check all requests that raise HTTPError 503 for Cloudflare challenge and solve
|
---|
82 | This can be changed per request as well, see http_GET, http_PUSH
|
---|
83 | '''
|
---|
84 |
|
---|
85 | #Set socket timeout - Useful for slow connections
|
---|
86 | socket.setdefaulttimeout(socket_timeout)
|
---|
87 |
|
---|
88 | # empty jar for each instance rather than scope of the import
|
---|
89 | self._cloudflare_jar = cookielib.MozillaCookieJar()
|
---|
90 |
|
---|
91 | self.cloudflare = cloudflare
|
---|
92 | if cookie_file:
|
---|
93 | self.set_cookies(cookie_file)
|
---|
94 | if proxy:
|
---|
95 | self.set_proxy(proxy)
|
---|
96 | if user_agent:
|
---|
97 | self.set_user_agent(user_agent)
|
---|
98 | self._http_debug = http_debug
|
---|
99 | self._update_opener()
|
---|
100 |
|
---|
101 |
|
---|
102 | def set_cookies(self, cookie_file):
|
---|
103 | '''
|
---|
104 | Set the cookie file and try to load cookies from it if it exists.
|
---|
105 |
|
---|
106 | Args:
|
---|
107 | cookie_file (str): Full path to a file to be used to load and save
|
---|
108 | cookies to.
|
---|
109 | '''
|
---|
110 | try:
|
---|
111 | self._cj.load(cookie_file, ignore_discard=True)
|
---|
112 | self._update_opener()
|
---|
113 | return True
|
---|
114 | except:
|
---|
115 | return False
|
---|
116 |
|
---|
117 |
|
---|
118 | def get_cookies(self):
|
---|
119 | '''Returns A dictionary containing all cookie information by domain.'''
|
---|
120 | return self._cj._cookies
|
---|
121 |
|
---|
122 |
|
---|
123 | def save_cookies(self, cookie_file):
|
---|
124 | '''
|
---|
125 | Saves cookies to a file.
|
---|
126 |
|
---|
127 | Args:
|
---|
128 | cookie_file (str): Full path to a file to save cookies to.
|
---|
129 | '''
|
---|
130 | self._cj.save(cookie_file, ignore_discard=True)
|
---|
131 |
|
---|
132 |
|
---|
133 | def set_proxy(self, proxy):
|
---|
134 | '''
|
---|
135 | Args:
|
---|
136 | proxy (str): Proxy setting (eg.
|
---|
137 | ``'http://user:pass@example.com:1234'``)
|
---|
138 | '''
|
---|
139 | self._proxy = proxy
|
---|
140 | self._update_opener()
|
---|
141 |
|
---|
142 |
|
---|
143 | def get_proxy(self):
|
---|
144 | '''Returns string containing proxy details.'''
|
---|
145 | return self._proxy
|
---|
146 |
|
---|
147 |
|
---|
148 | def set_user_agent(self, user_agent):
|
---|
149 | '''
|
---|
150 | Args:
|
---|
151 | user_agent (str): String to use as the User Agent header.
|
---|
152 | '''
|
---|
153 | self._user_agent = user_agent
|
---|
154 |
|
---|
155 |
|
---|
156 | def get_user_agent(self):
|
---|
157 | '''Returns user agent string.'''
|
---|
158 | return self._user_agent
|
---|
159 |
|
---|
160 |
|
---|
161 | def _update_opener(self, cloudflare_jar=False):
|
---|
162 | """
|
---|
163 | Builds and installs a new opener to be used by all future calls to
|
---|
164 | :func:`urllib2.urlopen`.
|
---|
165 | """
|
---|
166 | if self._http_debug:
|
---|
167 | http = urllib2.HTTPHandler(debuglevel=1)
|
---|
168 | else:
|
---|
169 | http = urllib2.HTTPHandler()
|
---|
170 |
|
---|
171 | if cloudflare_jar:
|
---|
172 | self._cloudflare_jar = cookielib.MozillaCookieJar()
|
---|
173 | jar = self._cloudflare_jar
|
---|
174 | else:
|
---|
175 | jar = self._cj
|
---|
176 |
|
---|
177 | if self._proxy:
|
---|
178 | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
|
---|
179 | urllib2.ProxyHandler({'http':
|
---|
180 | self._proxy}),
|
---|
181 | urllib2.HTTPBasicAuthHandler(),
|
---|
182 | http)
|
---|
183 |
|
---|
184 | else:
|
---|
185 | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
|
---|
186 | urllib2.HTTPBasicAuthHandler(),
|
---|
187 | http)
|
---|
188 | urllib2.install_opener(opener)
|
---|
189 |
|
---|
190 |
|
---|
191 | def _parseJSString(self, s):
|
---|
192 | """
|
---|
193 | lambda
|
---|
194 | plugin.video.genesis\resources\lib\libraries\cloudflare.py
|
---|
195 | https://offshoregit.com/lambda81/
|
---|
196 | """
|
---|
197 | try:
|
---|
198 | offset=1 if s[0]=='+' else 0
|
---|
199 | val = int(eval(s.replace('!+[]','1').replace('!![]','1').replace('[]','0').replace('(','str(')[offset:]))
|
---|
200 | return val
|
---|
201 | except:
|
---|
202 | raise Exception
|
---|
203 |
|
---|
204 |
|
---|
    def _cloudflare_challenge(self, url, challenge, form_data={}, headers={}, compression=True):
        """
        Use _set_cloudflare to call this, not intended to be called directly.
        Solve challenge and make request with cloudflare cookie jar.

        Scrapes the jschl token and the arithmetic puzzle out of the 503
        challenge page, computes the answer, then submits it to the
        ``/cdn-cgi/l/chk_jschl`` endpoint so the clearance cookie lands in
        the temporary cloudflare jar installed by _update_opener().

        Part from:
        lambda
        plugin.video.genesis/resources/lib/libraries/cloudflare.py
        https://offshoregit.com/lambda81/

        Args:
            url (str): scheme://netloc of the protected site (no path).
            challenge (str): HTML body of the 503 response.

        Kwargs:
            form_data (dict): form data passed through from a POST attempt.
                NOTE(review): when called via _fetch's retry path this has
                already been urlencoded into a string, so the urlencode below
                would receive a string, not a dict — verify the POST retry path.
            headers (dict): extra request headers.
            compression (bool): request gzip when ``True``.
        """
        # Hidden form token that must be echoed back unchanged.
        jschl = re.compile('name="jschl_vc" value="(.+?)"/>').findall(challenge)[0]
        # Initial value of the obfuscated arithmetic expression.
        init = re.compile('setTimeout\(function\(\){\s*.*?.*:(.*?)};').findall(challenge)[0]
        # Sequence of JS statements that mutate the value (a.v... += ...).
        builder = re.compile(r"challenge-form\'\);\s*(.*)a.v").findall(challenge)[0]
        decrypt_val = self._parseJSString(init)
        lines = builder.split(';')

        # Apply each mutation: the operator is the last char before '='
        # (+, -, *, ...), the operand is another JS snippet.
        for line in lines:
            if len(line)>0 and '=' in line:
                sections=line.split('=')
                line_val = self._parseJSString(sections[1])
                decrypt_val = int(eval(str(decrypt_val)+sections[0][-1]+str(line_val)))

        path = urlparse(url).path
        netloc = urlparse(url).netloc
        if not netloc:
            # urlparse puts scheme-less input into .path rather than .netloc
            netloc = path

        # Final answer: computed value plus the length of the host name.
        answer = decrypt_val + len(netloc)

        url = url.rstrip('/')
        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (url, jschl, answer)

        if 'type="hidden" name="pass"' in challenge:
            passval = re.compile('name="pass" value="(.*?)"').findall(challenge)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % \
                    (url, urllib.quote_plus(passval), jschl, answer)
            # Delay before submitting — presumably to satisfy Cloudflare's
            # enforced wait; TODO confirm the required duration.
            time.sleep(9)

        # Install the temporary cloudflare jar so the clearance cookie from
        # the answer request is captured there, not in the main jar.
        self._update_opener(cloudflare_jar=True)
        req = urllib2.Request(query)
        if form_data:
            form_data = urllib.urlencode(form_data)
            req = urllib2.Request(query, form_data)
        req.add_header('User-Agent', self._user_agent)
        for k, v in headers.items():
            req.add_header(k, v)
        if compression:
            req.add_header('Accept-Encoding', 'gzip')
        try:
            response = urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            # The response body is discarded either way; only the cookies the
            # opener captured matter, so an HTTP error here is ignored.
            pass
|
---|
257 |
|
---|
258 |
|
---|
259 | def _set_cloudflare(self, url, challenge, form_data={}, headers={}, compression=True):
|
---|
260 | """
|
---|
261 | Entry Point for _cloudflare_challenge
|
---|
262 | Calls cloudflare_challenge on netloc, not full url w/ path
|
---|
263 | Puts any cloudflare cookies in the main cookie jar
|
---|
264 | Args:
|
---|
265 | url (str): The URL to site of potential Cloudflare IUA.
|
---|
266 |
|
---|
267 | challenge (str): html contents of the page that raised 503, containing potential Cloudflare IUA Challenge
|
---|
268 | Kwargs:
|
---|
269 | form_data (dict): A dictionary of form data if pass-through from POST.
|
---|
270 |
|
---|
271 | headers (dict): A dictionary describing any headers you would like
|
---|
272 | to add to the request. (eg. ``{'X-Test': 'testing'}``)
|
---|
273 |
|
---|
274 | compression (bool): If ``True`` (default), try to use gzip
|
---|
275 | compression.
|
---|
276 | """
|
---|
277 | netloc = urlparse(url).netloc
|
---|
278 | if not netloc:
|
---|
279 | netloc = urlparse(url).path
|
---|
280 | cloudflare_url = urlunparse((urlparse(url).scheme, netloc, '', '', '', ''))
|
---|
281 | try:
|
---|
282 | self._cloudflare_challenge(cloudflare_url, challenge, form_data, headers, compression)
|
---|
283 | for c in self._cloudflare_jar:
|
---|
284 | self._cj.set_cookie(c)
|
---|
285 | self._update_opener()
|
---|
286 | except:
|
---|
287 | # make sure we update to main jar
|
---|
288 | self._update_opener()
|
---|
289 | raise Exception
|
---|
290 |
|
---|
291 |
|
---|
    def url_with_headers(self, url, referer=None, user_agent=None, cookies=None, proxy=None, connection_timeout=None,
                         encoding='', accept_charset='', sslcipherlist='', noshout='false', seekable='1'):
        '''
        Return url with Referer, User-Agent, Cookies, Proxy, Connection-Timeout, Encoding, Accept-Charset,
        SSLCipherList, NoShout and Seekable appended in Kodi's pipe-separated
        option syntax.
        Based on: https://github.com/xbmc/xbmc/blob/master/xbmc/filesystem/CurlFile.cpp#L782

        Args:
            url (str): The URL to append headers to.

        Kwargs:
            referer (str): If None (default), urlunparse((urlparse(url).scheme, netloc, path, '', '', '')) is used and append if set

            user_agent (str): If None (default), self._user_agent is used and append if set

            cookies (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
                Append cookies to URL as well

            proxy (str): If None (default), self.proxy is used and append if set

            connection_timeout (str): If None (default), self._socket_timeout is used and append if set

            encoding (str): append if set

            accept_charset (str): append if set

            sslcipherlist (str): append if set

            noshout (str): 'true'/'false', skip shout, append if 'true' ('false' is kodi default)

            seekable (str): '0'/'1', append if 0 ('1' is kodi default)

        Returns:
            http://example.com/myimage.png|Referer=%%%%%&User-Agent=%%%%%...
        '''
        # Kodi virtual-filesystem schemes take no HTTP headers.
        kodi_schemes = ('special', 'plugin', 'script', 'profile')
        if ('://' not in url) or (url.startswith(kodi_schemes)):
            # don't waste time and return url
            return url

        _tmp = re.search('(.+?)(?:\|.*|$)', url)
        if _tmp:
            # trim any headers that may already be attached to url
            url = _tmp.group(1)

        # Coerce a caller-supplied referer to str; fall back to deriving one.
        if referer is not None:
            try:
                referer = str(referer)
            except:
                referer = None
        if referer is None:
            path = urlparse(url).path
            netloc = urlparse(url).netloc
            if not netloc:
                # urlparse puts scheme-less input into .path, not .netloc
                netloc = path
                path = ''
            referer = urlunparse((urlparse(url).scheme, netloc, path, '', '', ''))
            if referer == url:
                # Referring a URL to itself is useless; use its parent dir.
                index = path.rfind('/')
                if index >= 0:
                    referer = urlunparse((urlparse(url).scheme, netloc, path[:index], '', '', ''))
        if user_agent is None:
            user_agent = self._user_agent
        else:
            try:
                user_agent = str(user_agent)
            except:
                user_agent = self._user_agent
        if cookies is None:
            cookies = self.cloudflare
        if proxy is None:
            proxy = self._proxy
        if connection_timeout is None:
            connection_timeout = self._socket_timeout
        try:
            connection_timeout = str(connection_timeout)
        except:
            connection_timeout = None
        # Kodi defaults: Seekable=1 and NoShout=false — only append the
        # non-default values, clear them otherwise.
        try:
            if str(seekable) != '0':
                seekable = None
        except:
            seekable = None
        try:
            if str(noshout).lower() != 'true':
                noshout = None
        except:
            noshout = None

        # First option is separated from the URL by '|', the rest by '&'.
        url += '|Referer=' + urllib.quote_plus(referer) + '&User-Agent=' + urllib.quote_plus(user_agent)
        if proxy:
            try:
                url += '&HTTPProxy=' + urllib.quote_plus(str(proxy))
            except:
                pass
        if connection_timeout:
            url += '&Connection-Timeout=' + urllib.quote_plus(connection_timeout)
        if encoding:
            try:
                url += '&Encoding=' + urllib.quote_plus(str(encoding))
            except:
                pass
        if accept_charset:
            try:
                url += '&Accept-Charset=' + urllib.quote_plus(str(accept_charset))
            except:
                pass
        if sslcipherlist:
            try:
                url += '&SSLCipherList=' + urllib.quote_plus(str(sslcipherlist))
            except:
                pass
        if noshout:
            url += '&NoShout=' + urllib.quote_plus(str(noshout).lower())
        if seekable:
            url += '&Seekable=' + urllib.quote_plus(str(seekable))
        if cookies:
            # Attach every jar cookie whose domain appears in the URL.
            # NOTE(review): plain substring match, so 'example.com' also
            # matches 'notexample.com' — confirm this looseness is acceptable.
            cookie_string = ''
            for c in self._cj:
                if c.domain and (c.domain.lstrip('.') in url):
                    cookie_string += '%s=%s;' % (c.name, c.value)
            if cookie_string:
                url += '&Cookie=' + urllib.quote_plus(cookie_string)
        return url
|
---|
414 |
|
---|
415 |
|
---|
416 | def http_GET(self, url, headers={}, compression=True, cloudflare=None):
|
---|
417 | '''
|
---|
418 | Perform an HTTP GET request.
|
---|
419 |
|
---|
420 | Args:
|
---|
421 | url (str): The URL to GET.
|
---|
422 |
|
---|
423 | Kwargs:
|
---|
424 | headers (dict): A dictionary describing any headers you would like
|
---|
425 | to add to the request. (eg. ``{'X-Test': 'testing'}``)
|
---|
426 |
|
---|
427 | compression (bool): If ``True`` (default), try to use gzip
|
---|
428 | compression.
|
---|
429 |
|
---|
430 | cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
|
---|
431 | On HTTPError 503 check for Cloudflare challenge and solve
|
---|
432 | Returns:
|
---|
433 | An :class:`HttpResponse` object containing headers and other
|
---|
434 | meta-information about the page and the page content.
|
---|
435 | '''
|
---|
436 | if cloudflare is None:
|
---|
437 | cloudflare = self.cloudflare
|
---|
438 | return self._fetch(url, headers=headers, compression=compression, cloudflare=cloudflare)
|
---|
439 |
|
---|
440 |
|
---|
441 | def http_POST(self, url, form_data, headers={}, compression=True, cloudflare=None):
|
---|
442 | '''
|
---|
443 | Perform an HTTP POST request.
|
---|
444 |
|
---|
445 | Args:
|
---|
446 | url (str): The URL to POST.
|
---|
447 |
|
---|
448 | form_data (dict): A dictionary of form data to POST.
|
---|
449 |
|
---|
450 | Kwargs:
|
---|
451 | headers (dict): A dictionary describing any headers you would like
|
---|
452 | to add to the request. (eg. ``{'X-Test': 'testing'}``)
|
---|
453 |
|
---|
454 | compression (bool): If ``True`` (default), try to use gzip
|
---|
455 | compression.
|
---|
456 |
|
---|
457 | cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
|
---|
458 | On HTTPError 503 check for Cloudflare challenge and solve
|
---|
459 | Returns:
|
---|
460 | An :class:`HttpResponse` object containing headers and other
|
---|
461 | meta-information about the page and the page content.
|
---|
462 | '''
|
---|
463 | if cloudflare is None:
|
---|
464 | cloudflare = self.cloudflare
|
---|
465 | return self._fetch(url, form_data, headers=headers,
|
---|
466 | compression=compression, cloudflare=cloudflare)
|
---|
467 |
|
---|
468 |
|
---|
469 | def http_HEAD(self, url, headers={}):
|
---|
470 | '''
|
---|
471 | Perform an HTTP HEAD request.
|
---|
472 |
|
---|
473 | Args:
|
---|
474 | url (str): The URL to GET.
|
---|
475 |
|
---|
476 | Kwargs:
|
---|
477 | headers (dict): A dictionary describing any headers you would like
|
---|
478 | to add to the request. (eg. ``{'X-Test': 'testing'}``)
|
---|
479 |
|
---|
480 | Returns:
|
---|
481 | An :class:`HttpResponse` object containing headers and other
|
---|
482 | meta-information about the page.
|
---|
483 | '''
|
---|
484 | req = HeadRequest(url)
|
---|
485 | req.add_header('User-Agent', self._user_agent)
|
---|
486 | req.add_header('Accept', self._accept)
|
---|
487 | for k, v in headers.items():
|
---|
488 | req.add_header(k, v)
|
---|
489 | response = urllib2.urlopen(req)
|
---|
490 | return HttpResponse(response)
|
---|
491 |
|
---|
492 |
|
---|
    def _fetch(self, url, form_data={}, headers={}, compression=True, cloudflare=None):
        '''
        Perform an HTTP GET or POST request.

        Args:
            url (str): The URL to GET or POST.

            form_data (dict): A dictionary of form data to POST. If empty, the
                request will be a GET, if it contains form data it will be a POST.

        Kwargs:
            headers (dict): A dictionary describing any headers you would like
                to add to the request. (eg. ``{'X-Test': 'testing'}``)

            compression (bool): If ``True`` (default), try to use gzip
                compression.

            cloudflare (bool): If ``None`` (default), use self.cloudflare as bool (False as default)
                On HTTPError 503 check for Cloudflare challenge and solve

        Returns:
            An :class:`HttpResponse` object containing headers and other
            meta-information about the page and the page content.
        '''
        if cloudflare is None:
            cloudflare = self.cloudflare
        # NOTE(review): 'encoding' is assigned but never used in this method.
        encoding = ''
        req = urllib2.Request(url)
        if form_data:
            # Rebind form_data to its urlencoded string form; the same name is
            # later passed to _set_cloudflare (see note below).
            form_data = urllib.urlencode(form_data)
            req = urllib2.Request(url, form_data)
        req.add_header('User-Agent', self._user_agent)
        for k, v in headers.items():
            req.add_header(k, v)
        if compression:
            req.add_header('Accept-Encoding', 'gzip')
        if not cloudflare:
            # Plain path: any HTTPError propagates to the caller unchanged.
            response = urllib2.urlopen(req)
            return HttpResponse(response)
        else:
            try:
                response = urllib2.urlopen(req)
                return HttpResponse(response)
            except urllib2.HTTPError as e:
                if e.code == 503:
                    # 503 may be a Cloudflare challenge page: try to solve it,
                    # then retry the original request with the merged cookies.
                    try:
                        # NOTE(review): form_data was already urlencoded above,
                        # so _set_cloudflare receives a string, not a dict, and
                        # the urlencode calls below re-encode that string —
                        # verify the POST + cloudflare retry path actually works.
                        self._set_cloudflare(url, e.read(), form_data, headers, compression)
                    except:
                        # Challenge could not be solved; surface the original 503.
                        raise urllib2.HTTPError, e
                    req = urllib2.Request(url)
                    if form_data:
                        form_data = urllib.urlencode(form_data)
                        req = urllib2.Request(url, form_data)
                    req.add_header('User-Agent', self._user_agent)
                    for k, v in headers.items():
                        req.add_header(k, v)
                    if compression:
                        req.add_header('Accept-Encoding', 'gzip')
                    response = urllib2.urlopen(req)
                    return HttpResponse(response)
                else:
                    # Any non-503 error is not a challenge; re-raise it.
                    raise urllib2.HTTPError, e
|
---|
554 |
|
---|
555 |
|
---|
class HttpResponse:
    '''
    This class represents a response from an HTTP request.

    The content is examined and every attempt is made to properly encode it to
    Unicode.

    .. seealso::
        :meth:`Net.http_GET`, :meth:`Net.http_HEAD` and :meth:`Net.http_POST`
    '''

    # Class-level default; overwritten per-instance at the end of __init__.
    content = ''
    '''Unicode encoded string containing the body of the response.'''
|
---|
569 |
|
---|
570 |
|
---|
    def __init__(self, response):
        '''
        Read the response body, transparently gunzip it, and best-effort
        decode it to unicode using the charset from the Content-Type header
        or an HTML ``<meta http-equiv>`` tag.

        Args:
            response (:class:`mimetools.Message`): The object returned by a call
                to :func:`urllib2.urlopen`.
        '''
        self._response = response
        html = response.read()
        try:
            # Decompress in memory when the server honoured Accept-Encoding.
            if response.headers['content-encoding'].lower() == 'gzip':
                html = gzip.GzipFile(fileobj=StringIO.StringIO(html)).read()
        except:
            # No content-encoding header, or bad gzip data: keep raw body.
            pass

        try:
            # First charset source: the Content-Type response header.
            content_type = response.headers['content-type']
            if 'charset=' in content_type:
                encoding = content_type.split('charset=')[-1]
        except:
            pass

        # Second (overriding) charset source: an HTML meta tag in the body.
        r = re.search('<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);' +
                      '\s+charset=(.+?)"', html, re.IGNORECASE)
        if r:
            encoding = r.group(1)

        try:
            # NOTE(review): if neither source set a charset, 'encoding' is
            # unbound here and the NameError is swallowed, leaving content
            # as the raw byte string.
            html = unicode(html, encoding)
        except:
            pass

        self.content = html
|
---|
615 |
|
---|
616 |
|
---|
617 | def get_headers(self):
|
---|
618 | '''Returns a List of headers returned by the server.'''
|
---|
619 | return self._response.info().headers
|
---|
620 |
|
---|
621 |
|
---|
622 | def get_url(self):
|
---|
623 | '''
|
---|
624 | Return the URL of the resource retrieved, commonly used to determine if
|
---|
625 | a redirect was followed.
|
---|
626 | '''
|
---|
627 | return self._response.geturl() |
---|