source: titan/mediathek/localhoster/lib/python2.7/httplib.py @ 40114

Last change on this file since 40114 was 40094, checked in by obi, 7 years ago

tithek add yoztube-dl support

File size: 50.0 KB
RevLine 
[40094]1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |
24      | response = getresponse()
25      v
26    Unread-response   [Response-headers-read]
27      |\____________________
28      |                     |
29      | response.read()     | putrequest()
30      v                     v
31    Idle                  Req-started-unread-response
32                     ______/|
33                   /        |
34   response.read() |        | ( putheader() )*  endheaders()
35                   v        v
36       Request-started    Req-sent-unread-response
37                            |
38                            | response.read()
39                            v
40                          Request-sent
41
42This diagram presents the following rules:
43  -- a second request may not be started until {response-headers-read}
44  -- a response [object] cannot be retrieved until {request-sent}
45  -- there is no differentiation between an unread response body and a
46     partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49      HTTPResponse class does not enforce this state machine, which
50      implies sophisticated clients may accelerate the request/response
51      pipeline. Caution should be taken, though: accelerating the states
52      beyond the above pattern may imply knowledge of the server's
53      connection-close behavior for certain requests. For example, it
54      is impossible to tell whether the server will close the connection
55      UNTIL the response headers have been read; this means that further
56      requests cannot be placed into the pipeline until it is known that
57      the server will NOT be closing the connection.
58
59Logical State                  __state            __response
60-------------                  -------            ----------
61Idle                           _CS_IDLE           None
62Request-started                _CS_REQ_STARTED    None
63Request-sent                   _CS_REQ_SENT       None
64Unread-response                _CS_IDLE           <response_class>
65Req-started-unread-response    _CS_REQ_STARTED    <response_class>
66Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67"""
68
69from array import array
70import os
71import re
72import socket
73from sys import py3kwarning
74from urlparse import urlsplit
75import warnings
76with warnings.catch_warnings():
77    if py3kwarning:
78        warnings.filterwarnings("ignore", ".*mimetools has been removed",
79                                DeprecationWarning)
80    import mimetools
81
82try:
83    from cStringIO import StringIO
84except ImportError:
85    from StringIO import StringIO
86
# Names exported by ``from httplib import *``.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports; HTTP_PORT is HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for HTTPResponse attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
163
# Mapping status codes to their standard reason phrases.
#
# Every status-code constant defined in this module has an entry here, so
# ``responses[SOME_CONSTANT]`` never raises KeyError.  306 has no constant
# but is kept for backward compatibility.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}
212
# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
# Callers read _MAXLINE + 1 bytes and raise LineTooLong when more than
# _MAXLINE bytes come back.
_MAXLINE = 65536

# maximum amount of headers accepted
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# The patterns for both name and value are more lenient than the RFC
# definitions to allow for backwards compatibility.
#
# A name is accepted when its first character is neither ":" nor whitespace
# and no later character is ":", CR or LF.
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
# A value is rejected when it contains an LF that is not followed by a space
# or tab, or a CR that is not followed by a space, tab or LF (i.e. a line
# break that is not a folded continuation).
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
253
254
class HTTPMessage(mimetools.Message):
    """mimetools.Message variant that merges repeated HTTP header fields."""

    def addheader(self, key, value):
        """Record value under key, comma-joining it onto any earlier value."""
        existing = self.dict.get(key)
        if existing is not None:
            value = ", ".join((existing, value))
        self.dict[key] = value

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the value stored for key."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Parse the header section from self.fp.

        Lines are consumed up to and including the blank line that ends the
        header section; that terminator is not stored.  When a line that is
        not a header is met instead, the method tries to push it back (via
        fp.unread() or fp.seek()) and stops; such a line is never stored.

        On return self.status is '' on success or a short error message,
        self.headers holds the raw header lines exactly as read, and
        self.dict maps each header name to its value.

        Header fields that occur more than once are merged following
        RFC 2616 sec 4.2: each later field-value is appended to the first,
        separated by a comma, in the order received.

        Raises HTTPException when more than _MAXHEADERS lines have been
        stored and LineTooLong when a line exceeds _MAXLINE bytes.
        """
        # XXX This overrides rfc822.Message.readheaders().  The base class
        # offers no hook for the customisation needed here, so this is a
        # copy of the base class logic with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        current = ""            # name of the most recent header, if any
        at_start = True
        line_start = push_back = position = None
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            position = self.fp.tell

        while True:
            if len(raw_lines) > _MAXHEADERS:
                raise HTTPException("got more than %d headers" % _MAXHEADERS)

            if position:
                # Remember where this line starts so it can be un-read.
                try:
                    line_start = position()
                except IOError:
                    line_start = position = None
                    self.seekable = 0

            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                self.status = 'EOF in headers'
                return

            # Leading unix "From " envelope lines are collected separately.
            if at_start and line.startswith('From '):
                self.unixfrom += line
                continue
            at_start = False

            if current and line[0] in ' \t':
                # Continuation of the previous header.
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                raw_lines.append(line)
                self.addcontinue(current, line.strip())
                continue
            if self.iscomment(line):
                # Comments are ignored.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                return

            current = self.isheader(line)
            if current is not None:
                if current:
                    # A legal header line: keep both raw and parsed forms.
                    raw_lines.append(line)
                    self.addheader(current, line[len(current)+1:].strip())
                # Otherwise the header name is empty.  That is not allowed
                # in HTTP but is probably a benign mistake, so the line is
                # dropped and parsing carries on.
                continue

            # Not a header line at all: report it, push it back, and stop.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            if push_back:
                push_back(line)
            elif position:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            return
368
class HTTPResponse:
    """One HTTP response read from a socket.

    The status line and headers are parsed by begin(); the body is then
    fetched with read(), which handles Content-Length delimited, chunked
    and read-until-close bodies.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
        """Wrap sock in a file object and reset all parsed-response state.

        sock       -- connected socket; only its makefile() method is used here
        debuglevel -- when > 0, status and header lines are printed
        strict     -- when true, a malformed status line raises BadStatusLine
        method     -- request method; 'HEAD' makes the body length zero
        buffering  -- when true, use a buffered file object for the socket
        """
        if buffering:
            # The caller won't be using any sock.recv() calls, so buffering
            # is fine and recommended for performance.
            self.fp = sock.makefile('rb')
        else:
            # The buffer size is specified as zero, because the headers of
            # the response are read with readline().  If the reads were
            # buffered the readline() calls could consume some of the
            # response, which may be read via a recv() on the underlying
            # socket.
            self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        # HTTPMessage holding the parsed headers; None until begin() has run
        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line.

        Returns a (version, status, reason) tuple where status is an int.
        Raises LineTooLong when the line exceeds _MAXLINE bytes and
        BadStatusLine when the line is empty, the status code is not an
        integer in 100..999, or (strict mode only) the line does not start
        with 'HTTP/'.  In non-strict mode such a line is treated as the
        start of an HTTP/0.9 body and ("HTTP/0.9", 200, "") is returned.
        """
        # Initialize with Simple-Response defaults
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        if self.debuglevel > 0:
            print "reply:", repr(line)
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                # NOTE(review): LineAndFileWrapper is defined outside this
                # view; presumably it replays `line` ahead of the rest of
                # the stream -- confirm.
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and derive the body framing.

        Does nothing if the headers were already read.  Responses with
        status 100 (Continue) are skipped.  Sets status, reason, version,
        msg, chunked, chunk_left, will_close and length.  Raises
        UnknownProtocol for an unrecognised HTTP version string.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # HTTP/0.9 has no headers: the body runs until the peer closes.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def _check_close(self):
        """Return True if the connection will close after this response.

        Decided from the parsed version and the Connection, Keep-Alive and
        Proxy-Connection headers.
        """
        conn = self.msg.getheader('connection')
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            # (the header is fetched a second time here; same value as above)
            conn = self.msg.getheader('connection')
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the response's file object, if it is still open."""
        fp = self.fp
        if fp:
            # Clear the attribute first so isclosed() is true even if
            # fp.close() raises.
            self.fp = None
            fp.close()

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return body data.

        With amt None the rest of the body is returned and the response is
        closed; otherwise at most amt bytes are returned.  Returns '' when
        the response is already closed or the request method was HEAD.
        IncompleteRead propagates when a Content-Length body ends early
        during an unbounded read.
        """
        if self.fp is None:
            return ''

        if self._method == 'HEAD':
            self.close()
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self.close()
                    raise
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if not s and amt:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self.close()
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()

        return s

    def _read_chunked(self, amt):
        """Read from a chunked body.

        With amt None all remaining chunks are read, the trailer is
        discarded and the response is closed.  Otherwise up to amt bytes are
        returned and self.chunk_left records what remains of the current
        chunk.  Raises LineTooLong for an over-long chunk-size or trailer
        line and IncompleteRead when a chunk size cannot be parsed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                # Start of a new chunk: parse its hexadecimal size line.
                line = self.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("chunk size")
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    # zero-size chunk marks the end of the body
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
        # return less than x bytes unless EOF is encountered.  It now handles
        # signal interruptions (socket.error EINTR) internally.  This code
        # never caught that exception anyways.  It seems largely pointless.
        # self.fp.read(amt) will work fine.
        s = []
        while amt > 0:
            # read in pieces of at most MAXAMOUNT bytes
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return ''.join(s)

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.fp.fileno()

    def getheader(self, name, default=None):
        """Return the value of header name, or default if it is absent.

        Raises ResponseNotReady when the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()
723
724
725class HTTPConnection:
726
727    _http_vsn = 11
728    _http_vsn_str = 'HTTP/1.1'
729
730    response_class = HTTPResponse
731    default_port = HTTP_PORT
732    auto_open = 1
733    debuglevel = 0
734    strict = 0
735
736    def __init__(self, host, port=None, strict=None,
737                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
738        self.timeout = timeout
739        self.source_address = source_address
740        self.sock = None
741        self._buffer = []
742        self.__response = None
743        self.__state = _CS_IDLE
744        self._method = None
745        self._tunnel_host = None
746        self._tunnel_port = None
747        self._tunnel_headers = {}
748        if strict is not None:
749            self.strict = strict
750
751        (self.host, self.port) = self._get_hostport(host, port)
752
753        # This is stored as an instance variable to allow unittests
754        # to replace with a suitable mock
755        self._create_connection = socket.create_connection
756
757    def set_tunnel(self, host, port=None, headers=None):
758        """ Set up host and port for HTTP CONNECT tunnelling.
759
760        In a connection that uses HTTP Connect tunneling, the host passed to the
761        constructor is used as proxy server that relays all communication to the
762        endpoint passed to set_tunnel. This is done by sending a HTTP CONNECT
763        request to the proxy server when the connection is established.
764
765        This method must be called before the HTTP connection has been
766        established.
767
768        The headers argument should be a mapping of extra HTTP headers
769        to send with the CONNECT request.
770        """
771        # Verify if this is required.
772        if self.sock:
773            raise RuntimeError("Can't setup tunnel for established connection.")
774
775        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
776        if headers:
777            self._tunnel_headers = headers
778        else:
779            self._tunnel_headers.clear()
780
781    def _get_hostport(self, host, port):
782        if port is None:
783            i = host.rfind(':')
784            j = host.rfind(']')         # ipv6 addresses have [...]
785            if i > j:
786                try:
787                    port = int(host[i+1:])
788                except ValueError:
789                    if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
790                        port = self.default_port
791                    else:
792                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
793                host = host[:i]
794            else:
795                port = self.default_port
796            if host and host[0] == '[' and host[-1] == ']':
797                host = host[1:-1]
798        return (host, port)
799
    def set_debuglevel(self, level):
        """Set the debug level; values above 0 make send() print its data."""
        self.debuglevel = level
802
803    def _tunnel(self):
804        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
805            self._tunnel_port))
806        for header, value in self._tunnel_headers.iteritems():
807            self.send("%s: %s\r\n" % (header, value))
808        self.send("\r\n")
809        response = self.response_class(self.sock, strict = self.strict,
810                                       method = self._method)
811        (version, code, message) = response._read_status()
812
813        if version == "HTTP/0.9":
814            # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
815            # concluded HTTP/0.9 is being used something has gone wrong.
816            self.close()
817            raise socket.error("Invalid response from tunnel request")
818        if code != 200:
819            self.close()
820            raise socket.error("Tunnel connection failed: %d %s" % (code,
821                                                                    message.strip()))
822        while True:
823            line = response.fp.readline(_MAXLINE + 1)
824            if len(line) > _MAXLINE:
825                raise LineTooLong("header line")
826            if not line:
827                # for sites which EOF without sending trailer
828                break
829            if line == '\r\n':
830                break
831
832
833    def connect(self):
834        """Connect to the host and port specified in __init__."""
835        self.sock = self._create_connection((self.host,self.port),
836                                           self.timeout, self.source_address)
837
838        if self._tunnel_host:
839            self._tunnel()
840
841    def close(self):
842        """Close the connection to the HTTP server."""
843        self.__state = _CS_IDLE
844        try:
845            sock = self.sock
846            if sock:
847                self.sock = None
848                sock.close()   # close it manually... there may be other refs
849        finally:
850            response = self.__response
851            if response:
852                self.__response = None
853                response.close()
854
855    def send(self, data):
856        """Send `data' to the server."""
857        if self.sock is None:
858            if self.auto_open:
859                self.connect()
860            else:
861                raise NotConnected()
862
863        if self.debuglevel > 0:
864            print "send:", repr(data)
865        blocksize = 8192
866        if hasattr(data,'read') and not isinstance(data, array):
867            if self.debuglevel > 0: print "sendIng a read()able"
868            datablock = data.read(blocksize)
869            while datablock:
870                self.sock.sendall(datablock)
871                datablock = data.read(blocksize)
872        else:
873            self.sock.sendall(data)
874
875    def _output(self, s):
876        """Add a line of output to the current request buffer.
877
878        Assumes that the line does *not* end with \\r\\n.
879        """
880        self._buffer.append(s)
881
882    def _send_output(self, message_body=None):
883        """Send the currently buffered request and clear the buffer.
884
885        Appends an extra \\r\\n to the buffer.
886        A message_body may be specified, to be appended to the request.
887        """
888        self._buffer.extend(("", ""))
889        msg = "\r\n".join(self._buffer)
890        del self._buffer[:]
891        # If msg and message_body are sent in a single send() call,
892        # it will avoid performance problems caused by the interaction
893        # between delayed ack and the Nagle algorithm.
894        if isinstance(message_body, str):
895            msg += message_body
896            message_body = None
897        self.send(msg)
898        if message_body is not None:
899            #message_body was not a string (i.e. it is a file) and
900            #we must run the risk of Nagle
901            self.send(message_body)
902
903    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
904        """Send a request to the server.
905
906        `method' specifies an HTTP request method, e.g. 'GET'.
907        `url' specifies the object being requested, e.g. '/index.html'.
908        `skip_host' if True does not add automatically a 'Host:' header
909        `skip_accept_encoding' if True does not add automatically an
910           'Accept-Encoding:' header
911        """
912
913        # if a prior response has been completed, then forget about it.
914        if self.__response and self.__response.isclosed():
915            self.__response = None
916
917
918        # in certain cases, we cannot issue another request on this connection.
919        # this occurs when:
920        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
921        #   2) a response to a previous request has signalled that it is going
922        #      to close the connection upon completion.
923        #   3) the headers for the previous response have not been read, thus
924        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
925        #
926        # if there is no prior response, then we can request at will.
927        #
928        # if point (2) is true, then we will have passed the socket to the
929        # response (effectively meaning, "there is no prior response"), and
930        # will open a new one when a new request is made.
931        #
932        # Note: if a prior response exists, then we *can* start a new request.
933        #       We are not allowed to begin fetching the response to this new
934        #       request, however, until that prior response is complete.
935        #
936        if self.__state == _CS_IDLE:
937            self.__state = _CS_REQ_STARTED
938        else:
939            raise CannotSendRequest()
940
941        # Save the method we use, we need it later in the response phase
942        self._method = method
943        if not url:
944            url = '/'
945        hdr = '%s %s %s' % (method, url, self._http_vsn_str)
946
947        self._output(hdr)
948
949        if self._http_vsn == 11:
950            # Issue some standard headers for better HTTP/1.1 compliance
951
952            if not skip_host:
953                # this header is issued *only* for HTTP/1.1
954                # connections. more specifically, this means it is
955                # only issued when the client uses the new
956                # HTTPConnection() class. backwards-compat clients
957                # will be using HTTP/1.0 and those clients may be
958                # issuing this header themselves. we should NOT issue
959                # it twice; some web servers (such as Apache) barf
960                # when they see two Host: headers
961
962                # If we need a non-standard port,include it in the
963                # header.  If the request is going through a proxy,
964                # but the host of the actual URL, not the host of the
965                # proxy.
966
967                netloc = ''
968                if url.startswith('http'):
969                    nil, netloc, nil, nil, nil = urlsplit(url)
970
971                if netloc:
972                    try:
973                        netloc_enc = netloc.encode("ascii")
974                    except UnicodeEncodeError:
975                        netloc_enc = netloc.encode("idna")
976                    self.putheader('Host', netloc_enc)
977                else:
978                    if self._tunnel_host:
979                        host = self._tunnel_host
980                        port = self._tunnel_port
981                    else:
982                        host = self.host
983                        port = self.port
984
985                    try:
986                        host_enc = host.encode("ascii")
987                    except UnicodeEncodeError:
988                        host_enc = host.encode("idna")
989                    # Wrap the IPv6 Host Header with [] (RFC 2732)
990                    if host_enc.find(':') >= 0:
991                        host_enc = "[" + host_enc + "]"
992                    if port == self.default_port:
993                        self.putheader('Host', host_enc)
994                    else:
995                        self.putheader('Host', "%s:%s" % (host_enc, port))
996
997            # note: we are assuming that clients will not attempt to set these
998            #       headers since *this* library must deal with the
999            #       consequences. this also means that when the supporting
1000            #       libraries are updated to recognize other forms, then this
1001            #       code should be changed (removed or updated).
1002
1003            # we only want a Content-Encoding of "identity" since we don't
1004            # support encodings such as x-gzip or x-deflate.
1005            if not skip_accept_encoding:
1006                self.putheader('Accept-Encoding', 'identity')
1007
1008            # we can accept "chunked" Transfer-Encodings, but no others
1009            # NOTE: no TE header implies *only* "chunked"
1010            #self.putheader('TE', 'chunked')
1011
1012            # if TE is supplied in the header, then it must appear in a
1013            # Connection header.
1014            #self.putheader('Connection', 'TE')
1015
1016        else:
1017            # For HTTP/1.0, the server will assume "not chunked"
1018            pass
1019
1020    def putheader(self, header, *values):
1021        """Send a request header line to the server.
1022
1023        For example: h.putheader('Accept', 'text/html')
1024        """
1025        if self.__state != _CS_REQ_STARTED:
1026            raise CannotSendHeader()
1027
1028        header = '%s' % header
1029        if not _is_legal_header_name(header):
1030            raise ValueError('Invalid header name %r' % (header,))
1031
1032        values = [str(v) for v in values]
1033        for one_value in values:
1034            if _is_illegal_header_value(one_value):
1035                raise ValueError('Invalid header value %r' % (one_value,))
1036
1037        hdr = '%s: %s' % (header, '\r\n\t'.join(values))
1038        self._output(hdr)
1039
1040    def endheaders(self, message_body=None):
1041        """Indicate that the last header line has been sent to the server.
1042
1043        This method sends the request to the server.  The optional
1044        message_body argument can be used to pass a message body
1045        associated with the request.  The message body will be sent in
1046        the same packet as the message headers if it is string, otherwise it is
1047        sent as a separate packet.
1048        """
1049        if self.__state == _CS_REQ_STARTED:
1050            self.__state = _CS_REQ_SENT
1051        else:
1052            raise CannotSendHeader()
1053        self._send_output(message_body)
1054
1055    def request(self, method, url, body=None, headers={}):
1056        """Send a complete request to the server."""
1057        self._send_request(method, url, body, headers)
1058
1059    def _set_content_length(self, body, method):
1060        # Set the content-length based on the body. If the body is "empty", we
1061        # set Content-Length: 0 for methods that expect a body (RFC 7230,
1062        # Section 3.3.2). If the body is set for other methods, we set the
1063        # header provided we can figure out what the length is.
1064        thelen = None
1065        if body is None and method.upper() in _METHODS_EXPECTING_BODY:
1066            thelen = '0'
1067        elif body is not None:
1068            try:
1069                thelen = str(len(body))
1070            except (TypeError, AttributeError):
1071                # If this is a file-like object, try to
1072                # fstat its file descriptor
1073                try:
1074                    thelen = str(os.fstat(body.fileno()).st_size)
1075                except (AttributeError, OSError):
1076                    # Don't send a length if this failed
1077                    if self.debuglevel > 0: print "Cannot stat!!"
1078
1079        if thelen is not None:
1080            self.putheader('Content-Length', thelen)
1081
1082    def _send_request(self, method, url, body, headers):
1083        # Honor explicitly requested Host: and Accept-Encoding: headers.
1084        header_names = dict.fromkeys([k.lower() for k in headers])
1085        skips = {}
1086        if 'host' in header_names:
1087            skips['skip_host'] = 1
1088        if 'accept-encoding' in header_names:
1089            skips['skip_accept_encoding'] = 1
1090
1091        self.putrequest(method, url, **skips)
1092
1093        if 'content-length' not in header_names:
1094            self._set_content_length(body, method)
1095        for hdr, value in headers.iteritems():
1096            self.putheader(hdr, value)
1097        self.endheaders(body)
1098
1099    def getresponse(self, buffering=False):
1100        "Get the response from the server."
1101
1102        # if a prior response has been completed, then forget about it.
1103        if self.__response and self.__response.isclosed():
1104            self.__response = None
1105
1106        #
1107        # if a prior response exists, then it must be completed (otherwise, we
1108        # cannot read this response's header to determine the connection-close
1109        # behavior)
1110        #
1111        # note: if a prior response existed, but was connection-close, then the
1112        # socket and response were made independent of this HTTPConnection
1113        # object since a new request requires that we open a whole new
1114        # connection
1115        #
1116        # this means the prior response had one of two states:
1117        #   1) will_close: this connection was reset and the prior socket and
1118        #                  response operate independently
1119        #   2) persistent: the response was retained and we await its
1120        #                  isclosed() status to become true.
1121        #
1122        if self.__state != _CS_REQ_SENT or self.__response:
1123            raise ResponseNotReady()
1124
1125        args = (self.sock,)
1126        kwds = {"strict":self.strict, "method":self._method}
1127        if self.debuglevel > 0:
1128            args += (self.debuglevel,)
1129        if buffering:
1130            #only add this keyword if non-default, for compatibility with
1131            #other response_classes.
1132            kwds["buffering"] = True;
1133        response = self.response_class(*args, **kwds)
1134
1135        try:
1136            response.begin()
1137            assert response.will_close != _UNKNOWN
1138            self.__state = _CS_IDLE
1139
1140            if response.will_close:
1141                # this effectively passes the connection to the response
1142                self.close()
1143            else:
1144                # remember this, so we can tell when it is complete
1145                self.__response = response
1146
1147            return response
1148        except:
1149            response.close()
1150            raise
1151
1152
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        """Create the wrapped connection; `host' may be left empty.

        An empty host only fails once a connection is attempted;
        presumably the caller passes a proper host to connect() first.
        """
        # an explicit 0 stands for "use the default port"
        port = None if port == 0 else port
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        """Adopt `conn' and expose its request methods on this object."""
        self._conn = conn

        # delegate the request-building interface to the connection
        for name in ('send', 'putrequest', 'putheader', 'endheaders',
                     'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # make the connection speak this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        """Connect, optionally switching to `host'/`port' first."""
        conn = self._conn
        if host is not None:
            (conn.host, conn.port) = conn._get_hostport(host, port)
        conn.connect()

    def getfile(self):
        """Return the file object recorded by the last getreply()."""
        return self.file

    def getreply(self, buffering=False):
        """Fetch the response and return (status, reason, headers).

        The three items are the status, reason and msg attributes of the
        response; msg is also stored as self.headers and the response's
        fp as self.file.

        When the connection raises BadStatusLine the result is
        (-1, <offending line>, None) and self.headers is None.
        """
        try:
            if buffering:
                # only pass this argument when non-default, for
                # compatibility with other connection classes
                response = self._conn.getresponse(buffering)
            else:
                response = self._conn.getresponse()
        except BadStatusLine as exc:
            ### if getresponse() ever closes the socket on a bad request,
            ### then the next line has a problem with self._conn.sock

            # wrap the still-open socket in a file object
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol
            # error.
            # NOTE(review): close() also resets self.file to None, so the
            # file object created just above is dropped again.
            self.close()

            self.headers = None
            return -1, exc.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and forget the file object."""
        self._conn.close()

        # self.file is normally response.fp, which the connection closes
        # itself; only the reference is cleared here.  After a bad status
        # line the file is our own, and dropping the reference is all
        # that is done about it.
        self.file = None
1243
1244try:
1245    import ssl
1246except ImportError:
1247    pass
1248else:
1249    class HTTPSConnection(HTTPConnection):
1250        "This class allows communication via SSL."
1251
1252        default_port = HTTPS_PORT
1253
1254        def __init__(self, host, port=None, key_file=None, cert_file=None,
1255                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1256                     source_address=None, context=None):
1257            HTTPConnection.__init__(self, host, port, strict, timeout,
1258                                    source_address)
1259            self.key_file = key_file
1260            self.cert_file = cert_file
1261            if context is None:
1262                context = ssl._create_default_https_context()
1263            if key_file or cert_file:
1264                context.load_cert_chain(cert_file, key_file)
1265            self._context = context
1266
1267        def connect(self):
1268            "Connect to a host on a given (SSL) port."
1269
1270            HTTPConnection.connect(self)
1271
1272            if self._tunnel_host:
1273                server_hostname = self._tunnel_host
1274            else:
1275                server_hostname = self.host
1276
1277            self.sock = self._context.wrap_socket(self.sock,
1278                                                  server_hostname=server_hostname)
1279
1280    __all__.append("HTTPSConnection")
1281
1282    class HTTPS(HTTP):
1283        """Compatibility with 1.5 httplib interface
1284
1285        Python 1.5.2 did not have an HTTPS class, but it defined an
1286        interface for sending http requests that is also useful for
1287        https.
1288        """
1289
1290        _connection_class = HTTPSConnection
1291
1292        def __init__(self, host='', port=None, key_file=None, cert_file=None,
1293                     strict=None, context=None):
1294            # provide a default host, pass the X509 cert info
1295
1296            # urf. compensate for bad input.
1297            if port == 0:
1298                port = None
1299            self._setup(self._connection_class(host, port, key_file,
1300                                               cert_file, strict,
1301                                               context=context))
1302
1303            # we never actually use these for anything, but we keep them
1304            # here for compatibility with post-1.5.2 CVS.
1305            self.key_file = key_file
1306            self.cert_file = cert_file
1307
1308
1309    def FakeSocket (sock, sslobj):
1310        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1311                      "Use the result of ssl.wrap_socket() directly instead.",
1312                      DeprecationWarning, stacklevel=2)
1313        return sslobj
1314
1315
class HTTPException(Exception):
    """Base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1320
class NotConnected(HTTPException):
    """Raised by send() when there is no socket and auto_open is false."""
1323
class InvalidURL(HTTPException):
    """Error type for unusable URLs.

    NOTE(review): presumably raised while parsing host/port values;
    confirm against the raise sites.
    """
1326
class UnknownProtocol(HTTPException):
    """Carries a protocol version string in .version and .args."""

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1331
class UnknownTransferEncoding(HTTPException):
    """Error type for transfer encodings the module cannot handle.

    NOTE(review): meaning taken from the name; confirm against the
    raise sites.
    """
1334
class UnimplementedFileMode(HTTPException):
    """Error type for unsupported file modes.

    NOTE(review): meaning taken from the name; confirm against the
    raise sites.
    """
1337
class IncompleteRead(HTTPException):
    """Carries the data read so far and, optionally, the amount missing.

    `partial' is stored in .partial and .args, `expected' in .expected.
    Both repr() and str() report len(partial) and, when `expected' is
    not None, how many more bytes were expected.
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)
1351
class ImproperConnectionState(HTTPException):
    """Base class for errors about using a connection in the wrong state.

    CannotSendRequest, CannotSendHeader and ResponseNotReady derive
    from it.
    """
1354
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1357
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request was started."""
1360
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet.

    That is the case when no request has been sent or an earlier
    response is still open.
    """
1363
class BadStatusLine(HTTPException):
    """Carries an unusable status line in .line and .args.

    A false `line' (such as the empty string) is replaced by its repr()
    so that it remains visible in messages.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.line = shown
        self.args = (shown,)
1370
class LineTooLong(HTTPException):
    """Reports that a line of kind `line_type' exceeded _MAXLINE bytes."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1375
1376# for backwards compatibility
1377error = HTTPException
1378
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets
    the HTTP status line.  For a 0.9 response, however, this is actually
    the first line of the body!  This wrapper hands that line back to the
    reader first and then continues with the underlying file.

    `line' is the text that was already read; `file' is the object it was
    read from.  Attributes not defined here are looked up on `file'.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not self._line_left:
            # Nothing is buffered, so there is nothing to replay: delegate
            # to the file right away instead of failing the non-empty
            # assertions in the read methods.
            self._done()

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Return up to `amt' bytes, taking the buffered line first.

        With `amt' None or negative, the rest of the line plus everything
        the file still has is returned.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # a negative amount means "read everything", as for file objects
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return what is left of the buffered line, then file lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line followed by file lines."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
Note: See TracBrowser for help on using the repository browser.