source: titan/mediathek/localhoster/lib/python2.7/_pyio.py @ 40094

Last change on this file since 40094 was 40094, checked in by obi, 7 years ago

tithek add yoztube-dl support

File size: 68.0 KB
Line 
1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import sys
11import warnings
12import errno
13# Import thread instead of threading to reduce startup cost
14try:
15    from thread import allocate_lock as Lock
16except ImportError:
17    from dummy_thread import allocate_lock as Lock
18
19import io
20from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
21from errno import EINTR
22
23__metaclass__ = type
24
25# open() uses st_blksize whenever we can
26DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
27
28# NOTE: Base classes defined here are registered with the "official" ABCs
29# defined in io.py. We don't use real inheritance though, because we don't want
30# to inherit the C implementations.
31
32
33class BlockingIOError(IOError):
34
35    """Exception raised when I/O would block on a non-blocking I/O stream."""
36
37    def __init__(self, errno, strerror, characters_written=0):
38        super(IOError, self).__init__(errno, strerror)
39        if not isinstance(characters_written, (int, long)):
40            raise TypeError("characters_written must be a integer")
41        self.characters_written = characters_written
42
43
44def open(file, mode="r", buffering=-1,
45         encoding=None, errors=None,
46         newline=None, closefd=True):
47
48    r"""Open file and return a stream.  Raise IOError upon failure.
49
50    file is either a text or byte string giving the name (and the path
51    if the file isn't in the current working directory) of the file to
52    be opened or an integer file descriptor of the file to be
53    wrapped. (If a file descriptor is given, it is closed when the
54    returned I/O object is closed, unless closefd is set to False.)
55
56    mode is an optional string that specifies the mode in which the file
57    is opened. It defaults to 'r' which means open for reading in text
58    mode.  Other common values are 'w' for writing (truncating the file if
59    it already exists), and 'a' for appending (which on some Unix systems,
60    means that all writes append to the end of the file regardless of the
61    current seek position). In text mode, if encoding is not specified the
62    encoding used is platform dependent. (For reading and writing raw
63    bytes use binary mode and leave encoding unspecified.) The available
64    modes are:
65
66    ========= ===============================================================
67    Character Meaning
68    --------- ---------------------------------------------------------------
69    'r'       open for reading (default)
70    'w'       open for writing, truncating the file first
71    'a'       open for writing, appending to the end of the file if it exists
72    'b'       binary mode
73    't'       text mode (default)
74    '+'       open a disk file for updating (reading and writing)
75    'U'       universal newline mode (for backwards compatibility; unneeded
76              for new code)
77    ========= ===============================================================
78
79    The default mode is 'rt' (open for reading text). For binary random
80    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
81    'r+b' opens the file without truncation.
82
83    Python distinguishes between files opened in binary and text modes,
84    even when the underlying operating system doesn't. Files opened in
85    binary mode (appending 'b' to the mode argument) return contents as
86    bytes objects without any decoding. In text mode (the default, or when
87    't' is appended to the mode argument), the contents of the file are
88    returned as strings, the bytes having been first decoded using a
89    platform-dependent encoding or using the specified encoding if given.
90
91    buffering is an optional integer used to set the buffering policy.
92    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
93    line buffering (only usable in text mode), and an integer > 1 to indicate
94    the size of a fixed-size chunk buffer.  When no buffering argument is
95    given, the default buffering policy works as follows:
96
97    * Binary files are buffered in fixed-size chunks; the size of the buffer
98      is chosen using a heuristic trying to determine the underlying device's
99      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
100      On many systems, the buffer will typically be 4096 or 8192 bytes long.
101
102    * "Interactive" text files (files for which isatty() returns True)
103      use line buffering.  Other text files use the policy described above
104      for binary files.
105
106    encoding is the name of the encoding used to decode or encode the
107    file. This should only be used in text mode. The default encoding is
108    platform dependent, but any encoding supported by Python can be
109    passed.  See the codecs module for the list of supported encodings.
110
111    errors is an optional string that specifies how encoding errors are to
112    be handled---this argument should not be used in binary mode. Pass
113    'strict' to raise a ValueError exception if there is an encoding error
114    (the default of None has the same effect), or pass 'ignore' to ignore
115    errors. (Note that ignoring encoding errors can lead to data loss.)
116    See the documentation for codecs.register for a list of the permitted
117    encoding error strings.
118
119    newline controls how universal newlines works (it only applies to text
120    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
121    follows:
122
123    * On input, if newline is None, universal newlines mode is
124      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
125      these are translated into '\n' before being returned to the
126      caller. If it is '', universal newline mode is enabled, but line
127      endings are returned to the caller untranslated. If it has any of
128      the other legal values, input lines are only terminated by the given
129      string, and the line ending is returned to the caller untranslated.
130
131    * On output, if newline is None, any '\n' characters written are
132      translated to the system default line separator, os.linesep. If
133      newline is '', no translation takes place. If newline is any of the
134      other legal values, any '\n' characters written are translated to
135      the given string.
136
137    If closefd is False, the underlying file descriptor will be kept open
138    when the file is closed. This does not work when a file name is given
139    and must be True in that case.
140
141    open() returns a file object whose type depends on the mode, and
142    through which the standard file operations such as reading and writing
143    are performed. When open() is used to open a file in a text mode ('w',
144    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
145    a file in a binary mode, the returned class varies: in read binary
146    mode, it returns a BufferedReader; in write binary and append binary
147    modes, it returns a BufferedWriter, and in read/write mode, it returns
148    a BufferedRandom.
149
150    It is also possible to use a string or bytearray as a file for both
151    reading and writing. For strings StringIO can be used like a file
152    opened in a text mode, and for bytes a BytesIO can be used like a file
153    opened in a binary mode.
154    """
155    if not isinstance(file, (basestring, int, long)):
156        raise TypeError("invalid file: %r" % file)
157    if not isinstance(mode, basestring):
158        raise TypeError("invalid mode: %r" % mode)
159    if not isinstance(buffering, (int, long)):
160        raise TypeError("invalid buffering: %r" % buffering)
161    if encoding is not None and not isinstance(encoding, basestring):
162        raise TypeError("invalid encoding: %r" % encoding)
163    if errors is not None and not isinstance(errors, basestring):
164        raise TypeError("invalid errors: %r" % errors)
165    modes = set(mode)
166    if modes - set("arwb+tU") or len(mode) > len(modes):
167        raise ValueError("invalid mode: %r" % mode)
168    reading = "r" in modes
169    writing = "w" in modes
170    appending = "a" in modes
171    updating = "+" in modes
172    text = "t" in modes
173    binary = "b" in modes
174    if "U" in modes:
175        if writing or appending:
176            raise ValueError("can't use U and writing mode at once")
177        reading = True
178    if text and binary:
179        raise ValueError("can't have text and binary mode at once")
180    if reading + writing + appending > 1:
181        raise ValueError("can't have read/write/append mode at once")
182    if not (reading or writing or appending):
183        raise ValueError("must have exactly one of read/write/append mode")
184    if binary and encoding is not None:
185        raise ValueError("binary mode doesn't take an encoding argument")
186    if binary and errors is not None:
187        raise ValueError("binary mode doesn't take an errors argument")
188    if binary and newline is not None:
189        raise ValueError("binary mode doesn't take a newline argument")
190    raw = FileIO(file,
191                 (reading and "r" or "") +
192                 (writing and "w" or "") +
193                 (appending and "a" or "") +
194                 (updating and "+" or ""),
195                 closefd)
196    result = raw
197    try:
198        line_buffering = False
199        if buffering == 1 or buffering < 0 and raw.isatty():
200            buffering = -1
201            line_buffering = True
202        if buffering < 0:
203            buffering = DEFAULT_BUFFER_SIZE
204            try:
205                bs = os.fstat(raw.fileno()).st_blksize
206            except (os.error, AttributeError):
207                pass
208            else:
209                if bs > 1:
210                    buffering = bs
211        if buffering < 0:
212            raise ValueError("invalid buffering size")
213        if buffering == 0:
214            if binary:
215                return result
216            raise ValueError("can't have unbuffered text I/O")
217        if updating:
218            buffer = BufferedRandom(raw, buffering)
219        elif writing or appending:
220            buffer = BufferedWriter(raw, buffering)
221        elif reading:
222            buffer = BufferedReader(raw, buffering)
223        else:
224            raise ValueError("unknown mode: %r" % mode)
225        result = buffer
226        if binary:
227            return result
228        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
229        result = text
230        text.mode = mode
231        return result
232    except:
233        result.close()
234        raise
235
236
237class DocDescriptor:
238    """Helper for builtins.open.__doc__
239    """
240    def __get__(self, obj, typ):
241        return (
242            "open(file, mode='r', buffering=-1, encoding=None, "
243                 "errors=None, newline=None, closefd=True)\n\n" +
244            open.__doc__)
245
246class OpenWrapper:
247    """Wrapper for builtins.open
248
249    Trick so that open won't become a bound method when stored
250    as a class variable (as dbm.dumb does).
251
252    See initstdio() in Python/pythonrun.c.
253    """
254    __doc__ = DocDescriptor()
255
256    def __new__(cls, *args, **kwargs):
257        return open(*args, **kwargs)
258
259
260class UnsupportedOperation(ValueError, IOError):
261    pass
262
263
264class IOBase:
265    __metaclass__ = abc.ABCMeta
266
267    """The abstract base class for all I/O classes, acting on streams of
268    bytes. There is no public constructor.
269
270    This class provides dummy implementations for many methods that
271    derived classes can override selectively; the default implementations
272    represent a file that cannot be read, written or seeked.
273
274    Even though IOBase does not declare read, readinto, or write because
275    their signatures will vary, implementations and clients should
276    consider those methods part of the interface. Also, implementations
277    may raise a IOError when operations they do not support are called.
278
279    The basic type used for binary data read from or written to a file is
280    bytes. bytearrays are accepted too, and in some cases (such as
281    readinto) needed. Text I/O classes work with str data.
282
283    Note that calling any method (even inquiries) on a closed stream is
284    undefined. Implementations may raise IOError in this case.
285
286    IOBase (and its subclasses) support the iterator protocol, meaning
287    that an IOBase object can be iterated over yielding the lines in a
288    stream.
289
290    IOBase also supports the :keyword:`with` statement. In this example,
291    fp is closed after the suite of the with statement is complete:
292
293    with open('spam.txt', 'r') as fp:
294        fp.write('Spam and eggs!')
295    """
296
297    ### Internal ###
298
299    def _unsupported(self, name):
300        """Internal: raise an exception for unsupported operations."""
301        raise UnsupportedOperation("%s.%s() not supported" %
302                                   (self.__class__.__name__, name))
303
304    ### Positioning ###
305
306    def seek(self, pos, whence=0):
307        """Change stream position.
308
309        Change the stream position to byte offset pos. Argument pos is
310        interpreted relative to the position indicated by whence.  Values
311        for whence are:
312
313        * 0 -- start of stream (the default); offset should be zero or positive
314        * 1 -- current stream position; offset may be negative
315        * 2 -- end of stream; offset is usually negative
316
317        Return the new absolute position.
318        """
319        self._unsupported("seek")
320
321    def tell(self):
322        """Return current stream position."""
323        return self.seek(0, 1)
324
325    def truncate(self, pos=None):
326        """Truncate file to size bytes.
327
328        Size defaults to the current IO position as reported by tell().  Return
329        the new size.
330        """
331        self._unsupported("truncate")
332
333    ### Flush and close ###
334
335    def flush(self):
336        """Flush write buffers, if applicable.
337
338        This is not implemented for read-only and non-blocking streams.
339        """
340        self._checkClosed()
341        # XXX Should this return the number of bytes written???
342
343    __closed = False
344
345    def close(self):
346        """Flush and close the IO object.
347
348        This method has no effect if the file is already closed.
349        """
350        if not self.__closed:
351            try:
352                self.flush()
353            finally:
354                self.__closed = True
355
356    def __del__(self):
357        """Destructor.  Calls close()."""
358        # The try/except block is in case this is called at program
359        # exit time, when it's possible that globals have already been
360        # deleted, and then the close() call might fail.  Since
361        # there's nothing we can do about such failures and they annoy
362        # the end users, we suppress the traceback.
363        try:
364            self.close()
365        except:
366            pass
367
368    ### Inquiries ###
369
370    def seekable(self):
371        """Return whether object supports random access.
372
373        If False, seek(), tell() and truncate() will raise IOError.
374        This method may need to do a test seek().
375        """
376        return False
377
378    def _checkSeekable(self, msg=None):
379        """Internal: raise an IOError if file is not seekable
380        """
381        if not self.seekable():
382            raise IOError("File or stream is not seekable."
383                          if msg is None else msg)
384
385
386    def readable(self):
387        """Return whether object was opened for reading.
388
389        If False, read() will raise IOError.
390        """
391        return False
392
393    def _checkReadable(self, msg=None):
394        """Internal: raise an IOError if file is not readable
395        """
396        if not self.readable():
397            raise IOError("File or stream is not readable."
398                          if msg is None else msg)
399
400    def writable(self):
401        """Return whether object was opened for writing.
402
403        If False, write() and truncate() will raise IOError.
404        """
405        return False
406
407    def _checkWritable(self, msg=None):
408        """Internal: raise an IOError if file is not writable
409        """
410        if not self.writable():
411            raise IOError("File or stream is not writable."
412                          if msg is None else msg)
413
414    @property
415    def closed(self):
416        """closed: bool.  True iff the file has been closed.
417
418        For backwards compatibility, this is a property, not a predicate.
419        """
420        return self.__closed
421
422    def _checkClosed(self, msg=None):
423        """Internal: raise an ValueError if file is closed
424        """
425        if self.closed:
426            raise ValueError("I/O operation on closed file."
427                             if msg is None else msg)
428
429    ### Context manager ###
430
431    def __enter__(self):
432        """Context management protocol.  Returns self."""
433        self._checkClosed()
434        return self
435
436    def __exit__(self, *args):
437        """Context management protocol.  Calls close()"""
438        self.close()
439
440    ### Lower-level APIs ###
441
442    # XXX Should these be present even if unimplemented?
443
444    def fileno(self):
445        """Returns underlying file descriptor if one exists.
446
447        An IOError is raised if the IO object does not use a file descriptor.
448        """
449        self._unsupported("fileno")
450
451    def isatty(self):
452        """Return whether this is an 'interactive' stream.
453
454        Return False if it can't be determined.
455        """
456        self._checkClosed()
457        return False
458
459    ### Readline[s] and writelines ###
460
461    def readline(self, limit=-1):
462        r"""Read and return a line from the stream.
463
464        If limit is specified, at most limit bytes will be read.
465
466        The line terminator is always b'\n' for binary files; for text
467        files, the newlines argument to open can be used to select the line
468        terminator(s) recognized.
469        """
470        # For backwards compatibility, a (slowish) readline().
471        if hasattr(self, "peek"):
472            def nreadahead():
473                readahead = self.peek(1)
474                if not readahead:
475                    return 1
476                n = (readahead.find(b"\n") + 1) or len(readahead)
477                if limit >= 0:
478                    n = min(n, limit)
479                return n
480        else:
481            def nreadahead():
482                return 1
483        if limit is None:
484            limit = -1
485        elif not isinstance(limit, (int, long)):
486            raise TypeError("limit must be an integer")
487        res = bytearray()
488        while limit < 0 or len(res) < limit:
489            b = self.read(nreadahead())
490            if not b:
491                break
492            res += b
493            if res.endswith(b"\n"):
494                break
495        return bytes(res)
496
497    def __iter__(self):
498        self._checkClosed()
499        return self
500
501    def next(self):
502        line = self.readline()
503        if not line:
504            raise StopIteration
505        return line
506
507    def readlines(self, hint=None):
508        """Return a list of lines from the stream.
509
510        hint can be specified to control the number of lines read: no more
511        lines will be read if the total size (in bytes/characters) of all
512        lines so far exceeds hint.
513        """
514        if hint is not None and not isinstance(hint, (int, long)):
515            raise TypeError("integer or None expected")
516        if hint is None or hint <= 0:
517            return list(self)
518        n = 0
519        lines = []
520        for line in self:
521            lines.append(line)
522            n += len(line)
523            if n >= hint:
524                break
525        return lines
526
527    def writelines(self, lines):
528        self._checkClosed()
529        for line in lines:
530            self.write(line)
531
532io.IOBase.register(IOBase)
533
534
535class RawIOBase(IOBase):
536
537    """Base class for raw binary I/O."""
538
539    # The read() method is implemented by calling readinto(); derived
540    # classes that want to support read() only need to implement
541    # readinto() as a primitive operation.  In general, readinto() can be
542    # more efficient than read().
543
544    # (It would be tempting to also provide an implementation of
545    # readinto() in terms of read(), in case the latter is a more suitable
546    # primitive operation, but that would lead to nasty recursion in case
547    # a subclass doesn't implement either.)
548
549    def read(self, n=-1):
550        """Read and return up to n bytes.
551
552        Returns an empty bytes object on EOF, or None if the object is
553        set not to block and has no data to read.
554        """
555        if n is None:
556            n = -1
557        if n < 0:
558            return self.readall()
559        b = bytearray(n.__index__())
560        n = self.readinto(b)
561        if n is None:
562            return None
563        del b[n:]
564        return bytes(b)
565
566    def readall(self):
567        """Read until EOF, using multiple read() call."""
568        res = bytearray()
569        while True:
570            data = self.read(DEFAULT_BUFFER_SIZE)
571            if not data:
572                break
573            res += data
574        if res:
575            return bytes(res)
576        else:
577            # b'' or None
578            return data
579
580    def readinto(self, b):
581        """Read up to len(b) bytes into b.
582
583        Returns number of bytes read (0 for EOF), or None if the object
584        is set not to block and has no data to read.
585        """
586        self._unsupported("readinto")
587
588    def write(self, b):
589        """Write the given buffer to the IO stream.
590
591        Returns the number of bytes written, which may be less than len(b).
592        """
593        self._unsupported("write")
594
595io.RawIOBase.register(RawIOBase)
596from _io import FileIO
597RawIOBase.register(FileIO)
598
599
600class BufferedIOBase(IOBase):
601
602    """Base class for buffered IO objects.
603
604    The main difference with RawIOBase is that the read() method
605    supports omitting the size argument, and does not have a default
606    implementation that defers to readinto().
607
608    In addition, read(), readinto() and write() may raise
609    BlockingIOError if the underlying raw stream is in non-blocking
610    mode and not ready; unlike their raw counterparts, they will never
611    return None.
612
613    A typical implementation should not inherit from a RawIOBase
614    implementation, but wrap one.
615    """
616
617    def read(self, n=None):
618        """Read and return up to n bytes.
619
620        If the argument is omitted, None, or negative, reads and
621        returns all data until EOF.
622
623        If the argument is positive, and the underlying raw stream is
624        not 'interactive', multiple raw reads may be issued to satisfy
625        the byte count (unless EOF is reached first).  But for
626        interactive raw streams (XXX and for pipes?), at most one raw
627        read will be issued, and a short result does not imply that
628        EOF is imminent.
629
630        Returns an empty bytes array on EOF.
631
632        Raises BlockingIOError if the underlying raw stream has no
633        data at the moment.
634        """
635        self._unsupported("read")
636
637    def read1(self, n=None):
638        """Read up to n bytes with at most one read() system call."""
639        self._unsupported("read1")
640
641    def readinto(self, b):
642        """Read up to len(b) bytes into b.
643
644        Like read(), this may issue multiple reads to the underlying raw
645        stream, unless the latter is 'interactive'.
646
647        Returns the number of bytes read (0 for EOF).
648
649        Raises BlockingIOError if the underlying raw stream has no
650        data at the moment.
651        """
652        # XXX This ought to work with anything that supports the buffer API
653        data = self.read(len(b))
654        n = len(data)
655        try:
656            b[:n] = data
657        except TypeError as err:
658            import array
659            if not isinstance(b, array.array):
660                raise err
661            b[:n] = array.array(b'b', data)
662        return n
663
664    def write(self, b):
665        """Write the given buffer to the IO stream.
666
667        Return the number of bytes written, which is never less than
668        len(b).
669
670        Raises BlockingIOError if the buffer is full and the
671        underlying raw stream cannot accept more data at the moment.
672        """
673        self._unsupported("write")
674
675    def detach(self):
676        """
677        Separate the underlying raw stream from the buffer and return it.
678
679        After the raw stream has been detached, the buffer is in an unusable
680        state.
681        """
682        self._unsupported("detach")
683
684io.BufferedIOBase.register(BufferedIOBase)
685
686
687class _BufferedIOMixin(BufferedIOBase):
688
689    """A mixin implementation of BufferedIOBase with an underlying raw stream.
690
691    This passes most requests on to the underlying raw stream.  It
692    does *not* provide implementations of read(), readinto() or
693    write().
694    """
695
696    def __init__(self, raw):
697        self._raw = raw
698
699    ### Positioning ###
700
701    def seek(self, pos, whence=0):
702        new_position = self.raw.seek(pos, whence)
703        if new_position < 0:
704            raise IOError("seek() returned an invalid position")
705        return new_position
706
707    def tell(self):
708        pos = self.raw.tell()
709        if pos < 0:
710            raise IOError("tell() returned an invalid position")
711        return pos
712
713    def truncate(self, pos=None):
714        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
715        # and a flush may be necessary to synch both views of the current
716        # file state.
717        self.flush()
718
719        if pos is None:
720            pos = self.tell()
721        # XXX: Should seek() be used, instead of passing the position
722        # XXX  directly to truncate?
723        return self.raw.truncate(pos)
724
725    ### Flush and close ###
726
727    def flush(self):
728        if self.closed:
729            raise ValueError("flush of closed file")
730        self.raw.flush()
731
732    def close(self):
733        if self.raw is not None and not self.closed:
734            try:
735                # may raise BlockingIOError or BrokenPipeError etc
736                self.flush()
737            finally:
738                self.raw.close()
739
740    def detach(self):
741        if self.raw is None:
742            raise ValueError("raw stream already detached")
743        self.flush()
744        raw = self._raw
745        self._raw = None
746        return raw
747
748    ### Inquiries ###
749
750    def seekable(self):
751        return self.raw.seekable()
752
753    def readable(self):
754        return self.raw.readable()
755
756    def writable(self):
757        return self.raw.writable()
758
759    @property
760    def raw(self):
761        return self._raw
762
763    @property
764    def closed(self):
765        return self.raw.closed
766
767    @property
768    def name(self):
769        return self.raw.name
770
771    @property
772    def mode(self):
773        return self.raw.mode
774
775    def __repr__(self):
776        clsname = self.__class__.__name__
777        try:
778            name = self.name
779        except Exception:
780            return "<_pyio.{0}>".format(clsname)
781        else:
782            return "<_pyio.{0} name={1!r}>".format(clsname, name)
783
784    ### Lower-level APIs ###
785
786    def fileno(self):
787        return self.raw.fileno()
788
789    def isatty(self):
790        return self.raw.isatty()
791
792
793class BytesIO(BufferedIOBase):
794
795    """Buffered I/O implementation using an in-memory bytes buffer."""
796
797    def __init__(self, initial_bytes=None):
798        buf = bytearray()
799        if initial_bytes is not None:
800            buf.extend(initial_bytes)
801        self._buffer = buf
802        self._pos = 0
803
804    def __getstate__(self):
805        if self.closed:
806            raise ValueError("__getstate__ on closed file")
807        return self.__dict__.copy()
808
809    def getvalue(self):
810        """Return the bytes value (contents) of the buffer
811        """
812        if self.closed:
813            raise ValueError("getvalue on closed file")
814        return bytes(self._buffer)
815
816    def read(self, n=None):
817        if self.closed:
818            raise ValueError("read from closed file")
819        if n is None:
820            n = -1
821        if not isinstance(n, (int, long)):
822            raise TypeError("integer argument expected, got {0!r}".format(
823                type(n)))
824        if n < 0:
825            n = len(self._buffer)
826        if len(self._buffer) <= self._pos:
827            return b""
828        newpos = min(len(self._buffer), self._pos + n)
829        b = self._buffer[self._pos : newpos]
830        self._pos = newpos
831        return bytes(b)
832
833    def read1(self, n):
834        """This is the same as read.
835        """
836        return self.read(n)
837
838    def write(self, b):
839        if self.closed:
840            raise ValueError("write to closed file")
841        if isinstance(b, unicode):
842            raise TypeError("can't write unicode to binary stream")
843        n = len(b)
844        if n == 0:
845            return 0
846        pos = self._pos
847        if pos > len(self._buffer):
848            # Inserts null bytes between the current end of the file
849            # and the new write position.
850            padding = b'\x00' * (pos - len(self._buffer))
851            self._buffer += padding
852        self._buffer[pos:pos + n] = b
853        self._pos += n
854        return n
855
856    def seek(self, pos, whence=0):
857        if self.closed:
858            raise ValueError("seek on closed file")
859        try:
860            pos.__index__
861        except AttributeError:
862            raise TypeError("an integer is required")
863        if whence == 0:
864            if pos < 0:
865                raise ValueError("negative seek position %r" % (pos,))
866            self._pos = pos
867        elif whence == 1:
868            self._pos = max(0, self._pos + pos)
869        elif whence == 2:
870            self._pos = max(0, len(self._buffer) + pos)
871        else:
872            raise ValueError("invalid whence value")
873        return self._pos
874
875    def tell(self):
876        if self.closed:
877            raise ValueError("tell on closed file")
878        return self._pos
879
880    def truncate(self, pos=None):
881        if self.closed:
882            raise ValueError("truncate on closed file")
883        if pos is None:
884            pos = self._pos
885        else:
886            try:
887                pos.__index__
888            except AttributeError:
889                raise TypeError("an integer is required")
890            if pos < 0:
891                raise ValueError("negative truncate position %r" % (pos,))
892        del self._buffer[pos:]
893        return pos
894
895    def readable(self):
896        if self.closed:
897            raise ValueError("I/O operation on closed file.")
898        return True
899
900    def writable(self):
901        if self.closed:
902            raise ValueError("I/O operation on closed file.")
903        return True
904
905    def seekable(self):
906        if self.closed:
907            raise ValueError("I/O operation on closed file.")
908        return True
909
910
911class BufferedReader(_BufferedIOMixin):
912
913    """BufferedReader(raw[, buffer_size])
914
915    A buffer for a readable, sequential BaseRawIO object.
916
917    The constructor creates a BufferedReader for the given readable raw
918    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
919    is used.
920    """
921
922    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
923        """Create a new buffered reader using the given readable raw IO object.
924        """
925        if not raw.readable():
926            raise IOError('"raw" argument must be readable.')
927
928        _BufferedIOMixin.__init__(self, raw)
929        if buffer_size <= 0:
930            raise ValueError("invalid buffer size")
931        self.buffer_size = buffer_size
932        self._reset_read_buf()
933        self._read_lock = Lock()
934
935    def _reset_read_buf(self):
936        self._read_buf = b""
937        self._read_pos = 0
938
939    def read(self, n=None):
940        """Read n bytes.
941
942        Returns exactly n bytes of data unless the underlying raw IO
943        stream reaches EOF or if the call would block in non-blocking
944        mode. If n is negative, read until EOF or until read() would
945        block.
946        """
947        if n is not None and n < -1:
948            raise ValueError("invalid number of bytes to read")
949        with self._read_lock:
950            return self._read_unlocked(n)
951
952    def _read_unlocked(self, n=None):
953        nodata_val = b""
954        empty_values = (b"", None)
955        buf = self._read_buf
956        pos = self._read_pos
957
958        # Special case for when the number of bytes to read is unspecified.
959        if n is None or n == -1:
960            self._reset_read_buf()
961            chunks = [buf[pos:]]  # Strip the consumed bytes.
962            current_size = 0
963            while True:
964                # Read until EOF or until read() would block.
965                try:
966                    chunk = self.raw.read()
967                except IOError as e:
968                    if e.errno != EINTR:
969                        raise
970                    continue
971                if chunk in empty_values:
972                    nodata_val = chunk
973                    break
974                current_size += len(chunk)
975                chunks.append(chunk)
976            return b"".join(chunks) or nodata_val
977
978        # The number of bytes to read is specified, return at most n bytes.
979        avail = len(buf) - pos  # Length of the available buffered data.
980        if n <= avail:
981            # Fast path: the data to read is fully buffered.
982            self._read_pos += n
983            return buf[pos:pos+n]
984        # Slow path: read from the stream until enough bytes are read,
985        # or until an EOF occurs or until read() would block.
986        chunks = [buf[pos:]]
987        wanted = max(self.buffer_size, n)
988        while avail < n:
989            try:
990                chunk = self.raw.read(wanted)
991            except IOError as e:
992                if e.errno != EINTR:
993                    raise
994                continue
995            if chunk in empty_values:
996                nodata_val = chunk
997                break
998            avail += len(chunk)
999            chunks.append(chunk)
1000        # n is more then avail only when an EOF occurred or when
1001        # read() would have blocked.
1002        n = min(n, avail)
1003        out = b"".join(chunks)
1004        self._read_buf = out[n:]  # Save the extra data in the buffer.
1005        self._read_pos = 0
1006        return out[:n] if out else nodata_val
1007
1008    def peek(self, n=0):
1009        """Returns buffered bytes without advancing the position.
1010
1011        The argument indicates a desired minimal number of bytes; we
1012        do at most one raw read to satisfy it.  We never return more
1013        than self.buffer_size.
1014        """
1015        with self._read_lock:
1016            return self._peek_unlocked(n)
1017
1018    def _peek_unlocked(self, n=0):
1019        want = min(n, self.buffer_size)
1020        have = len(self._read_buf) - self._read_pos
1021        if have < want or have <= 0:
1022            to_read = self.buffer_size - have
1023            while True:
1024                try:
1025                    current = self.raw.read(to_read)
1026                except IOError as e:
1027                    if e.errno != EINTR:
1028                        raise
1029                    continue
1030                break
1031            if current:
1032                self._read_buf = self._read_buf[self._read_pos:] + current
1033                self._read_pos = 0
1034        return self._read_buf[self._read_pos:]
1035
1036    def read1(self, n):
1037        """Reads up to n bytes, with at most one read() system call."""
1038        # Returns up to n bytes.  If at least one byte is buffered, we
1039        # only return buffered bytes.  Otherwise, we do one raw read.
1040        if n < 0:
1041            raise ValueError("number of bytes to read must be positive")
1042        if n == 0:
1043            return b""
1044        with self._read_lock:
1045            self._peek_unlocked(1)
1046            return self._read_unlocked(
1047                min(n, len(self._read_buf) - self._read_pos))
1048
1049    def tell(self):
1050        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1051
1052    def seek(self, pos, whence=0):
1053        if not (0 <= whence <= 2):
1054            raise ValueError("invalid whence value")
1055        with self._read_lock:
1056            if whence == 1:
1057                pos -= len(self._read_buf) - self._read_pos
1058            pos = _BufferedIOMixin.seek(self, pos, whence)
1059            self._reset_read_buf()
1060            return pos
1061
1062class BufferedWriter(_BufferedIOMixin):
1063
1064    """A buffer for a writeable sequential RawIO object.
1065
1066    The constructor creates a BufferedWriter for the given writeable raw
1067    stream. If the buffer_size is not given, it defaults to
1068    DEFAULT_BUFFER_SIZE.
1069    """
1070
1071    _warning_stack_offset = 2
1072
1073    def __init__(self, raw,
1074                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1075        if not raw.writable():
1076            raise IOError('"raw" argument must be writable.')
1077
1078        _BufferedIOMixin.__init__(self, raw)
1079        if buffer_size <= 0:
1080            raise ValueError("invalid buffer size")
1081        if max_buffer_size is not None:
1082            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1083                          self._warning_stack_offset)
1084        self.buffer_size = buffer_size
1085        self._write_buf = bytearray()
1086        self._write_lock = Lock()
1087
1088    def write(self, b):
1089        if self.closed:
1090            raise ValueError("write to closed file")
1091        if isinstance(b, unicode):
1092            raise TypeError("can't write unicode to binary stream")
1093        with self._write_lock:
1094            # XXX we can implement some more tricks to try and avoid
1095            # partial writes
1096            if len(self._write_buf) > self.buffer_size:
1097                # We're full, so let's pre-flush the buffer.  (This may
1098                # raise BlockingIOError with characters_written == 0.)
1099                self._flush_unlocked()
1100            before = len(self._write_buf)
1101            self._write_buf.extend(b)
1102            written = len(self._write_buf) - before
1103            if len(self._write_buf) > self.buffer_size:
1104                try:
1105                    self._flush_unlocked()
1106                except BlockingIOError as e:
1107                    if len(self._write_buf) > self.buffer_size:
1108                        # We've hit the buffer_size. We have to accept a partial
1109                        # write and cut back our buffer.
1110                        overage = len(self._write_buf) - self.buffer_size
1111                        written -= overage
1112                        self._write_buf = self._write_buf[:self.buffer_size]
1113                        raise BlockingIOError(e.errno, e.strerror, written)
1114            return written
1115
1116    def truncate(self, pos=None):
1117        with self._write_lock:
1118            self._flush_unlocked()
1119            if pos is None:
1120                pos = self.raw.tell()
1121            return self.raw.truncate(pos)
1122
1123    def flush(self):
1124        with self._write_lock:
1125            self._flush_unlocked()
1126
1127    def _flush_unlocked(self):
1128        if self.closed:
1129            raise ValueError("flush of closed file")
1130        while self._write_buf:
1131            try:
1132                n = self.raw.write(self._write_buf)
1133            except BlockingIOError:
1134                raise RuntimeError("self.raw should implement RawIOBase: it "
1135                                   "should not raise BlockingIOError")
1136            except IOError as e:
1137                if e.errno != EINTR:
1138                    raise
1139                continue
1140            if n is None:
1141                raise BlockingIOError(
1142                    errno.EAGAIN,
1143                    "write could not complete without blocking", 0)
1144            if n > len(self._write_buf) or n < 0:
1145                raise IOError("write() returned incorrect number of bytes")
1146            del self._write_buf[:n]
1147
1148    def tell(self):
1149        return _BufferedIOMixin.tell(self) + len(self._write_buf)
1150
1151    def seek(self, pos, whence=0):
1152        if not (0 <= whence <= 2):
1153            raise ValueError("invalid whence")
1154        with self._write_lock:
1155            self._flush_unlocked()
1156            return _BufferedIOMixin.seek(self, pos, whence)
1157
1158
1159class BufferedRWPair(BufferedIOBase):
1160
1161    """A buffered reader and writer object together.
1162
1163    A buffered reader object and buffered writer object put together to
1164    form a sequential IO object that can read and write. This is typically
1165    used with a socket or two-way pipe.
1166
1167    reader and writer are RawIOBase objects that are readable and
1168    writeable respectively. If the buffer_size is omitted it defaults to
1169    DEFAULT_BUFFER_SIZE.
1170    """
1171
1172    # XXX The usefulness of this (compared to having two separate IO
1173    # objects) is questionable.
1174
1175    def __init__(self, reader, writer,
1176                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1177        """Constructor.
1178
1179        The arguments are two RawIO instances.
1180        """
1181        if max_buffer_size is not None:
1182            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1183
1184        if not reader.readable():
1185            raise IOError('"reader" argument must be readable.')
1186
1187        if not writer.writable():
1188            raise IOError('"writer" argument must be writable.')
1189
1190        self.reader = BufferedReader(reader, buffer_size)
1191        self.writer = BufferedWriter(writer, buffer_size)
1192
1193    def read(self, n=None):
1194        if n is None:
1195            n = -1
1196        return self.reader.read(n)
1197
1198    def readinto(self, b):
1199        return self.reader.readinto(b)
1200
1201    def write(self, b):
1202        return self.writer.write(b)
1203
1204    def peek(self, n=0):
1205        return self.reader.peek(n)
1206
1207    def read1(self, n):
1208        return self.reader.read1(n)
1209
1210    def readable(self):
1211        return self.reader.readable()
1212
1213    def writable(self):
1214        return self.writer.writable()
1215
1216    def flush(self):
1217        return self.writer.flush()
1218
1219    def close(self):
1220        try:
1221            self.writer.close()
1222        finally:
1223            self.reader.close()
1224
1225    def isatty(self):
1226        return self.reader.isatty() or self.writer.isatty()
1227
1228    @property
1229    def closed(self):
1230        return self.writer.closed
1231
1232
1233class BufferedRandom(BufferedWriter, BufferedReader):
1234
1235    """A buffered interface to random access streams.
1236
1237    The constructor creates a reader and writer for a seekable stream,
1238    raw, given in the first argument. If the buffer_size is omitted it
1239    defaults to DEFAULT_BUFFER_SIZE.
1240    """
1241
1242    _warning_stack_offset = 3
1243
1244    def __init__(self, raw,
1245                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1246        raw._checkSeekable()
1247        BufferedReader.__init__(self, raw, buffer_size)
1248        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1249
1250    def seek(self, pos, whence=0):
1251        if not (0 <= whence <= 2):
1252            raise ValueError("invalid whence")
1253        self.flush()
1254        if self._read_buf:
1255            # Undo read ahead.
1256            with self._read_lock:
1257                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1258        # First do the raw seek, then empty the read buffer, so that
1259        # if the raw seek fails, we don't lose buffered data forever.
1260        pos = self.raw.seek(pos, whence)
1261        with self._read_lock:
1262            self._reset_read_buf()
1263        if pos < 0:
1264            raise IOError("seek() returned invalid position")
1265        return pos
1266
1267    def tell(self):
1268        if self._write_buf:
1269            return BufferedWriter.tell(self)
1270        else:
1271            return BufferedReader.tell(self)
1272
1273    def truncate(self, pos=None):
1274        if pos is None:
1275            pos = self.tell()
1276        # Use seek to flush the read buffer.
1277        return BufferedWriter.truncate(self, pos)
1278
1279    def read(self, n=None):
1280        if n is None:
1281            n = -1
1282        self.flush()
1283        return BufferedReader.read(self, n)
1284
1285    def readinto(self, b):
1286        self.flush()
1287        return BufferedReader.readinto(self, b)
1288
1289    def peek(self, n=0):
1290        self.flush()
1291        return BufferedReader.peek(self, n)
1292
1293    def read1(self, n):
1294        self.flush()
1295        return BufferedReader.read1(self, n)
1296
1297    def write(self, b):
1298        if self._read_buf:
1299            # Undo readahead
1300            with self._read_lock:
1301                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1302                self._reset_read_buf()
1303        return BufferedWriter.write(self, b)
1304
1305
1306class TextIOBase(IOBase):
1307
1308    """Base class for text I/O.
1309
1310    This class provides a character and line based interface to stream
1311    I/O. There is no readinto method because Python's character strings
1312    are immutable. There is no public constructor.
1313    """
1314
1315    def read(self, n=-1):
1316        """Read at most n characters from stream.
1317
1318        Read from underlying buffer until we have n characters or we hit EOF.
1319        If n is negative or omitted, read until EOF.
1320        """
1321        self._unsupported("read")
1322
1323    def write(self, s):
1324        """Write string s to stream."""
1325        self._unsupported("write")
1326
1327    def truncate(self, pos=None):
1328        """Truncate size to pos."""
1329        self._unsupported("truncate")
1330
1331    def readline(self):
1332        """Read until newline or EOF.
1333
1334        Returns an empty string if EOF is hit immediately.
1335        """
1336        self._unsupported("readline")
1337
1338    def detach(self):
1339        """
1340        Separate the underlying buffer from the TextIOBase and return it.
1341
1342        After the underlying buffer has been detached, the TextIO is in an
1343        unusable state.
1344        """
1345        self._unsupported("detach")
1346
1347    @property
1348    def encoding(self):
1349        """Subclasses should override."""
1350        return None
1351
1352    @property
1353    def newlines(self):
1354        """Line endings translated so far.
1355
1356        Only line endings translated during reading are considered.
1357
1358        Subclasses should override.
1359        """
1360        return None
1361
1362    @property
1363    def errors(self):
1364        """Error setting of the decoder or encoder.
1365
1366        Subclasses should override."""
1367        return None
1368
1369io.TextIOBase.register(TextIOBase)
1370
1371
1372class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1373    r"""Codec used when reading a file in universal newlines mode.  It wraps
1374    another incremental decoder, translating \r\n and \r into \n.  It also
1375    records the types of newlines encountered.  When used with
1376    translate=False, it ensures that the newline sequence is returned in
1377    one piece.
1378    """
1379    def __init__(self, decoder, translate, errors='strict'):
1380        codecs.IncrementalDecoder.__init__(self, errors=errors)
1381        self.translate = translate
1382        self.decoder = decoder
1383        self.seennl = 0
1384        self.pendingcr = False
1385
1386    def decode(self, input, final=False):
1387        # decode input (with the eventual \r from a previous pass)
1388        if self.decoder is None:
1389            output = input
1390        else:
1391            output = self.decoder.decode(input, final=final)
1392        if self.pendingcr and (output or final):
1393            output = "\r" + output
1394            self.pendingcr = False
1395
1396        # retain last \r even when not translating data:
1397        # then readline() is sure to get \r\n in one pass
1398        if output.endswith("\r") and not final:
1399            output = output[:-1]
1400            self.pendingcr = True
1401
1402        # Record which newlines are read
1403        crlf = output.count('\r\n')
1404        cr = output.count('\r') - crlf
1405        lf = output.count('\n') - crlf
1406        self.seennl |= (lf and self._LF) | (cr and self._CR) \
1407                    | (crlf and self._CRLF)
1408
1409        if self.translate:
1410            if crlf:
1411                output = output.replace("\r\n", "\n")
1412            if cr:
1413                output = output.replace("\r", "\n")
1414
1415        return output
1416
1417    def getstate(self):
1418        if self.decoder is None:
1419            buf = b""
1420            flag = 0
1421        else:
1422            buf, flag = self.decoder.getstate()
1423        flag <<= 1
1424        if self.pendingcr:
1425            flag |= 1
1426        return buf, flag
1427
1428    def setstate(self, state):
1429        buf, flag = state
1430        self.pendingcr = bool(flag & 1)
1431        if self.decoder is not None:
1432            self.decoder.setstate((buf, flag >> 1))
1433
1434    def reset(self):
1435        self.seennl = 0
1436        self.pendingcr = False
1437        if self.decoder is not None:
1438            self.decoder.reset()
1439
1440    _LF = 1
1441    _CR = 2
1442    _CRLF = 4
1443
1444    @property
1445    def newlines(self):
1446        return (None,
1447                "\n",
1448                "\r",
1449                ("\r", "\n"),
1450                "\r\n",
1451                ("\n", "\r\n"),
1452                ("\r", "\r\n"),
1453                ("\r", "\n", "\r\n")
1454               )[self.seennl]
1455
1456
1457class TextIOWrapper(TextIOBase):
1458
1459    r"""Character and line based layer over a BufferedIOBase object, buffer.
1460
1461    encoding gives the name of the encoding that the stream will be
1462    decoded or encoded with. It defaults to locale.getpreferredencoding.
1463
1464    errors determines the strictness of encoding and decoding (see the
1465    codecs.register) and defaults to "strict".
1466
1467    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1468    handling of line endings. If it is None, universal newlines is
1469    enabled.  With this enabled, on input, the lines endings '\n', '\r',
1470    or '\r\n' are translated to '\n' before being returned to the
1471    caller. Conversely, on output, '\n' is translated to the system
1472    default line separator, os.linesep. If newline is any other of its
1473    legal values, that newline becomes the newline when the file is read
1474    and it is returned untranslated. On output, '\n' is converted to the
1475    newline.
1476
1477    If line_buffering is True, a call to flush is implied when a call to
1478    write contains a newline character.
1479    """
1480
1481    _CHUNK_SIZE = 2048
1482
1483    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1484                 line_buffering=False):
1485        if newline is not None and not isinstance(newline, basestring):
1486            raise TypeError("illegal newline type: %r" % (type(newline),))
1487        if newline not in (None, "", "\n", "\r", "\r\n"):
1488            raise ValueError("illegal newline value: %r" % (newline,))
1489        if encoding is None:
1490            try:
1491                import locale
1492            except ImportError:
1493                # Importing locale may fail if Python is being built
1494                encoding = "ascii"
1495            else:
1496                encoding = locale.getpreferredencoding()
1497
1498        if not isinstance(encoding, basestring):
1499            raise ValueError("invalid encoding: %r" % encoding)
1500
1501        if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding:
1502            msg = ("%r is not a text encoding; "
1503                   "use codecs.open() to handle arbitrary codecs")
1504            warnings.warnpy3k(msg % encoding, stacklevel=2)
1505
1506        if errors is None:
1507            errors = "strict"
1508        else:
1509            if not isinstance(errors, basestring):
1510                raise ValueError("invalid errors: %r" % errors)
1511
1512        self._buffer = buffer
1513        self._line_buffering = line_buffering
1514        self._encoding = encoding
1515        self._errors = errors
1516        self._readuniversal = not newline
1517        self._readtranslate = newline is None
1518        self._readnl = newline
1519        self._writetranslate = newline != ''
1520        self._writenl = newline or os.linesep
1521        self._encoder = None
1522        self._decoder = None
1523        self._decoded_chars = ''  # buffer for text returned from decoder
1524        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1525        self._snapshot = None  # info for reconstructing decoder state
1526        self._seekable = self._telling = self.buffer.seekable()
1527
1528        if self._seekable and self.writable():
1529            position = self.buffer.tell()
1530            if position != 0:
1531                try:
1532                    self._get_encoder().setstate(0)
1533                except LookupError:
1534                    # Sometimes the encoder doesn't exist
1535                    pass
1536
1537    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1538    # where dec_flags is the second (integer) item of the decoder state
1539    # and next_input is the chunk of input bytes that comes next after the
1540    # snapshot point.  We use this to reconstruct decoder states in tell().
1541
1542    # Naming convention:
1543    #   - "bytes_..." for integer variables that count input bytes
1544    #   - "chars_..." for integer variables that count decoded characters
1545
1546    def __repr__(self):
1547        try:
1548            name = self.name
1549        except Exception:
1550            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1551        else:
1552            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1553                name, self.encoding)
1554
1555    @property
1556    def encoding(self):
1557        return self._encoding
1558
1559    @property
1560    def errors(self):
1561        return self._errors
1562
1563    @property
1564    def line_buffering(self):
1565        return self._line_buffering
1566
1567    @property
1568    def buffer(self):
1569        return self._buffer
1570
1571    def seekable(self):
1572        if self.closed:
1573            raise ValueError("I/O operation on closed file.")
1574        return self._seekable
1575
1576    def readable(self):
1577        return self.buffer.readable()
1578
1579    def writable(self):
1580        return self.buffer.writable()
1581
1582    def flush(self):
1583        self.buffer.flush()
1584        self._telling = self._seekable
1585
1586    def close(self):
1587        if self.buffer is not None and not self.closed:
1588            try:
1589                self.flush()
1590            finally:
1591                self.buffer.close()
1592
1593    @property
1594    def closed(self):
1595        return self.buffer.closed
1596
1597    @property
1598    def name(self):
1599        return self.buffer.name
1600
1601    def fileno(self):
1602        return self.buffer.fileno()
1603
1604    def isatty(self):
1605        return self.buffer.isatty()
1606
1607    def write(self, s):
1608        if self.closed:
1609            raise ValueError("write to closed file")
1610        if not isinstance(s, unicode):
1611            raise TypeError("can't write %s to text stream" %
1612                            s.__class__.__name__)
1613        length = len(s)
1614        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1615        if haslf and self._writetranslate and self._writenl != "\n":
1616            s = s.replace("\n", self._writenl)
1617        encoder = self._encoder or self._get_encoder()
1618        # XXX What if we were just reading?
1619        b = encoder.encode(s)
1620        self.buffer.write(b)
1621        if self._line_buffering and (haslf or "\r" in s):
1622            self.flush()
1623        self._snapshot = None
1624        if self._decoder:
1625            self._decoder.reset()
1626        return length
1627
1628    def _get_encoder(self):
1629        make_encoder = codecs.getincrementalencoder(self._encoding)
1630        self._encoder = make_encoder(self._errors)
1631        return self._encoder
1632
1633    def _get_decoder(self):
1634        make_decoder = codecs.getincrementaldecoder(self._encoding)
1635        decoder = make_decoder(self._errors)
1636        if self._readuniversal:
1637            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1638        self._decoder = decoder
1639        return decoder
1640
1641    # The following three methods implement an ADT for _decoded_chars.
1642    # Text returned from the decoder is buffered here until the client
1643    # requests it by calling our read() or readline() method.
1644    def _set_decoded_chars(self, chars):
1645        """Set the _decoded_chars buffer."""
1646        self._decoded_chars = chars
1647        self._decoded_chars_used = 0
1648
1649    def _get_decoded_chars(self, n=None):
1650        """Advance into the _decoded_chars buffer."""
1651        offset = self._decoded_chars_used
1652        if n is None:
1653            chars = self._decoded_chars[offset:]
1654        else:
1655            chars = self._decoded_chars[offset:offset + n]
1656        self._decoded_chars_used += len(chars)
1657        return chars
1658
1659    def _rewind_decoded_chars(self, n):
1660        """Rewind the _decoded_chars buffer."""
1661        if self._decoded_chars_used < n:
1662            raise AssertionError("rewind decoded_chars out of bounds")
1663        self._decoded_chars_used -= n
1664
1665    def _read_chunk(self):
1666        """
1667        Read and decode the next chunk of data from the BufferedReader.
1668        """
1669
1670        # The return value is True unless EOF was reached.  The decoded
1671        # string is placed in self._decoded_chars (replacing its previous
1672        # value).  The entire input chunk is sent to the decoder, though
1673        # some of it may remain buffered in the decoder, yet to be
1674        # converted.
1675
1676        if self._decoder is None:
1677            raise ValueError("no decoder")
1678
1679        if self._telling:
1680            # To prepare for tell(), we need to snapshot a point in the
1681            # file where the decoder's input buffer is empty.
1682
1683            dec_buffer, dec_flags = self._decoder.getstate()
1684            # Given this, we know there was a valid snapshot point
1685            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1686
1687        # Read a chunk, decode it, and put the result in self._decoded_chars.
1688        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1689        eof = not input_chunk
1690        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1691
1692        if self._telling:
1693            # At the snapshot point, len(dec_buffer) bytes before the read,
1694            # the next input to be decoded is dec_buffer + input_chunk.
1695            self._snapshot = (dec_flags, dec_buffer + input_chunk)
1696
1697        return not eof
1698
1699    def _pack_cookie(self, position, dec_flags=0,
1700                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1701        # The meaning of a tell() cookie is: seek to position, set the
1702        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1703        # into the decoder with need_eof as the EOF flag, then skip
1704        # chars_to_skip characters of the decoded result.  For most simple
1705        # decoders, tell() will often just give a byte offset in the file.
1706        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1707               (chars_to_skip<<192) | bool(need_eof)<<256)
1708
1709    def _unpack_cookie(self, bigint):
1710        rest, position = divmod(bigint, 1<<64)
1711        rest, dec_flags = divmod(rest, 1<<64)
1712        rest, bytes_to_feed = divmod(rest, 1<<64)
1713        need_eof, chars_to_skip = divmod(rest, 1<<64)
1714        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1715
1716    def tell(self):
1717        if not self._seekable:
1718            raise IOError("underlying stream is not seekable")
1719        if not self._telling:
1720            raise IOError("telling position disabled by next() call")
1721        self.flush()
1722        position = self.buffer.tell()
1723        decoder = self._decoder
1724        if decoder is None or self._snapshot is None:
1725            if self._decoded_chars:
1726                # This should never happen.
1727                raise AssertionError("pending decoded text")
1728            return position
1729
1730        # Skip backward to the snapshot point (see _read_chunk).
1731        dec_flags, next_input = self._snapshot
1732        position -= len(next_input)
1733
1734        # How many decoded characters have been used up since the snapshot?
1735        chars_to_skip = self._decoded_chars_used
1736        if chars_to_skip == 0:
1737            # We haven't moved from the snapshot point.
1738            return self._pack_cookie(position, dec_flags)
1739
1740        # Starting from the snapshot position, we will walk the decoder
1741        # forward until it gives us enough decoded characters.
1742        saved_state = decoder.getstate()
1743        try:
1744            # Note our initial start point.
1745            decoder.setstate((b'', dec_flags))
1746            start_pos = position
1747            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1748            need_eof = 0
1749
1750            # Feed the decoder one byte at a time.  As we go, note the
1751            # nearest "safe start point" before the current location
1752            # (a point where the decoder has nothing buffered, so seek()
1753            # can safely start from there and advance to this location).
1754            for next_byte in next_input:
1755                bytes_fed += 1
1756                chars_decoded += len(decoder.decode(next_byte))
1757                dec_buffer, dec_flags = decoder.getstate()
1758                if not dec_buffer and chars_decoded <= chars_to_skip:
1759                    # Decoder buffer is empty, so this is a safe start point.
1760                    start_pos += bytes_fed
1761                    chars_to_skip -= chars_decoded
1762                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1763                if chars_decoded >= chars_to_skip:
1764                    break
1765            else:
1766                # We didn't get enough decoded data; signal EOF to get more.
1767                chars_decoded += len(decoder.decode(b'', final=True))
1768                need_eof = 1
1769                if chars_decoded < chars_to_skip:
1770                    raise IOError("can't reconstruct logical file position")
1771
1772            # The returned cookie corresponds to the last safe start point.
1773            return self._pack_cookie(
1774                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1775        finally:
1776            decoder.setstate(saved_state)
1777
1778    def truncate(self, pos=None):
1779        self.flush()
1780        if pos is None:
1781            pos = self.tell()
1782        return self.buffer.truncate(pos)
1783
1784    def detach(self):
1785        if self.buffer is None:
1786            raise ValueError("buffer is already detached")
1787        self.flush()
1788        buffer = self._buffer
1789        self._buffer = None
1790        return buffer
1791
1792    def seek(self, cookie, whence=0):
1793        if self.closed:
1794            raise ValueError("tell on closed file")
1795        if not self._seekable:
1796            raise IOError("underlying stream is not seekable")
1797        if whence == 1: # seek relative to current position
1798            if cookie != 0:
1799                raise IOError("can't do nonzero cur-relative seeks")
1800            # Seeking to the current position should attempt to
1801            # sync the underlying buffer with the current position.
1802            whence = 0
1803            cookie = self.tell()
1804        if whence == 2: # seek relative to end of file
1805            if cookie != 0:
1806                raise IOError("can't do nonzero end-relative seeks")
1807            self.flush()
1808            position = self.buffer.seek(0, 2)
1809            self._set_decoded_chars('')
1810            self._snapshot = None
1811            if self._decoder:
1812                self._decoder.reset()
1813            return position
1814        if whence != 0:
1815            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1816                             (whence,))
1817        if cookie < 0:
1818            raise ValueError("negative seek position %r" % (cookie,))
1819        self.flush()
1820
1821        # The strategy of seek() is to go back to the safe start point
1822        # and replay the effect of read(chars_to_skip) from there.
1823        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1824            self._unpack_cookie(cookie)
1825
1826        # Seek back to the safe start point.
1827        self.buffer.seek(start_pos)
1828        self._set_decoded_chars('')
1829        self._snapshot = None
1830
1831        # Restore the decoder to its state from the safe start point.
1832        if cookie == 0 and self._decoder:
1833            self._decoder.reset()
1834        elif self._decoder or dec_flags or chars_to_skip:
1835            self._decoder = self._decoder or self._get_decoder()
1836            self._decoder.setstate((b'', dec_flags))
1837            self._snapshot = (dec_flags, b'')
1838
1839        if chars_to_skip:
1840            # Just like _read_chunk, feed the decoder and save a snapshot.
1841            input_chunk = self.buffer.read(bytes_to_feed)
1842            self._set_decoded_chars(
1843                self._decoder.decode(input_chunk, need_eof))
1844            self._snapshot = (dec_flags, input_chunk)
1845
1846            # Skip chars_to_skip of the decoded characters.
1847            if len(self._decoded_chars) < chars_to_skip:
1848                raise IOError("can't restore logical file position")
1849            self._decoded_chars_used = chars_to_skip
1850
1851        # Finally, reset the encoder (merely useful for proper BOM handling)
1852        try:
1853            encoder = self._encoder or self._get_encoder()
1854        except LookupError:
1855            # Sometimes the encoder doesn't exist
1856            pass
1857        else:
1858            if cookie != 0:
1859                encoder.setstate(0)
1860            else:
1861                encoder.reset()
1862        return cookie
1863
1864    def read(self, n=None):
1865        self._checkReadable()
1866        if n is None:
1867            n = -1
1868        decoder = self._decoder or self._get_decoder()
1869        try:
1870            n.__index__
1871        except AttributeError:
1872            raise TypeError("an integer is required")
1873        if n < 0:
1874            # Read everything.
1875            result = (self._get_decoded_chars() +
1876                      decoder.decode(self.buffer.read(), final=True))
1877            self._set_decoded_chars('')
1878            self._snapshot = None
1879            return result
1880        else:
1881            # Keep reading chunks until we have n characters to return.
1882            eof = False
1883            result = self._get_decoded_chars(n)
1884            while len(result) < n and not eof:
1885                eof = not self._read_chunk()
1886                result += self._get_decoded_chars(n - len(result))
1887            return result
1888
1889    def next(self):
1890        self._telling = False
1891        line = self.readline()
1892        if not line:
1893            self._snapshot = None
1894            self._telling = self._seekable
1895            raise StopIteration
1896        return line
1897
1898    def readline(self, limit=None):
1899        if self.closed:
1900            raise ValueError("read from closed file")
1901        if limit is None:
1902            limit = -1
1903        elif not isinstance(limit, (int, long)):
1904            raise TypeError("limit must be an integer")
1905
1906        # Grab all the decoded text (we will rewind any extra bits later).
1907        line = self._get_decoded_chars()
1908
1909        start = 0
1910        # Make the decoder if it doesn't already exist.
1911        if not self._decoder:
1912            self._get_decoder()
1913
1914        pos = endpos = None
1915        while True:
1916            if self._readtranslate:
1917                # Newlines are already translated, only search for \n
1918                pos = line.find('\n', start)
1919                if pos >= 0:
1920                    endpos = pos + 1
1921                    break
1922                else:
1923                    start = len(line)
1924
1925            elif self._readuniversal:
1926                # Universal newline search. Find any of \r, \r\n, \n
1927                # The decoder ensures that \r\n are not split in two pieces
1928
1929                # In C we'd look for these in parallel of course.
1930                nlpos = line.find("\n", start)
1931                crpos = line.find("\r", start)
1932                if crpos == -1:
1933                    if nlpos == -1:
1934                        # Nothing found
1935                        start = len(line)
1936                    else:
1937                        # Found \n
1938                        endpos = nlpos + 1
1939                        break
1940                elif nlpos == -1:
1941                    # Found lone \r
1942                    endpos = crpos + 1
1943                    break
1944                elif nlpos < crpos:
1945                    # Found \n
1946                    endpos = nlpos + 1
1947                    break
1948                elif nlpos == crpos + 1:
1949                    # Found \r\n
1950                    endpos = crpos + 2
1951                    break
1952                else:
1953                    # Found \r
1954                    endpos = crpos + 1
1955                    break
1956            else:
1957                # non-universal
1958                pos = line.find(self._readnl)
1959                if pos >= 0:
1960                    endpos = pos + len(self._readnl)
1961                    break
1962
1963            if limit >= 0 and len(line) >= limit:
1964                endpos = limit  # reached length limit
1965                break
1966
1967            # No line ending seen yet - get more data'
1968            while self._read_chunk():
1969                if self._decoded_chars:
1970                    break
1971            if self._decoded_chars:
1972                line += self._get_decoded_chars()
1973            else:
1974                # end of file
1975                self._set_decoded_chars('')
1976                self._snapshot = None
1977                return line
1978
1979        if limit >= 0 and endpos > limit:
1980            endpos = limit  # don't exceed limit
1981
1982        # Rewind _decoded_chars to just after the line ending we found.
1983        self._rewind_decoded_chars(len(line) - endpos)
1984        return line[:endpos]
1985
1986    @property
1987    def newlines(self):
1988        return self._decoder.newlines if self._decoder else None
1989
1990
1991class StringIO(TextIOWrapper):
1992    """Text I/O implementation using an in-memory buffer.
1993
1994    The initial_value argument sets the value of object.  The newline
1995    argument is like the one of TextIOWrapper's constructor.
1996    """
1997
1998    def __init__(self, initial_value="", newline="\n"):
1999        super(StringIO, self).__init__(BytesIO(),
2000                                       encoding="utf-8",
2001                                       errors="strict",
2002                                       newline=newline)
2003        # Issue #5645: make universal newlines semantics the same as in the
2004        # C version, even under Windows.
2005        if newline is None:
2006            self._writetranslate = False
2007        if initial_value:
2008            if not isinstance(initial_value, unicode):
2009                initial_value = unicode(initial_value)
2010            self.write(initial_value)
2011            self.seek(0)
2012
2013    def getvalue(self):
2014        self.flush()
2015        decoder = self._decoder or self._get_decoder()
2016        old_state = decoder.getstate()
2017        decoder.reset()
2018        try:
2019            return decoder.decode(self.buffer.getvalue(), final=True)
2020        finally:
2021            decoder.setstate(old_state)
2022
2023    def __repr__(self):
2024        # TextIOWrapper tells the encoding in its repr. In StringIO,
2025        # that's a implementation detail.
2026        return object.__repr__(self)
2027
2028    @property
2029    def errors(self):
2030        return None
2031
2032    @property
2033    def encoding(self):
2034        return None
2035
2036    def detach(self):
2037        # This doesn't make sense on StringIO.
2038        self._unsupported("detach")
Note: See TracBrowser for help on using the repository browser.