source: titan/mediathek/localhoster/lib/python2.7/tarfile.py @ 40114

Last change on this file since 40114 was 40094, checked in by obi, 5 years ago

tithek: add youtube-dl support

File size: 88.2 KB
1# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission  is  hereby granted,  free  of charge,  to  any person
9# obtaining a  copy of  this software  and associated documentation
10# files  (the  "Software"),  to   deal  in  the  Software   without
11# restriction,  including  without limitation  the  rights to  use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies  of  the  Software,  and to  permit  persons  to  whom the
14# Software  is  furnished  to  do  so,  subject  to  the  following
15# conditions:
16#
17# The above copyright  notice and this  permission notice shall  be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32__version__ = "$Revision: 85213 $"
33# $Source$
34
35version     = "0.9.0"
36__author__  = "Lars Gustäbel (lars@gustaebel.de)"
37__date__    = "$Date$"
38__cvsid__   = "$Id$"
39__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44from __builtin__ import open as bltn_open
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52import copy
53import re
54import operator
55
56try:
57    import grp, pwd
58except ImportError:
59    grp = pwd = None
60
61# from tarfile import *
62__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64#---------------------------------------------------------
65# tar constants
66#---------------------------------------------------------
67NUL = "\0"                      # the null character
68BLOCKSIZE = 512                 # length of processing blocks
69RECORDSIZE = BLOCKSIZE * 20     # length of records
70GNU_MAGIC = "ustar  \0"         # magic gnu tar string
71POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
72
73LENGTH_NAME = 100               # maximum length of a filename
74LENGTH_LINK = 100               # maximum length of a linkname
75LENGTH_PREFIX = 155             # maximum length of the prefix field
76
77REGTYPE = "0"                   # regular file
78AREGTYPE = "\0"                 # regular file
79LNKTYPE = "1"                   # link (inside tarfile)
80SYMTYPE = "2"                   # symbolic link
81CHRTYPE = "3"                   # character special device
82BLKTYPE = "4"                   # block special device
83DIRTYPE = "5"                   # directory
84FIFOTYPE = "6"                  # fifo special device
85CONTTYPE = "7"                  # contiguous file
86
87GNUTYPE_LONGNAME = "L"          # GNU tar longname
88GNUTYPE_LONGLINK = "K"          # GNU tar longlink
89GNUTYPE_SPARSE = "S"            # GNU tar sparse file
90
91XHDTYPE = "x"                   # POSIX.1-2001 extended header
92XGLTYPE = "g"                   # POSIX.1-2001 global header
93SOLARIS_XHDTYPE = "X"           # Solaris extended header
94
95USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
96GNU_FORMAT = 1                  # GNU tar format
97PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
98DEFAULT_FORMAT = GNU_FORMAT
99
100#---------------------------------------------------------
101# tarfile constants
102#---------------------------------------------------------
103# File types that tarfile supports:
104SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
105                   SYMTYPE, DIRTYPE, FIFOTYPE,
106                   CONTTYPE, CHRTYPE, BLKTYPE,
107                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
108                   GNUTYPE_SPARSE)
109
110# File types that will be treated as a regular file.
111REGULAR_TYPES = (REGTYPE, AREGTYPE,
112                 CONTTYPE, GNUTYPE_SPARSE)
113
114# File types that are part of the GNU tar format.
115GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
116             GNUTYPE_SPARSE)
117
118# Fields from a pax header that override a TarInfo attribute.
119PAX_FIELDS = ("path", "linkpath", "size", "mtime",
120              "uid", "gid", "uname", "gname")
121
122# Fields in a pax header that are numbers, all other fields
123# are treated as strings.
124PAX_NUMBER_FIELDS = {
125    "atime": float,
126    "ctime": float,
127    "mtime": float,
128    "uid": int,
129    "gid": int,
130    "size": int
131}
132
133#---------------------------------------------------------
134# Bits used in the mode field, values in octal.
135#---------------------------------------------------------
136S_IFLNK = 0120000        # symbolic link
137S_IFREG = 0100000        # regular file
138S_IFBLK = 0060000        # block device
139S_IFDIR = 0040000        # directory
140S_IFCHR = 0020000        # character device
141S_IFIFO = 0010000        # fifo
142
143TSUID   = 04000          # set UID on execution
144TSGID   = 02000          # set GID on execution
145TSVTX   = 01000          # reserved
146
147TUREAD  = 0400           # read by owner
148TUWRITE = 0200           # write by owner
149TUEXEC  = 0100           # execute/search by owner
150TGREAD  = 0040           # read by group
151TGWRITE = 0020           # write by group
152TGEXEC  = 0010           # execute/search by group
153TOREAD  = 0004           # read by other
154TOWRITE = 0002           # write by other
155TOEXEC  = 0001           # execute/search by other
156
157#---------------------------------------------------------
158# initialization
159#---------------------------------------------------------
160ENCODING = sys.getfilesystemencoding()
161if ENCODING is None:
162    ENCODING = sys.getdefaultencoding()
163
164#---------------------------------------------------------
165# Some useful functions
166#---------------------------------------------------------
167
168def stn(s, length):
169    """Convert a python string to a null-terminated string buffer.
170    """
171    return s[:length] + (length - len(s)) * NUL
172
173def nts(s):
174    """Convert a null-terminated string field to a python string.
175    """
176    # Use the string up to the first null char.
177    p = s.find("\0")
178    if p == -1:
179        return s
180    return s[:p]
181
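# --- Illustrative sketch, not part of the original module (the helper name
# _demo_string_fields is ours): how stn()/nts() round-trip the fixed-width,
# NUL-padded string fields of a tar header. ---
def _demo_string_fields():
    field = stn("hello.txt", 100)          # pad to the 100-byte name field
    assert len(field) == 100
    assert nts(field) == "hello.txt"       # read back up to the first NUL
    # Over-long values are silently truncated and then carry no NUL at all.
    assert stn("a" * 150, 100) == "a" * 100
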
182def nti(s):
183    """Convert a number field to a python number.
184    """
185    # There are two possible encodings for a number field, see
186    # itn() below.
187    if s[0] != chr(0200):
188        try:
189            n = int(nts(s).strip() or "0", 8)
190        except ValueError:
191            raise InvalidHeaderError("invalid header")
192    else:
193        n = 0L
194        for i in xrange(len(s) - 1):
195            n <<= 8
196            n += ord(s[i + 1])
197    return n
198
199def itn(n, digits=8, format=DEFAULT_FORMAT):
200    """Convert a python number to a number field.
201    """
202    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
203    # octal digits followed by a null-byte, this allows values up to
204    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
205    # that if necessary. A leading 0200 byte indicates this particular
206    # encoding, the following digits-1 bytes are a big-endian
207    # representation. This allows values up to (256**(digits-1))-1.
208    if 0 <= n < 8 ** (digits - 1):
209        s = "%0*o" % (digits - 1, n) + NUL
210    else:
211        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
212            raise ValueError("overflow in number field")
213
214        if n < 0:
215            # XXX We mimic GNU tar's behaviour with negative numbers,
216            # this could raise OverflowError.
217            n = struct.unpack("L", struct.pack("l", n))[0]
218
219        s = ""
220        for i in xrange(digits - 1):
221            s = chr(n & 0377) + s
222            n >>= 8
223        s = chr(0200) + s
224    return s
225
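# --- Illustrative sketch, not part of the original module (the helper name
# _demo_number_fields is ours): the two numeric encodings handled by
# itn()/nti(). ---
def _demo_number_fields():
    # Values below 8**(digits-1) use the POSIX octal-and-NUL form.
    assert itn(511) == "0000777" + NUL
    assert nti(itn(511)) == 511
    # Larger values fall back to GNU tar's base-256 form, marked by a
    # leading 0200 byte, and still round-trip through nti().
    big = 8 ** 11                          # too large for a 12-digit octal field
    assert itn(big, 12, GNU_FORMAT)[0] == chr(0200)
    assert nti(itn(big, 12, GNU_FORMAT)) == big
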
226def uts(s, encoding, errors):
227    """Convert a unicode object to a string.
228    """
229    if errors == "utf-8":
230        # An extra error handler similar to the -o invalid=UTF-8 option
231        # in POSIX.1-2001. Replace untranslatable characters with their
232        # UTF-8 representation.
233        try:
234            return s.encode(encoding, "strict")
235        except UnicodeEncodeError:
236            x = []
237            for c in s:
238                try:
239                    x.append(c.encode(encoding, "strict"))
240                except UnicodeEncodeError:
241                    x.append(c.encode("utf8"))
242            return "".join(x)
243    else:
244        return s.encode(encoding, errors)
245
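# --- Illustrative sketch, not part of the original module (the helper name
# _demo_uts is ours): with errors="utf-8", uts() keeps untranslatable
# characters as their UTF-8 bytes instead of failing. ---
def _demo_uts():
    assert uts(u"plain", "ascii", "strict") == "plain"
    # u'\xe4' cannot be encoded to ASCII; the fallback stores its UTF-8 bytes.
    assert uts(u"\xe4", "ascii", "utf-8") == "\xc3\xa4"
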
246def calc_chksums(buf):
247    """Calculate the checksum for a member's header by summing up all
248       characters except for the chksum field which is treated as if
249       it was filled with spaces. According to the GNU tar sources,
250       some tars (Sun and NeXT) calculate chksum with signed char,
251       which will be different if there are chars in the buffer with
252       the high bit set. So we calculate two checksums, unsigned and
253       signed.
254    """
255    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
256    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
257    return unsigned_chksum, signed_chksum
258
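# --- Illustrative sketch, not part of the original module (the helper name
# _demo_chksums is ours): calc_chksums() skips the 8-byte chksum field
# (offsets 148-155) and counts it as spaces, which is where the constant
# 256 (8 * ord(" ")) comes from. ---
def _demo_chksums():
    unsigned, signed = calc_chksums(NUL * BLOCKSIZE)
    assert unsigned == signed == 256
    # With high-bit bytes in the header the two variants diverge, which is
    # why frombuf() accepts either one.
    unsigned, signed = calc_chksums(chr(0200) * BLOCKSIZE)
    assert unsigned != signed
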
259def copyfileobj(src, dst, length=None):
260    """Copy length bytes from fileobj src to fileobj dst.
261       If length is None, copy the entire content.
262    """
263    if length == 0:
264        return
265    if length is None:
266        shutil.copyfileobj(src, dst)
267        return
268
269    BUFSIZE = 16 * 1024
270    blocks, remainder = divmod(length, BUFSIZE)
271    for b in xrange(blocks):
272        buf = src.read(BUFSIZE)
273        if len(buf) < BUFSIZE:
274            raise IOError("end of file reached")
275        dst.write(buf)
276
277    if remainder != 0:
278        buf = src.read(remainder)
279        if len(buf) < remainder:
280            raise IOError("end of file reached")
281        dst.write(buf)
282    return
283
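# --- Illustrative sketch, not part of the original module (the helper name
# _demo_copyfileobj is ours): copyfileobj() copies an exact byte count and
# complains about short reads. ---
def _demo_copyfileobj():
    from io import BytesIO
    src, dst = BytesIO("0123456789abcdef"), BytesIO()
    copyfileobj(src, dst, 10)              # copy exactly 10 bytes
    assert dst.getvalue() == "0123456789"
    try:
        copyfileobj(BytesIO("short"), BytesIO(), 100)
    except IOError:
        pass                               # fewer bytes available than requested
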
284filemode_table = (
285    ((S_IFLNK,      "l"),
286     (S_IFREG,      "-"),
287     (S_IFBLK,      "b"),
288     (S_IFDIR,      "d"),
289     (S_IFCHR,      "c"),
290     (S_IFIFO,      "p")),
291
292    ((TUREAD,       "r"),),
293    ((TUWRITE,      "w"),),
294    ((TUEXEC|TSUID, "s"),
295     (TSUID,        "S"),
296     (TUEXEC,       "x")),
297
298    ((TGREAD,       "r"),),
299    ((TGWRITE,      "w"),),
300    ((TGEXEC|TSGID, "s"),
301     (TSGID,        "S"),
302     (TGEXEC,       "x")),
303
304    ((TOREAD,       "r"),),
305    ((TOWRITE,      "w"),),
306    ((TOEXEC|TSVTX, "t"),
307     (TSVTX,        "T"),
308     (TOEXEC,       "x"))
309)
310
311def filemode(mode):
312    """Convert a file's mode to a string of the form
313       -rwxrwxrwx.
314       Used by TarFile.list()
315    """
316    perm = []
317    for table in filemode_table:
318        for bit, char in table:
319            if mode & bit == bit:
320                perm.append(char)
321                break
322        else:
323            perm.append("-")
324    return "".join(perm)
325
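# --- Illustrative sketch, not part of the original module (the helper name
# _demo_filemode is ours): filemode() renders the mode bits defined above,
# including the file-type column and setuid handling. ---
def _demo_filemode():
    assert filemode(0644) == "-rw-r--r--"
    assert filemode(S_IFDIR | 0755) == "drwxr-xr-x"
    assert filemode(S_IFREG | TSUID | 0755) == "-rwsr-xr-x"
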
326class TarError(Exception):
327    """Base exception."""
328    pass
329class ExtractError(TarError):
330    """General exception for extract errors."""
331    pass
332class ReadError(TarError):
333    """Exception for unreadable tar archives."""
334    pass
335class CompressionError(TarError):
336    """Exception for unavailable compression methods."""
337    pass
338class StreamError(TarError):
339    """Exception for unsupported operations on stream-like TarFiles."""
340    pass
341class HeaderError(TarError):
342    """Base exception for header errors."""
343    pass
344class EmptyHeaderError(HeaderError):
345    """Exception for empty headers."""
346    pass
347class TruncatedHeaderError(HeaderError):
348    """Exception for truncated headers."""
349    pass
350class EOFHeaderError(HeaderError):
351    """Exception for end of file headers."""
352    pass
353class InvalidHeaderError(HeaderError):
354    """Exception for invalid headers."""
355    pass
356class SubsequentHeaderError(HeaderError):
357    """Exception for missing and invalid extended headers."""
358    pass
359
360#---------------------------
361# internal stream interface
362#---------------------------
363class _LowLevelFile:
364    """Low-level file object. Supports reading and writing.
365       It is used instead of a regular file object for streaming
366       access.
367    """
368
369    def __init__(self, name, mode):
370        mode = {
371            "r": os.O_RDONLY,
372            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
373        }[mode]
374        if hasattr(os, "O_BINARY"):
375            mode |= os.O_BINARY
376        self.fd = os.open(name, mode, 0666)
377
378    def close(self):
379        os.close(self.fd)
380
381    def read(self, size):
382        return os.read(self.fd, size)
383
384    def write(self, s):
385        os.write(self.fd, s)
386
387class _Stream:
388    """Class that serves as an adapter between TarFile and
389       a stream-like object.  The stream-like object only
390       needs to have a read() or write() method and is accessed
391       blockwise.  Use of gzip or bzip2 compression is possible.
392       A stream-like object could be for example: sys.stdin,
393       sys.stdout, a socket, a tape device etc.
394
395       _Stream is intended to be used only internally.
396    """
397
398    def __init__(self, name, mode, comptype, fileobj, bufsize):
399        """Construct a _Stream object.
400        """
401        self._extfileobj = True
402        if fileobj is None:
403            fileobj = _LowLevelFile(name, mode)
404            self._extfileobj = False
405
406        if comptype == '*':
407            # Enable transparent compression detection for the
408            # stream interface
409            fileobj = _StreamProxy(fileobj)
410            comptype = fileobj.getcomptype()
411
412        self.name     = name or ""
413        self.mode     = mode
414        self.comptype = comptype
415        self.fileobj  = fileobj
416        self.bufsize  = bufsize
417        self.buf      = ""
418        self.pos      = 0L
419        self.closed   = False
420
421        try:
422            if comptype == "gz":
423                try:
424                    import zlib
425                except ImportError:
426                    raise CompressionError("zlib module is not available")
427                self.zlib = zlib
428                self.crc = zlib.crc32("") & 0xffffffffL
429                if mode == "r":
430                    self._init_read_gz()
431                else:
432                    self._init_write_gz()
433
434            elif comptype == "bz2":
435                try:
436                    import bz2
437                except ImportError:
438                    raise CompressionError("bz2 module is not available")
439                if mode == "r":
440                    self.dbuf = ""
441                    self.cmp = bz2.BZ2Decompressor()
442                else:
443                    self.cmp = bz2.BZ2Compressor()
444        except:
445            if not self._extfileobj:
446                self.fileobj.close()
447            self.closed = True
448            raise
449
450    def __del__(self):
451        if hasattr(self, "closed") and not self.closed:
452            self.close()
453
454    def _init_write_gz(self):
455        """Initialize for writing with gzip compression.
456        """
457        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
458                                            -self.zlib.MAX_WBITS,
459                                            self.zlib.DEF_MEM_LEVEL,
460                                            0)
461        timestamp = struct.pack("<L", long(time.time()))
462        self.__write("\037\213\010\010%s\002\377" % timestamp)
463        if type(self.name) is unicode:
464            self.name = self.name.encode("iso-8859-1", "replace")
465        if self.name.endswith(".gz"):
466            self.name = self.name[:-3]
467        self.__write(self.name + NUL)
468
469    def write(self, s):
470        """Write string s to the stream.
471        """
472        if self.comptype == "gz":
473            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
474        self.pos += len(s)
475        if self.comptype != "tar":
476            s = self.cmp.compress(s)
477        self.__write(s)
478
479    def __write(self, s):
480        """Write string s to the stream if a whole new block
481           is ready to be written.
482        """
483        self.buf += s
484        while len(self.buf) > self.bufsize:
485            self.fileobj.write(self.buf[:self.bufsize])
486            self.buf = self.buf[self.bufsize:]
487
488    def close(self):
489        """Close the _Stream object. No operation should be
490           done on it afterwards.
491        """
492        if self.closed:
493            return
494
495        self.closed = True
496        try:
497            if self.mode == "w" and self.comptype != "tar":
498                self.buf += self.cmp.flush()
499
500            if self.mode == "w" and self.buf:
501                self.fileobj.write(self.buf)
502                self.buf = ""
503                if self.comptype == "gz":
504                    # The native zlib crc is an unsigned 32-bit integer, but
505                    # the Python wrapper implicitly casts that to a signed C
506                    # long.  So, on a 32-bit box self.crc may "look negative",
507                    # while the same crc on a 64-bit box may "look positive".
508                    # To avoid irksome warnings from the `struct` module, force
509                    # it to look positive on all boxes.
510                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
511                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
512        finally:
513            if not self._extfileobj:
514                self.fileobj.close()
515
516    def _init_read_gz(self):
517        """Initialize for reading a gzip compressed fileobj.
518        """
519        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
520        self.dbuf = ""
521
522        # taken from gzip.GzipFile with some alterations
523        if self.__read(2) != "\037\213":
524            raise ReadError("not a gzip file")
525        if self.__read(1) != "\010":
526            raise CompressionError("unsupported compression method")
527
528        flag = ord(self.__read(1))
529        self.__read(6)
530
531        if flag & 4:
532            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
533            self.read(xlen)
534        if flag & 8:
535            while True:
536                s = self.__read(1)
537                if not s or s == NUL:
538                    break
539        if flag & 16:
540            while True:
541                s = self.__read(1)
542                if not s or s == NUL:
543                    break
544        if flag & 2:
545            self.__read(2)
546
547    def tell(self):
548        """Return the stream's file pointer position.
549        """
550        return self.pos
551
552    def seek(self, pos=0):
553        """Set the stream's file pointer to pos. Negative seeking
554           is forbidden.
555        """
556        if pos - self.pos >= 0:
557            blocks, remainder = divmod(pos - self.pos, self.bufsize)
558            for i in xrange(blocks):
559                self.read(self.bufsize)
560            self.read(remainder)
561        else:
562            raise StreamError("seeking backwards is not allowed")
563        return self.pos
564
565    def read(self, size=None):
566        """Return the next size number of bytes from the stream.
567           If size is not defined, return all bytes of the stream
568           up to EOF.
569        """
570        if size is None:
571            t = []
572            while True:
573                buf = self._read(self.bufsize)
574                if not buf:
575                    break
576                t.append(buf)
577            buf = "".join(t)
578        else:
579            buf = self._read(size)
580        self.pos += len(buf)
581        return buf
582
583    def _read(self, size):
584        """Return size bytes from the stream.
585        """
586        if self.comptype == "tar":
587            return self.__read(size)
588
589        c = len(self.dbuf)
590        t = [self.dbuf]
591        while c < size:
592            buf = self.__read(self.bufsize)
593            if not buf:
594                break
595            try:
596                buf = self.cmp.decompress(buf)
597            except IOError:
598                raise ReadError("invalid compressed data")
599            t.append(buf)
600            c += len(buf)
601        t = "".join(t)
602        self.dbuf = t[size:]
603        return t[:size]
604
605    def __read(self, size):
606        """Return size bytes from stream. If internal buffer is empty,
607           read another block from the stream.
608        """
609        c = len(self.buf)
610        t = [self.buf]
611        while c < size:
612            buf = self.fileobj.read(self.bufsize)
613            if not buf:
614                break
615            t.append(buf)
616            c += len(buf)
617        t = "".join(t)
618        self.buf = t[size:]
619        return t[:size]
620# class _Stream
621
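# --- Illustrative sketch, not part of the original module (the helper name
# _demo_stream_gz is ours; it requires the zlib module): writing and
# re-reading a gzip-compressed stream through _Stream over an in-memory
# buffer. ---
def _demo_stream_gz():
    from io import BytesIO
    raw = BytesIO()
    out = _Stream("demo.tar", "w", "gz", raw, RECORDSIZE)
    out.write("hello stream" + NUL * 10)
    out.close()                            # flushes data, CRC and length
    raw.seek(0)
    inp = _Stream("demo.tar", "r", "gz", raw, RECORDSIZE)
    assert inp.read(12) == "hello stream"
    inp.close()
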
622class _StreamProxy(object):
623    """Small proxy class that enables transparent compression
624       detection for the Stream interface (mode 'r|*').
625    """
626
627    def __init__(self, fileobj):
628        self.fileobj = fileobj
629        self.buf = self.fileobj.read(BLOCKSIZE)
630
631    def read(self, size):
632        self.read = self.fileobj.read
633        return self.buf
634
635    def getcomptype(self):
636        if self.buf.startswith("\037\213\010"):
637            return "gz"
638        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
639            return "bz2"
640        return "tar"
641
642    def close(self):
643        self.fileobj.close()
644# class StreamProxy
645
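# --- Illustrative sketch, not part of the original module (the helper name
# _demo_streamproxy is ours): how the proxy sniffs the first block to pick
# a compression method for mode 'r|*'. ---
def _demo_streamproxy():
    from io import BytesIO
    assert _StreamProxy(BytesIO("\037\213\010" + NUL * 509)).getcomptype() == "gz"
    assert _StreamProxy(BytesIO("BZh91AY&SY" + NUL * 502)).getcomptype() == "bz2"
    assert _StreamProxy(BytesIO(NUL * BLOCKSIZE)).getcomptype() == "tar"
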
646class _BZ2Proxy(object):
647    """Small proxy class that enables external file object
648       support for "r:bz2" and "w:bz2" modes. This is actually
649       a workaround for a limitation in bz2 module's BZ2File
650       class which (unlike gzip.GzipFile) has no support for
651       a file object argument.
652    """
653
654    blocksize = 16 * 1024
655
656    def __init__(self, fileobj, mode):
657        self.fileobj = fileobj
658        self.mode = mode
659        self.name = getattr(self.fileobj, "name", None)
660        self.init()
661
662    def init(self):
663        import bz2
664        self.pos = 0
665        if self.mode == "r":
666            self.bz2obj = bz2.BZ2Decompressor()
667            self.fileobj.seek(0)
668            self.buf = ""
669        else:
670            self.bz2obj = bz2.BZ2Compressor()
671
672    def read(self, size):
673        b = [self.buf]
674        x = len(self.buf)
675        while x < size:
676            raw = self.fileobj.read(self.blocksize)
677            if not raw:
678                break
679            data = self.bz2obj.decompress(raw)
680            b.append(data)
681            x += len(data)
682        self.buf = "".join(b)
683
684        buf = self.buf[:size]
685        self.buf = self.buf[size:]
686        self.pos += len(buf)
687        return buf
688
689    def seek(self, pos):
690        if pos < self.pos:
691            self.init()
692        self.read(pos - self.pos)
693
694    def tell(self):
695        return self.pos
696
697    def write(self, data):
698        self.pos += len(data)
699        raw = self.bz2obj.compress(data)
700        self.fileobj.write(raw)
701
702    def close(self):
703        if self.mode == "w":
704            raw = self.bz2obj.flush()
705            self.fileobj.write(raw)
706# class _BZ2Proxy
707
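# --- Illustrative sketch, not part of the original module (the helper name
# _demo_bz2proxy is ours; it requires the bz2 module): _BZ2Proxy adds
# file-object support on top of the bz2 compressor objects. ---
def _demo_bz2proxy():
    from io import BytesIO
    raw = BytesIO()
    writer = _BZ2Proxy(raw, "w")
    writer.write("payload " * 64)
    writer.close()                         # flushes the compressor
    reader = _BZ2Proxy(raw, "r")           # seeks back to offset 0 itself
    assert reader.read(7) == "payload"
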
708#------------------------
709# Extraction file object
710#------------------------
711class _FileInFile(object):
712    """A thin wrapper around an existing file object that
713       provides a part of its data as an individual file
714       object.
715    """
716
717    def __init__(self, fileobj, offset, size, sparse=None):
718        self.fileobj = fileobj
719        self.offset = offset
720        self.size = size
721        self.sparse = sparse
722        self.position = 0
723
724    def tell(self):
725        """Return the current file position.
726        """
727        return self.position
728
729    def seek(self, position):
730        """Seek to a position in the file.
731        """
732        self.position = position
733
734    def read(self, size=None):
735        """Read data from the file.
736        """
737        if size is None:
738            size = self.size - self.position
739        else:
740            size = min(size, self.size - self.position)
741
742        if self.sparse is None:
743            return self.readnormal(size)
744        else:
745            return self.readsparse(size)
746
747    def __read(self, size):
748        buf = self.fileobj.read(size)
749        if len(buf) != size:
750            raise ReadError("unexpected end of data")
751        return buf
752
753    def readnormal(self, size):
754        """Read operation for regular files.
755        """
756        self.fileobj.seek(self.offset + self.position)
757        self.position += size
758        return self.__read(size)
759
760    def readsparse(self, size):
761        """Read operation for sparse files.
762        """
763        data = []
764        while size > 0:
765            buf = self.readsparsesection(size)
766            if not buf:
767                break
768            size -= len(buf)
769            data.append(buf)
770        return "".join(data)
771
772    def readsparsesection(self, size):
773        """Read a single section of a sparse file.
774        """
775        section = self.sparse.find(self.position)
776
777        if section is None:
778            return ""
779
780        size = min(size, section.offset + section.size - self.position)
781
782        if isinstance(section, _data):
783            realpos = section.realpos + self.position - section.offset
784            self.fileobj.seek(self.offset + realpos)
785            self.position += size
786            return self.__read(size)
787        else:
788            self.position += size
789            return NUL * size
790#class _FileInFile
791
792
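# --- Illustrative sketch, not part of the original module (the helper name
# _demo_fileinfile is ours): _FileInFile exposes a slice of a larger file
# object, which is how extractfile() serves a member's data region. ---
def _demo_fileinfile():
    from io import BytesIO
    backing = BytesIO("HEADERpayload-bytesTRAILER")
    member = _FileInFile(backing, 6, 13)   # offset 6, size 13
    assert member.read(7) == "payload"
    member.seek(0)
    assert member.read() == "payload-bytes"
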
793class ExFileObject(object):
794    """File-like object for reading an archive member.
795       Is returned by TarFile.extractfile().
796    """
797    blocksize = 1024
798
799    def __init__(self, tarfile, tarinfo):
800        self.fileobj = _FileInFile(tarfile.fileobj,
801                                   tarinfo.offset_data,
802                                   tarinfo.size,
803                                   getattr(tarinfo, "sparse", None))
804        self.name = tarinfo.name
805        self.mode = "r"
806        self.closed = False
807        self.size = tarinfo.size
808
809        self.position = 0
810        self.buffer = ""
811
812    def read(self, size=None):
813        """Read at most size bytes from the file. If size is not
814           present or None, read all data until EOF is reached.
815        """
816        if self.closed:
817            raise ValueError("I/O operation on closed file")
818
819        buf = ""
820        if self.buffer:
821            if size is None:
822                buf = self.buffer
823                self.buffer = ""
824            else:
825                buf = self.buffer[:size]
826                self.buffer = self.buffer[size:]
827
828        if size is None:
829            buf += self.fileobj.read()
830        else:
831            buf += self.fileobj.read(size - len(buf))
832
833        self.position += len(buf)
834        return buf
835
836    def readline(self, size=-1):
837        """Read one entire line from the file. If size is present
838           and non-negative, return a string with at most that
839           size, which may be an incomplete line.
840        """
841        if self.closed:
842            raise ValueError("I/O operation on closed file")
843
844        if "\n" in self.buffer:
845            pos = self.buffer.find("\n") + 1
846        else:
847            buffers = [self.buffer]
848            while True:
849                buf = self.fileobj.read(self.blocksize)
850                buffers.append(buf)
851                if not buf or "\n" in buf:
852                    self.buffer = "".join(buffers)
853                    pos = self.buffer.find("\n") + 1
854                    if pos == 0:
855                        # no newline found.
856                        pos = len(self.buffer)
857                    break
858
859        if size != -1:
860            pos = min(size, pos)
861
862        buf = self.buffer[:pos]
863        self.buffer = self.buffer[pos:]
864        self.position += len(buf)
865        return buf
866
867    def readlines(self):
868        """Return a list with all remaining lines.
869        """
870        result = []
871        while True:
872            line = self.readline()
873            if not line: break
874            result.append(line)
875        return result
876
877    def tell(self):
878        """Return the current file position.
879        """
880        if self.closed:
881            raise ValueError("I/O operation on closed file")
882
883        return self.position
884
885    def seek(self, pos, whence=os.SEEK_SET):
886        """Seek to a position in the file.
887        """
888        if self.closed:
889            raise ValueError("I/O operation on closed file")
890
891        if whence == os.SEEK_SET:
892            self.position = min(max(pos, 0), self.size)
893        elif whence == os.SEEK_CUR:
894            if pos < 0:
895                self.position = max(self.position + pos, 0)
896            else:
897                self.position = min(self.position + pos, self.size)
898        elif whence == os.SEEK_END:
899            self.position = max(min(self.size + pos, self.size), 0)
900        else:
901            raise ValueError("Invalid argument")
902
903        self.buffer = ""
904        self.fileobj.seek(self.position)
905
906    def close(self):
907        """Close the file object.
908        """
909        self.closed = True
910
911    def __iter__(self):
912        """Get an iterator over the file's lines.
913        """
914        while True:
915            line = self.readline()
916            if not line:
917                break
918            yield line
919#class ExFileObject
920
921#------------------
922# Exported Classes
923#------------------
924class TarInfo(object):
925    """Informational class which holds the details about an
926       archive member given by a tar header block.
927       TarInfo objects are returned by TarFile.getmember(),
928       TarFile.getmembers() and TarFile.gettarinfo() and are
929       usually created internally.
930    """
931
932    def __init__(self, name=""):
933        """Construct a TarInfo object. name is the optional name
934           of the member.
935        """
936        self.name = name        # member name
937        self.mode = 0644        # file permissions
938        self.uid = 0            # user id
939        self.gid = 0            # group id
940        self.size = 0           # file size
941        self.mtime = 0          # modification time
942        self.chksum = 0         # header checksum
943        self.type = REGTYPE     # member type
944        self.linkname = ""      # link name
945        self.uname = ""         # user name
946        self.gname = ""         # group name
947        self.devmajor = 0       # device major number
948        self.devminor = 0       # device minor number
949
950        self.offset = 0         # the tar header starts here
951        self.offset_data = 0    # the file's data starts here
952
953        self.pax_headers = {}   # pax header information
954
955    # In pax headers the "name" and "linkname" field are called
956    # "path" and "linkpath".
957    def _getpath(self):
958        return self.name
959    def _setpath(self, name):
960        self.name = name
961    path = property(_getpath, _setpath)
962
963    def _getlinkpath(self):
964        return self.linkname
965    def _setlinkpath(self, linkname):
966        self.linkname = linkname
967    linkpath = property(_getlinkpath, _setlinkpath)
968
969    def __repr__(self):
970        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
971
972    def get_info(self, encoding, errors):
973        """Return the TarInfo's attributes as a dictionary.
974        """
975        info = {
976            "name":     self.name,
977            "mode":     self.mode & 07777,
978            "uid":      self.uid,
979            "gid":      self.gid,
980            "size":     self.size,
981            "mtime":    self.mtime,
982            "chksum":   self.chksum,
983            "type":     self.type,
984            "linkname": self.linkname,
985            "uname":    self.uname,
986            "gname":    self.gname,
987            "devmajor": self.devmajor,
988            "devminor": self.devminor
989        }
990
991        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
992            info["name"] += "/"
993
994        for key in ("name", "linkname", "uname", "gname"):
995            if type(info[key]) is unicode:
996                info[key] = info[key].encode(encoding, errors)
997
998        return info
999
1000    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
1001        """Return a tar header as a string of 512 byte blocks.
1002        """
1003        info = self.get_info(encoding, errors)
1004
1005        if format == USTAR_FORMAT:
1006            return self.create_ustar_header(info)
1007        elif format == GNU_FORMAT:
1008            return self.create_gnu_header(info)
1009        elif format == PAX_FORMAT:
1010            return self.create_pax_header(info, encoding, errors)
1011        else:
1012            raise ValueError("invalid format")
1013
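    # --- Illustrative sketch, not part of the original class (the method name
    # _demo_tobuf_blocks is ours): tobuf() always returns whole 512-byte
    # blocks, and a name longer than 100 characters makes the GNU format
    # prepend a longname header plus its padded payload block. ---
    @staticmethod
    def _demo_tobuf_blocks():
        assert len(TarInfo("short.txt").tobuf(GNU_FORMAT)) == BLOCKSIZE
        assert len(TarInfo("d" * 150).tobuf(GNU_FORMAT)) == 3 * BLOCKSIZE
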
1014    def create_ustar_header(self, info):
1015        """Return the object as a ustar header block.
1016        """
1017        info["magic"] = POSIX_MAGIC
1018
1019        if len(info["linkname"]) > LENGTH_LINK:
1020            raise ValueError("linkname is too long")
1021
1022        if len(info["name"]) > LENGTH_NAME:
1023            info["prefix"], info["name"] = self._posix_split_name(info["name"])
1024
1025        return self._create_header(info, USTAR_FORMAT)
1026
1027    def create_gnu_header(self, info):
1028        """Return the object as a GNU header block sequence.
1029        """
1030        info["magic"] = GNU_MAGIC
1031
1032        buf = ""
1033        if len(info["linkname"]) > LENGTH_LINK:
1034            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1035
1036        if len(info["name"]) > LENGTH_NAME:
1037            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1038
1039        return buf + self._create_header(info, GNU_FORMAT)
1040
1041    def create_pax_header(self, info, encoding, errors):
1042        """Return the object as a ustar header block. If it cannot be
1043           represented this way, prepend a pax extended header sequence
1044           with supplement information.
1045        """
1046        info["magic"] = POSIX_MAGIC
1047        pax_headers = self.pax_headers.copy()
1048
1049        # Test string fields for values that exceed the field length or cannot
1050        # be represented in ASCII encoding.
1051        for name, hname, length in (
1052                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1053                ("uname", "uname", 32), ("gname", "gname", 32)):
1054
1055            if hname in pax_headers:
1056                # The pax header has priority.
1057                continue
1058
1059            val = info[name].decode(encoding, errors)
1060
1061            # Try to encode the string as ASCII.
1062            try:
1063                val.encode("ascii")
1064            except UnicodeEncodeError:
1065                pax_headers[hname] = val
1066                continue
1067
1068            if len(info[name]) > length:
1069                pax_headers[hname] = val
1070
1071        # Test number fields for values that exceed the field limit or values
1072        # that like to be stored as float.
1073        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1074            if name in pax_headers:
1075                # The pax header has priority. Avoid overflow.
1076                info[name] = 0
1077                continue
1078
1079            val = info[name]
1080            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1081                pax_headers[name] = unicode(val)
1082                info[name] = 0
1083
1084        # Create a pax extended header if necessary.
1085        if pax_headers:
1086            buf = self._create_pax_generic_header(pax_headers)
1087        else:
1088            buf = ""
1089
1090        return buf + self._create_header(info, USTAR_FORMAT)
1091
1092    @classmethod
1093    def create_pax_global_header(cls, pax_headers):
1094        """Return the object as a pax global header block sequence.
1095        """
1096        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1097
1098    def _posix_split_name(self, name):
1099        """Split a name longer than 100 chars into a prefix
1100           and a name part.
1101        """
1102        prefix = name[:LENGTH_PREFIX + 1]
1103        while prefix and prefix[-1] != "/":
1104            prefix = prefix[:-1]
1105
1106        name = name[len(prefix):]
1107        prefix = prefix[:-1]
1108
1109        if not prefix or len(name) > LENGTH_NAME:
1110            raise ValueError("name is too long")
1111        return prefix, name
1112
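    # --- Illustrative sketch, not part of the original class (the method name
    # _demo_split_name is ours): ustar stores long names as prefix + "/" +
    # name, so the split must happen at a slash; names without one cannot be
    # represented and raise ValueError. ---
    @staticmethod
    def _demo_split_name():
        prefix, name = TarInfo()._posix_split_name("data/2016/" + "f" * 95)
        assert prefix == "data/2016" and name == "f" * 95
        try:
            TarInfo()._posix_split_name("x" * 150)
        except ValueError:
            pass                           # no slash to split at
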
1113    @staticmethod
1114    def _create_header(info, format):
1115        """Return a header block. info is a dictionary with file
1116           information, format must be one of the *_FORMAT constants.
1117        """
1118        parts = [
1119            stn(info.get("name", ""), 100),
1120            itn(info.get("mode", 0) & 07777, 8, format),
1121            itn(info.get("uid", 0), 8, format),
1122            itn(info.get("gid", 0), 8, format),
1123            itn(info.get("size", 0), 12, format),
1124            itn(info.get("mtime", 0), 12, format),
1125            "        ", # checksum field
1126            info.get("type", REGTYPE),
1127            stn(info.get("linkname", ""), 100),
1128            stn(info.get("magic", POSIX_MAGIC), 8),
1129            stn(info.get("uname", ""), 32),
1130            stn(info.get("gname", ""), 32),
1131            itn(info.get("devmajor", 0), 8, format),
1132            itn(info.get("devminor", 0), 8, format),
1133            stn(info.get("prefix", ""), 155)
1134        ]
1135
1136        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1137        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1138        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1139        return buf
1140
1141    @staticmethod
1142    def _create_payload(payload):
1143        """Return the string payload filled with zero bytes
1144           up to the next 512 byte border.
1145        """
1146        blocks, remainder = divmod(len(payload), BLOCKSIZE)
1147        if remainder > 0:
1148            payload += (BLOCKSIZE - remainder) * NUL
1149        return payload
1150
1151    @classmethod
1152    def _create_gnu_long_header(cls, name, type):
1153        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1154           for name.
1155        """
1156        name += NUL
1157
1158        info = {}
1159        info["name"] = "././@LongLink"
1160        info["type"] = type
1161        info["size"] = len(name)
1162        info["magic"] = GNU_MAGIC
1163
1164        # create extended header + name blocks.
1165        return cls._create_header(info, USTAR_FORMAT) + \
1166                cls._create_payload(name)
1167
1168    @classmethod
1169    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1170        """Return a POSIX.1-2001 extended or global header sequence
1171           that contains a list of keyword, value pairs. The values
1172           must be unicode objects.
1173        """
1174        records = []
1175        for keyword, value in pax_headers.iteritems():
1176            keyword = keyword.encode("utf8")
1177            value = value.encode("utf8")
1178            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1179            n = p = 0
1180            while True:
1181                n = l + len(str(p))
1182                if n == p:
1183                    break
1184                p = n
1185            records.append("%d %s=%s\n" % (p, keyword, value))
1186        records = "".join(records)
1187
1188        # We use a hardcoded "././@PaxHeader" name like star does
1189        # instead of the one that POSIX recommends.
1190        info = {}
1191        info["name"] = "././@PaxHeader"
1192        info["type"] = type
1193        info["size"] = len(records)
1194        info["magic"] = POSIX_MAGIC
1195
1196        # Create pax header + record blocks.
1197        return cls._create_header(info, USTAR_FORMAT) + \
1198                cls._create_payload(records)
1199
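    # --- Illustrative sketch, not part of the original class (the method name
    # _demo_pax_record is ours): each pax record is "<length> <key>=<value>\n"
    # where <length> counts the complete record including itself, hence the
    # small fixed-point loop above. The records follow a 512-byte header. ---
    @staticmethod
    def _demo_pax_record():
        buf = TarInfo._create_pax_generic_header({u"path": u"abc"})
        assert len(buf) == 2 * BLOCKSIZE
        assert buf[BLOCKSIZE:BLOCKSIZE + 12] == "12 path=abc\n"
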
1200    @classmethod
1201    def frombuf(cls, buf):
1202        """Construct a TarInfo object from a 512 byte string buffer.
1203        """
1204        if len(buf) == 0:
1205            raise EmptyHeaderError("empty header")
1206        if len(buf) != BLOCKSIZE:
1207            raise TruncatedHeaderError("truncated header")
1208        if buf.count(NUL) == BLOCKSIZE:
1209            raise EOFHeaderError("end of file header")
1210
1211        chksum = nti(buf[148:156])
1212        if chksum not in calc_chksums(buf):
1213            raise InvalidHeaderError("bad checksum")
1214
1215        obj = cls()
1216        obj.buf = buf
1217        obj.name = nts(buf[0:100])
1218        obj.mode = nti(buf[100:108])
1219        obj.uid = nti(buf[108:116])
1220        obj.gid = nti(buf[116:124])
1221        obj.size = nti(buf[124:136])
1222        obj.mtime = nti(buf[136:148])
1223        obj.chksum = chksum
1224        obj.type = buf[156:157]
1225        obj.linkname = nts(buf[157:257])
1226        obj.uname = nts(buf[265:297])
1227        obj.gname = nts(buf[297:329])
1228        obj.devmajor = nti(buf[329:337])
1229        obj.devminor = nti(buf[337:345])
1230        prefix = nts(buf[345:500])
1231
1232        # Old V7 tar format represents a directory as a regular
1233        # file with a trailing slash.
1234        if obj.type == AREGTYPE and obj.name.endswith("/"):
1235            obj.type = DIRTYPE
1236
1237        # Remove redundant slashes from directories.
1238        if obj.isdir():
1239            obj.name = obj.name.rstrip("/")
1240
1241        # Reconstruct a ustar longname.
1242        if prefix and obj.type not in GNU_TYPES:
1243            obj.name = prefix + "/" + obj.name
1244        return obj
1245
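    # --- Illustrative sketch, not part of the original class (the method name
    # _demo_header_roundtrip is ours): a header produced by tobuf() parses
    # back into an equivalent TarInfo via frombuf(), including the checksum
    # verification. ---
    @staticmethod
    def _demo_header_roundtrip():
        original = TarInfo("hello.txt")
        original.size = 7
        parsed = TarInfo.frombuf(original.tobuf(GNU_FORMAT))
        assert parsed.name == "hello.txt" and parsed.size == 7 and parsed.isreg()
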
1246    @classmethod
1247    def fromtarfile(cls, tarfile):
1248        """Return the next TarInfo object from TarFile object
1249           tarfile.
1250        """
1251        buf = tarfile.fileobj.read(BLOCKSIZE)
1252        obj = cls.frombuf(buf)
1253        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1254        return obj._proc_member(tarfile)
1255
1256    #--------------------------------------------------------------------------
1257    # The following are methods that are called depending on the type of a
1258    # member. The entry point is _proc_member() which can be overridden in a
1259    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1260    # implement the following
1261    # operations:
1262    # 1. Set self.offset_data to the position where the data blocks begin,
1263    #    if there is data that follows.
1264    # 2. Set tarfile.offset to the position where the next member's header will
1265    #    begin.
1266    # 3. Return self or another valid TarInfo object.
1267    def _proc_member(self, tarfile):
1268        """Choose the right processing method depending on
1269           the type and call it.
1270        """
1271        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1272            return self._proc_gnulong(tarfile)
1273        elif self.type == GNUTYPE_SPARSE:
1274            return self._proc_sparse(tarfile)
1275        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1276            return self._proc_pax(tarfile)
1277        else:
1278            return self._proc_builtin(tarfile)
1279
1280    def _proc_builtin(self, tarfile):
1281        """Process a builtin type or an unknown type which
1282           will be treated as a regular file.
1283        """
1284        self.offset_data = tarfile.fileobj.tell()
1285        offset = self.offset_data
1286        if self.isreg() or self.type not in SUPPORTED_TYPES:
1287            # Skip the following data blocks.
1288            offset += self._block(self.size)
1289        tarfile.offset = offset
1290
1291        # Patch the TarInfo object with saved global
1292        # header information.
1293        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1294
1295        return self
1296
1297    def _proc_gnulong(self, tarfile):
1298        """Process the blocks that hold a GNU longname
1299           or longlink member.
1300        """
1301        buf = tarfile.fileobj.read(self._block(self.size))
1302
1303        # Fetch the next header and process it.
1304        try:
1305            next = self.fromtarfile(tarfile)
1306        except HeaderError:
1307            raise SubsequentHeaderError("missing or bad subsequent header")
1308
1309        # Patch the TarInfo object from the next header with
1310        # the longname information.
1311        next.offset = self.offset
1312        if self.type == GNUTYPE_LONGNAME:
1313            next.name = nts(buf)
1314        elif self.type == GNUTYPE_LONGLINK:
1315            next.linkname = nts(buf)
1316
1317        return next
1318
1319    def _proc_sparse(self, tarfile):
1320        """Process a GNU sparse header plus extra headers.
1321        """
1322        buf = self.buf
1323        sp = _ringbuffer()
1324        pos = 386
1325        lastpos = 0L
1326        realpos = 0L
1327        # There are 4 possible sparse structs in the
1328        # first header.
1329        for i in xrange(4):
1330            try:
1331                offset = nti(buf[pos:pos + 12])
1332                numbytes = nti(buf[pos + 12:pos + 24])
1333            except ValueError:
1334                break
1335            if offset > lastpos:
1336                sp.append(_hole(lastpos, offset - lastpos))
1337            sp.append(_data(offset, numbytes, realpos))
1338            realpos += numbytes
1339            lastpos = offset + numbytes
1340            pos += 24
1341
1342        isextended = ord(buf[482])
1343        origsize = nti(buf[483:495])
1344
1345        # If the isextended flag is given,
1346        # there are extra headers to process.
1347        while isextended == 1:
1348            buf = tarfile.fileobj.read(BLOCKSIZE)
1349            pos = 0
1350            for i in xrange(21):
1351                try:
1352                    offset = nti(buf[pos:pos + 12])
1353                    numbytes = nti(buf[pos + 12:pos + 24])
1354                except ValueError:
1355                    break
1356                if offset > lastpos:
1357                    sp.append(_hole(lastpos, offset - lastpos))
1358                sp.append(_data(offset, numbytes, realpos))
1359                realpos += numbytes
1360                lastpos = offset + numbytes
1361                pos += 24
1362            isextended = ord(buf[504])
1363
1364        if lastpos < origsize:
1365            sp.append(_hole(lastpos, origsize - lastpos))
1366
1367        self.sparse = sp
1368
1369        self.offset_data = tarfile.fileobj.tell()
1370        tarfile.offset = self.offset_data + self._block(self.size)
1371        self.size = origsize
1372
1373        return self
1374
1375    def _proc_pax(self, tarfile):
1376        """Process an extended or global header as described in
1377           POSIX.1-2001.
1378        """
1379        # Read the header information.
1380        buf = tarfile.fileobj.read(self._block(self.size))
1381
1382        # A pax header stores supplemental information for either
1383        # the following file (extended) or all following files
1384        # (global).
1385        if self.type == XGLTYPE:
1386            pax_headers = tarfile.pax_headers
1387        else:
1388            pax_headers = tarfile.pax_headers.copy()
1389
1390        # Parse pax header information. A record looks like that:
1391        # "%d %s=%s\n" % (length, keyword, value). length is the size
1392        # of the complete record including the length field itself and
1393        # the newline. keyword and value are both UTF-8 encoded strings.
1394        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1395        pos = 0
1396        while True:
1397            match = regex.match(buf, pos)
1398            if not match:
1399                break
1400
1401            length, keyword = match.groups()
1402            length = int(length)
1403            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1404
1405            keyword = keyword.decode("utf8")
1406            value = value.decode("utf8")
1407
1408            pax_headers[keyword] = value
1409            pos += length
1410
1411        # Fetch the next header.
1412        try:
1413            next = self.fromtarfile(tarfile)
1414        except HeaderError:
1415            raise SubsequentHeaderError("missing or bad subsequent header")
1416
1417        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1418            # Patch the TarInfo object with the extended header info.
1419            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1420            next.offset = self.offset
1421
1422            if "size" in pax_headers:
1423                # If the extended header replaces the size field,
1424                # we need to recalculate the offset where the next
1425                # header starts.
1426                offset = next.offset_data
1427                if next.isreg() or next.type not in SUPPORTED_TYPES:
1428                    offset += next._block(next.size)
1429                tarfile.offset = offset
1430
1431        return next
1432
1433    def _apply_pax_info(self, pax_headers, encoding, errors):
1434        """Replace fields with supplemental information from a previous
1435           pax extended or global header.
1436        """
1437        for keyword, value in pax_headers.iteritems():
1438            if keyword not in PAX_FIELDS:
1439                continue
1440
1441            if keyword == "path":
1442                value = value.rstrip("/")
1443
1444            if keyword in PAX_NUMBER_FIELDS:
1445                try:
1446                    value = PAX_NUMBER_FIELDS[keyword](value)
1447                except ValueError:
1448                    value = 0
1449            else:
1450                value = uts(value, encoding, errors)
1451
1452            setattr(self, keyword, value)
1453
1454        self.pax_headers = pax_headers.copy()
1455
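    # --- Illustrative sketch, not part of the original class (the method name
    # _demo_apply_pax is ours): pax keywords overwrite the matching TarInfo
    # attributes, with numeric fields converted via PAX_NUMBER_FIELDS. ---
    @staticmethod
    def _demo_apply_pax():
        t = TarInfo("old-name")
        t._apply_pax_info({u"path": u"new/name", u"size": u"123"}, "utf8", "strict")
        assert t.name == "new/name" and t.size == 123
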
1456    def _block(self, count):
1457        """Round up a byte count by BLOCKSIZE and return it,
1458           e.g. _block(834) => 1024.
1459        """
1460        blocks, remainder = divmod(count, BLOCKSIZE)
1461        if remainder:
1462            blocks += 1
1463        return blocks * BLOCKSIZE
1464
1465    def isreg(self):
1466        return self.type in REGULAR_TYPES
1467    def isfile(self):
1468        return self.isreg()
1469    def isdir(self):
1470        return self.type == DIRTYPE
1471    def issym(self):
1472        return self.type == SYMTYPE
1473    def islnk(self):
1474        return self.type == LNKTYPE
1475    def ischr(self):
1476        return self.type == CHRTYPE
1477    def isblk(self):
1478        return self.type == BLKTYPE
1479    def isfifo(self):
1480        return self.type == FIFOTYPE
1481    def issparse(self):
1482        return self.type == GNUTYPE_SPARSE
1483    def isdev(self):
1484        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1485# class TarInfo
1486
1487class TarFile(object):
1488    """The TarFile Class provides an interface to tar archives.
1489    """
1490
1491    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1492
1493    dereference = False         # If true, add content of linked file to the
1494                                # tar file, else the link.
1495
1496    ignore_zeros = False        # If true, skips empty or invalid blocks and
1497                                # continues processing.
1498
1499    errorlevel = 1              # If 0, fatal errors only appear in debug
1500                                # messages (if debug >= 0). If > 0, errors
1501                                # are passed to the caller as exceptions.
1502
1503    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1504
1505    encoding = ENCODING         # Encoding for 8-bit character strings.
1506
1507    errors = None               # Error handler for unicode conversion.
1508
1509    tarinfo = TarInfo           # The default TarInfo class to use.
1510
1511    fileobject = ExFileObject   # The default ExFileObject class to use.
1512
1513    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1514            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1515            errors=None, pax_headers=None, debug=None, errorlevel=None):
1516        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1517           read from an existing archive, 'a' to append data to an existing
1518           file or 'w' to create a new file overwriting an existing one. `mode'
1519           defaults to 'r'.
1520           If `fileobj' is given, it is used for reading or writing data. If it
1521           can be determined, `mode' is overridden by `fileobj's mode.
1522           `fileobj' is not closed, when TarFile is closed.
1523        """
1524        modes = {"r": "rb", "a": "r+b", "w": "wb"}
1525        if mode not in modes:
1526            raise ValueError("mode must be 'r', 'a' or 'w'")
1527        self.mode = mode
1528        self._mode = modes[mode]
1529
1530        if not fileobj:
1531            if self.mode == "a" and not os.path.exists(name):
1532                # Create nonexistent files in append mode.
1533                self.mode = "w"
1534                self._mode = "wb"
1535            fileobj = bltn_open(name, self._mode)
1536            self._extfileobj = False
1537        else:
1538            if name is None and hasattr(fileobj, "name"):
1539                name = fileobj.name
1540            if hasattr(fileobj, "mode"):
1541                self._mode = fileobj.mode
1542            self._extfileobj = True
1543        self.name = os.path.abspath(name) if name else None
1544        self.fileobj = fileobj
1545
1546        # Init attributes.
1547        if format is not None:
1548            self.format = format
1549        if tarinfo is not None:
1550            self.tarinfo = tarinfo
1551        if dereference is not None:
1552            self.dereference = dereference
1553        if ignore_zeros is not None:
1554            self.ignore_zeros = ignore_zeros
1555        if encoding is not None:
1556            self.encoding = encoding
1557
1558        if errors is not None:
1559            self.errors = errors
1560        elif mode == "r":
1561            self.errors = "utf-8"
1562        else:
1563            self.errors = "strict"
1564
1565        if pax_headers is not None and self.format == PAX_FORMAT:
1566            self.pax_headers = pax_headers
1567        else:
1568            self.pax_headers = {}
1569
1570        if debug is not None:
1571            self.debug = debug
1572        if errorlevel is not None:
1573            self.errorlevel = errorlevel
1574
1575        # Init datastructures.
1576        self.closed = False
1577        self.members = []       # list of members as TarInfo objects
1578        self._loaded = False    # flag if all members have been read
1579        self.offset = self.fileobj.tell()
1580                                # current position in the archive file
1581        self.inodes = {}        # dictionary caching the inodes of
1582                                # archive members already added
1583
1584        try:
1585            if self.mode == "r":
1586                self.firstmember = None
1587                self.firstmember = self.next()
1588
1589            if self.mode == "a":
1590                # Move to the end of the archive,
1591                # before the first empty block.
1592                while True:
1593                    self.fileobj.seek(self.offset)
1594                    try:
1595                        tarinfo = self.tarinfo.fromtarfile(self)
1596                        self.members.append(tarinfo)
1597                    except EOFHeaderError:
1598                        self.fileobj.seek(self.offset)
1599                        break
1600                    except HeaderError, e:
1601                        raise ReadError(str(e))
1602
1603            if self.mode in "aw":
1604                self._loaded = True
1605
1606                if self.pax_headers:
1607                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1608                    self.fileobj.write(buf)
1609                    self.offset += len(buf)
1610        except:
1611            if not self._extfileobj:
1612                self.fileobj.close()
1613            self.closed = True
1614            raise
1615
1616    def _getposix(self):
1617        return self.format == USTAR_FORMAT
1618    def _setposix(self, value):
1619        import warnings
1620        warnings.warn("use the format attribute instead", DeprecationWarning,
1621                      2)
1622        if value:
1623            self.format = USTAR_FORMAT
1624        else:
1625            self.format = GNU_FORMAT
1626    posix = property(_getposix, _setposix)
1627
1628    #--------------------------------------------------------------------------
1629    # Below are the classmethods which act as alternate constructors to the
1630    # TarFile class. The open() method is the only one that is needed for
1631    # public use; it is the "super"-constructor and is able to select an
1632    # adequate "sub"-constructor for a particular compression using the mapping
1633    # from OPEN_METH.
1634    #
1635    # This concept allows one to subclass TarFile without losing the comfort of
1636    # the super-constructor. A sub-constructor is registered and made available
1637    # by adding it to the mapping in OPEN_METH.
1638
1639    @classmethod
1640    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1641        """Open a tar archive for reading, writing or appending. Return
1642           an appropriate TarFile class.
1643
1644           mode:
1645           'r' or 'r:*' open for reading with transparent compression
1646           'r:'         open for reading exclusively uncompressed
1647           'r:gz'       open for reading with gzip compression
1648           'r:bz2'      open for reading with bzip2 compression
1649           'a' or 'a:'  open for appending, creating the file if necessary
1650           'w' or 'w:'  open for writing without compression
1651           'w:gz'       open for writing with gzip compression
1652           'w:bz2'      open for writing with bzip2 compression
1653
1654           'r|*'        open a stream of tar blocks with transparent compression
1655           'r|'         open an uncompressed stream of tar blocks for reading
1656           'r|gz'       open a gzip compressed stream of tar blocks
1657           'r|bz2'      open a bzip2 compressed stream of tar blocks
1658           'w|'         open an uncompressed stream for writing
1659           'w|gz'       open a gzip compressed stream for writing
1660           'w|bz2'      open a bzip2 compressed stream for writing
1661        """
1662
1663        if not name and not fileobj:
1664            raise ValueError("nothing to open")
1665
1666        if mode in ("r", "r:*"):
1667            # Find out which *open() is appropriate for opening the file.
1668            for comptype in cls.OPEN_METH:
1669                func = getattr(cls, cls.OPEN_METH[comptype])
1670                if fileobj is not None:
1671                    saved_pos = fileobj.tell()
1672                try:
1673                    return func(name, "r", fileobj, **kwargs)
1674                except (ReadError, CompressionError), e:
1675                    if fileobj is not None:
1676                        fileobj.seek(saved_pos)
1677                    continue
1678            raise ReadError("file could not be opened successfully")
1679
1680        elif ":" in mode:
1681            filemode, comptype = mode.split(":", 1)
1682            filemode = filemode or "r"
1683            comptype = comptype or "tar"
1684
1685            # Select the *open() function according to
1686            # given compression.
1687            if comptype in cls.OPEN_METH:
1688                func = getattr(cls, cls.OPEN_METH[comptype])
1689            else:
1690                raise CompressionError("unknown compression type %r" % comptype)
1691            return func(name, filemode, fileobj, **kwargs)
1692
1693        elif "|" in mode:
1694            filemode, comptype = mode.split("|", 1)
1695            filemode = filemode or "r"
1696            comptype = comptype or "tar"
1697
1698            if filemode not in ("r", "w"):
1699                raise ValueError("mode must be 'r' or 'w'")
1700
1701            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1702            try:
1703                t = cls(name, filemode, stream, **kwargs)
1704            except:
1705                stream.close()
1706                raise
1707            t._extfileobj = False
1708            return t
1709
1710        elif mode in ("a", "w"):
1711            return cls.taropen(name, mode, fileobj, **kwargs)
1712
1713        raise ValueError("undiscernible mode")
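
    # Illustrative sketch (not part of the original module): typical calls to the
    # open() classmethod documented above.  The archive names are hypothetical.
    #
    #   import tarfile
    #   tf = tarfile.open("backup.tar.gz", "r:gz")   # read a gzip-compressed archive
    #   tf.close()
    #   tf = tarfile.open("new.tar.bz2", "w:bz2")    # create a bzip2-compressed archive
    #   tf.close()
    #   tf = tarfile.open("plain.tar", "r|")         # read an uncompressed stream of blocks
    #   tf.close()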
1714
1715    @classmethod
1716    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1717        """Open uncompressed tar archive name for reading or writing.
1718        """
1719        if mode not in ("r", "a", "w"):
1720            raise ValueError("mode must be 'r', 'a' or 'w'")
1721        return cls(name, mode, fileobj, **kwargs)
1722
1723    @classmethod
1724    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1725        """Open gzip compressed tar archive name for reading or writing.
1726           Appending is not allowed.
1727        """
1728        if mode not in ("r", "w"):
1729            raise ValueError("mode must be 'r' or 'w'")
1730
1731        try:
1732            import gzip
1733            gzip.GzipFile
1734        except (ImportError, AttributeError):
1735            raise CompressionError("gzip module is not available")
1736
1737        try:
1738            fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj)
1739        except OSError:
1740            if fileobj is not None and mode == 'r':
1741                raise ReadError("not a gzip file")
1742            raise
1743
1744        try:
1745            t = cls.taropen(name, mode, fileobj, **kwargs)
1746        except IOError:
1747            fileobj.close()
1748            if mode == 'r':
1749                raise ReadError("not a gzip file")
1750            raise
1751        except:
1752            fileobj.close()
1753            raise
1754        t._extfileobj = False
1755        return t
1756
1757    @classmethod
1758    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1759        """Open bzip2 compressed tar archive name for reading or writing.
1760           Appending is not allowed.
1761        """
1762        if mode not in ("r", "w"):
1763            raise ValueError("mode must be 'r' or 'w'.")
1764
1765        try:
1766            import bz2
1767        except ImportError:
1768            raise CompressionError("bz2 module is not available")
1769
1770        if fileobj is not None:
1771            fileobj = _BZ2Proxy(fileobj, mode)
1772        else:
1773            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1774
1775        try:
1776            t = cls.taropen(name, mode, fileobj, **kwargs)
1777        except (IOError, EOFError):
1778            fileobj.close()
1779            if mode == 'r':
1780                raise ReadError("not a bzip2 file")
1781            raise
1782        except:
1783            fileobj.close()
1784            raise
1785        t._extfileobj = False
1786        return t
1787
1788    # All *open() methods are registered here.
1789    OPEN_METH = {
1790        "tar": "taropen",   # uncompressed tar
1791        "gz":  "gzopen",    # gzip compressed tar
1792        "bz2": "bz2open"    # bzip2 compressed tar
1793    }
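
    # Illustrative sketch (not part of the original module): registering a new
    # "sub"-constructor by extending OPEN_METH in a subclass, as described above.
    # The lzopen() method, the "xz" key and the Python 2 lzma backport used here
    # are assumptions, not part of this module.
    #
    #   import tarfile
    #   class LZMATarFile(tarfile.TarFile):
    #       @classmethod
    #       def lzopen(cls, name, mode="r", fileobj=None, **kwargs):
    #           import lzma                  # assumption: a py2 lzma backport is installed
    #           if fileobj is None:
    #               fileobj = lzma.LZMAFile(name, mode)
    #           return cls.taropen(name, mode, fileobj, **kwargs)
    #
    #       OPEN_METH = dict(tarfile.TarFile.OPEN_METH, xz="lzopen")
    #
    #   tf = LZMATarFile.open("data.tar.xz", "r:xz")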
1794
1795    #--------------------------------------------------------------------------
1796    # The public methods which TarFile provides:
1797
1798    def close(self):
1799        """Close the TarFile. In write-mode, two finishing zero blocks are
1800           appended to the archive.
1801        """
1802        if self.closed:
1803            return
1804
1805        self.closed = True
1806        try:
1807            if self.mode in "aw":
1808                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1809                self.offset += (BLOCKSIZE * 2)
1810                # fill up the end with zero-blocks
1811                # (like option -b20 for tar does)
1812                blocks, remainder = divmod(self.offset, RECORDSIZE)
1813                if remainder > 0:
1814                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1815        finally:
1816            if not self._extfileobj:
1817                self.fileobj.close()
1818
1819    def getmember(self, name):
1820        """Return a TarInfo object for member `name'. If `name' cannot be
1821           found in the archive, KeyError is raised. If a member occurs more
1822           than once in the archive, its last occurrence is assumed to be the
1823           most up-to-date version.
1824        """
1825        tarinfo = self._getmember(name)
1826        if tarinfo is None:
1827            raise KeyError("filename %r not found" % name)
1828        return tarinfo
1829
1830    def getmembers(self):
1831        """Return the members of the archive as a list of TarInfo objects. The
1832           list has the same order as the members in the archive.
1833        """
1834        self._check()
1835        if not self._loaded:    # if we want to obtain a list of
1836            self._load()        # all members, we first have to
1837                                # scan the whole archive.
1838        return self.members
1839
1840    def getnames(self):
1841        """Return the members of the archive as a list of their names. It has
1842           the same order as the list returned by getmembers().
1843        """
1844        return [tarinfo.name for tarinfo in self.getmembers()]
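
    # Illustrative sketch (not part of the original module): listing the contents
    # of an archive with getmembers() and getnames().  The file name is hypothetical.
    #
    #   import tarfile
    #   tf = tarfile.open("backup.tar.gz", "r:gz")
    #   for info in tf.getmembers():
    #       print info.name, info.size
    #   print tf.getnames()
    #   tf.close()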
1845
1846    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1847        """Create a TarInfo object for either the file `name' or the file
1848           object `fileobj' (using os.fstat on its file descriptor). You can
1849           modify some of the TarInfo's attributes before you add it using
1850           addfile(). If given, `arcname' specifies an alternative name for the
1851           file in the archive.
1852        """
1853        self._check("aw")
1854
1855        # When fileobj is given, replace name by
1856        # fileobj's real name.
1857        if fileobj is not None:
1858            name = fileobj.name
1859
1860        # Build the name of the member in the archive.
1861        # Backslashes are converted to forward slashes;
1862        # absolute paths are turned into relative paths.
1863        if arcname is None:
1864            arcname = name
1865        drv, arcname = os.path.splitdrive(arcname)
1866        arcname = arcname.replace(os.sep, "/")
1867        arcname = arcname.lstrip("/")
1868
1869        # Now, fill the TarInfo object with
1870        # information specific for the file.
1871        tarinfo = self.tarinfo()
1872        tarinfo.tarfile = self
1873
1874        # Use os.stat or os.lstat, depending on the platform
1875        # and on whether symlinks should be resolved.
1876        if fileobj is None:
1877            if hasattr(os, "lstat") and not self.dereference:
1878                statres = os.lstat(name)
1879            else:
1880                statres = os.stat(name)
1881        else:
1882            statres = os.fstat(fileobj.fileno())
1883        linkname = ""
1884
1885        stmd = statres.st_mode
1886        if stat.S_ISREG(stmd):
1887            inode = (statres.st_ino, statres.st_dev)
1888            if not self.dereference and statres.st_nlink > 1 and \
1889                    inode in self.inodes and arcname != self.inodes[inode]:
1890                # Is it a hardlink to an already
1891                # archived file?
1892                type = LNKTYPE
1893                linkname = self.inodes[inode]
1894            else:
1895                # The inode is added only if it is valid.
1896                # For win32 it is always 0.
1897                type = REGTYPE
1898                if inode[0]:
1899                    self.inodes[inode] = arcname
1900        elif stat.S_ISDIR(stmd):
1901            type = DIRTYPE
1902        elif stat.S_ISFIFO(stmd):
1903            type = FIFOTYPE
1904        elif stat.S_ISLNK(stmd):
1905            type = SYMTYPE
1906            linkname = os.readlink(name)
1907        elif stat.S_ISCHR(stmd):
1908            type = CHRTYPE
1909        elif stat.S_ISBLK(stmd):
1910            type = BLKTYPE
1911        else:
1912            return None
1913
1914        # Fill the TarInfo object with all
1915        # information we can get.
1916        tarinfo.name = arcname
1917        tarinfo.mode = stmd
1918        tarinfo.uid = statres.st_uid
1919        tarinfo.gid = statres.st_gid
1920        if type == REGTYPE:
1921            tarinfo.size = statres.st_size
1922        else:
1923            tarinfo.size = 0L
1924        tarinfo.mtime = statres.st_mtime
1925        tarinfo.type = type
1926        tarinfo.linkname = linkname
1927        if pwd:
1928            try:
1929                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1930            except KeyError:
1931                pass
1932        if grp:
1933            try:
1934                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1935            except KeyError:
1936                pass
1937
1938        if type in (CHRTYPE, BLKTYPE):
1939            if hasattr(os, "major") and hasattr(os, "minor"):
1940                tarinfo.devmajor = os.major(statres.st_rdev)
1941                tarinfo.devminor = os.minor(statres.st_rdev)
1942        return tarinfo
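
    # Illustrative sketch (not part of the original module): building a header with
    # gettarinfo(), adjusting it, and writing it with addfile().  Paths are hypothetical.
    #
    #   import tarfile
    #   tf = tarfile.open("out.tar", "w")
    #   info = tf.gettarinfo("data.txt", arcname="renamed.txt")
    #   info.uid = info.gid = 0
    #   info.uname = info.gname = "root"
    #   with open("data.txt", "rb") as f:
    #       tf.addfile(info, f)
    #   tf.close()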
1943
1944    def list(self, verbose=True):
1945        """Print a table of contents to sys.stdout. If `verbose' is False, only
1946           the names of the members are printed. If it is True, an `ls -l'-like
1947           output is produced.
1948        """
1949        self._check()
1950
1951        for tarinfo in self:
1952            if verbose:
1953                print filemode(tarinfo.mode),
1954                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1955                                 tarinfo.gname or tarinfo.gid),
1956                if tarinfo.ischr() or tarinfo.isblk():
1957                    print "%10s" % ("%d,%d" \
1958                                    % (tarinfo.devmajor, tarinfo.devminor)),
1959                else:
1960                    print "%10d" % tarinfo.size,
1961                print "%d-%02d-%02d %02d:%02d:%02d" \
1962                      % time.localtime(tarinfo.mtime)[:6],
1963
1964            print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1965
1966            if verbose:
1967                if tarinfo.issym():
1968                    print "->", tarinfo.linkname,
1969                if tarinfo.islnk():
1970                    print "link to", tarinfo.linkname,
1971            print
1972
1973    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1974        """Add the file `name' to the archive. `name' may be any type of file
1975           (directory, fifo, symbolic link, etc.). If given, `arcname'
1976           specifies an alternative name for the file in the archive.
1977           Directories are added recursively by default. This can be avoided by
1978           setting `recursive' to False. `exclude' is a function that should
1979           return True for each filename to be excluded. `filter' is a function
1980           that expects a TarInfo object argument and returns the changed
1981           TarInfo object. If it returns None, the TarInfo object will be
1982           excluded from the archive.
1983        """
1984        self._check("aw")
1985
1986        if arcname is None:
1987            arcname = name
1988
1989        # Exclude pathnames.
1990        if exclude is not None:
1991            import warnings
1992            warnings.warn("use the filter argument instead",
1993                    DeprecationWarning, 2)
1994            if exclude(name):
1995                self._dbg(2, "tarfile: Excluded %r" % name)
1996                return
1997
1998        # Skip if somebody tries to archive the archive...
1999        if self.name is not None and os.path.abspath(name) == self.name:
2000            self._dbg(2, "tarfile: Skipped %r" % name)
2001            return
2002
2003        self._dbg(1, name)
2004
2005        # Create a TarInfo object from the file.
2006        tarinfo = self.gettarinfo(name, arcname)
2007
2008        if tarinfo is None:
2009            self._dbg(1, "tarfile: Unsupported type %r" % name)
2010            return
2011
2012        # Change or exclude the TarInfo object.
2013        if filter is not None:
2014            tarinfo = filter(tarinfo)
2015            if tarinfo is None:
2016                self._dbg(2, "tarfile: Excluded %r" % name)
2017                return
2018
2019        # Append the tar header and data to the archive.
2020        if tarinfo.isreg():
2021            with bltn_open(name, "rb") as f:
2022                self.addfile(tarinfo, f)
2023
2024        elif tarinfo.isdir():
2025            self.addfile(tarinfo)
2026            if recursive:
2027                for f in os.listdir(name):
2028                    self.add(os.path.join(name, f), os.path.join(arcname, f),
2029                            recursive, exclude, filter)
2030
2031        else:
2032            self.addfile(tarinfo)
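
    # Illustrative sketch (not part of the original module): add() with a filter
    # function that drops *.pyc files and resets ownership.  Paths are hypothetical.
    #
    #   import tarfile
    #   def sanitize(info):
    #       if info.name.endswith(".pyc"):
    #           return None                  # returning None excludes the member
    #       info.uid = info.gid = 0
    #       info.uname = info.gname = "root"
    #       return info
    #   tf = tarfile.open("src.tar.gz", "w:gz")
    #   tf.add("project", filter=sanitize)
    #   tf.close()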
2033
2034    def addfile(self, tarinfo, fileobj=None):
2035        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2036           given, tarinfo.size bytes are read from it and added to the archive.
2037           You can create TarInfo objects using gettarinfo().
2038           On Windows platforms, `fileobj' should always be opened with mode
2039           'rb' to avoid size mismatches caused by newline translation.
2040        """
2041        self._check("aw")
2042
2043        tarinfo = copy.copy(tarinfo)
2044
2045        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2046        self.fileobj.write(buf)
2047        self.offset += len(buf)
2048
2049        # If there's data to follow, append it.
2050        if fileobj is not None:
2051            copyfileobj(fileobj, self.fileobj, tarinfo.size)
2052            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2053            if remainder > 0:
2054                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2055                blocks += 1
2056            self.offset += blocks * BLOCKSIZE
2057
2058        self.members.append(tarinfo)
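
    # Illustrative sketch (not part of the original module): writing in-memory data
    # with addfile() and a hand-built TarInfo.  Names and content are hypothetical.
    #
    #   import tarfile, time
    #   from cStringIO import StringIO
    #   data = "hello world\n"
    #   info = tarfile.TarInfo("greeting.txt")
    #   info.size = len(data)
    #   info.mtime = time.time()
    #   tf = tarfile.open("out.tar", "w")
    #   tf.addfile(info, StringIO(data))
    #   tf.close()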
2059
2060    def extractall(self, path=".", members=None):
2061        """Extract all members from the archive to the current working
2062           directory and set owner, modification time and permissions on
2063           directories afterwards. `path' specifies a different directory
2064           to extract to. `members' is optional and must be a subset of the
2065           list returned by getmembers().
2066        """
2067        directories = []
2068
2069        if members is None:
2070            members = self
2071
2072        for tarinfo in members:
2073            if tarinfo.isdir():
2074                # Extract directories with a safe mode.
2075                directories.append(tarinfo)
2076                tarinfo = copy.copy(tarinfo)
2077                tarinfo.mode = 0700
2078            self.extract(tarinfo, path)
2079
2080        # Reverse sort directories.
2081        directories.sort(key=operator.attrgetter('name'))
2082        directories.reverse()
2083
2084        # Set correct owner, mtime and filemode on directories.
2085        for tarinfo in directories:
2086            dirpath = os.path.join(path, tarinfo.name)
2087            try:
2088                self.chown(tarinfo, dirpath)
2089                self.utime(tarinfo, dirpath)
2090                self.chmod(tarinfo, dirpath)
2091            except ExtractError, e:
2092                if self.errorlevel > 1:
2093                    raise
2094                else:
2095                    self._dbg(1, "tarfile: %s" % e)
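
    # Illustrative sketch (not part of the original module): extracting only a
    # subset of members with extractall().  Because member names are not checked
    # for ".." or absolute paths, untrusted archives should be inspected before
    # extraction.  Paths are hypothetical.
    #
    #   import tarfile
    #   tf = tarfile.open("backup.tar.gz", "r:gz")
    #   wanted = [m for m in tf.getmembers() if m.name.startswith("docs/")]
    #   tf.extractall(path="restore", members=wanted)
    #   tf.close()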
2096
2097    def extract(self, member, path=""):
2098        """Extract a member from the archive to the current working directory,
2099           using its full name. Its file information is extracted as accurately
2100           as possible. `member' may be a filename or a TarInfo object. You can
2101           specify a different directory using `path'.
2102        """
2103        self._check("r")
2104
2105        if isinstance(member, basestring):
2106            tarinfo = self.getmember(member)
2107        else:
2108            tarinfo = member
2109
2110        # Prepare the link target for makelink().
2111        if tarinfo.islnk():
2112            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2113
2114        try:
2115            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2116        except EnvironmentError, e:
2117            if self.errorlevel > 0:
2118                raise
2119            else:
2120                if e.filename is None:
2121                    self._dbg(1, "tarfile: %s" % e.strerror)
2122                else:
2123                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2124        except ExtractError, e:
2125            if self.errorlevel > 1:
2126                raise
2127            else:
2128                self._dbg(1, "tarfile: %s" % e)
2129
2130    def extractfile(self, member):
2131        """Extract a member from the archive as a file object. `member' may be
2132           a filename or a TarInfo object. If `member' is a regular file, a
2133           file-like object is returned. If `member' is a link, a file-like
2134           object is constructed from the link's target. If `member' is none of
2135           the above, None is returned.
2136           The file-like object is read-only and provides the following
2137           methods: read(), readline(), readlines(), seek() and tell()
2138        """
2139        self._check("r")
2140
2141        if isinstance(member, basestring):
2142            tarinfo = self.getmember(member)
2143        else:
2144            tarinfo = member
2145
2146        if tarinfo.isreg():
2147            return self.fileobject(self, tarinfo)
2148
2149        elif tarinfo.type not in SUPPORTED_TYPES:
2150            # If a member's type is unknown, it is treated as a
2151            # regular file.
2152            return self.fileobject(self, tarinfo)
2153
2154        elif tarinfo.islnk() or tarinfo.issym():
2155            if isinstance(self.fileobj, _Stream):
2156                # A small but ugly workaround for the case that someone tries
2157                # to extract a (sym)link as a file-object from a non-seekable
2158                # stream of tar blocks.
2159                raise StreamError("cannot extract (sym)link as file object")
2160            else:
2161                # A (sym)link's file object is its target's file object.
2162                return self.extractfile(self._find_link_target(tarinfo))
2163        else:
2164            # If there's no data associated with the member (directory, chrdev,
2165            # blkdev, etc.), return None instead of a file object.
2166            return None
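
    # Illustrative sketch (not part of the original module): reading a member's
    # content without writing it to disk.  The member name is hypothetical.
    #
    #   import tarfile
    #   tf = tarfile.open("backup.tar.gz", "r:gz")
    #   f = tf.extractfile("docs/readme.txt")
    #   if f is not None:
    #       text = f.read()
    #       f.close()
    #   tf.close()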
2167
2168    def _extract_member(self, tarinfo, targetpath):
2169        """Extract the TarInfo object tarinfo to a physical
2170           file called targetpath.
2171        """
2172        # Fetch the TarInfo object for the given name
2173        # and build the destination pathname, replacing
2174        # forward slashes to platform specific separators.
2175        targetpath = targetpath.rstrip("/")
2176        targetpath = targetpath.replace("/", os.sep)
2177
2178        # Create all upper directories.
2179        upperdirs = os.path.dirname(targetpath)
2180        if upperdirs and not os.path.exists(upperdirs):
2181            # Create directories that are not part of the archive with
2182            # default permissions.
2183            os.makedirs(upperdirs)
2184
2185        if tarinfo.islnk() or tarinfo.issym():
2186            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2187        else:
2188            self._dbg(1, tarinfo.name)
2189
2190        if tarinfo.isreg():
2191            self.makefile(tarinfo, targetpath)
2192        elif tarinfo.isdir():
2193            self.makedir(tarinfo, targetpath)
2194        elif tarinfo.isfifo():
2195            self.makefifo(tarinfo, targetpath)
2196        elif tarinfo.ischr() or tarinfo.isblk():
2197            self.makedev(tarinfo, targetpath)
2198        elif tarinfo.islnk() or tarinfo.issym():
2199            self.makelink(tarinfo, targetpath)
2200        elif tarinfo.type not in SUPPORTED_TYPES:
2201            self.makeunknown(tarinfo, targetpath)
2202        else:
2203            self.makefile(tarinfo, targetpath)
2204
2205        self.chown(tarinfo, targetpath)
2206        if not tarinfo.issym():
2207            self.chmod(tarinfo, targetpath)
2208            self.utime(tarinfo, targetpath)
2209
2210    #--------------------------------------------------------------------------
2211    # Below are the different file methods. They are called via
2212    # _extract_member() when extract() is called. They can be replaced in a
2213    # subclass to implement other functionality.
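    #
    # Illustrative sketch (not part of the original module): a subclass overriding
    # one of these hooks, e.g. to keep files that already exist on disk.  The class
    # name is hypothetical.
    #
    #   import os, tarfile
    #   class NoClobberTarFile(tarfile.TarFile):
    #       def makefile(self, tarinfo, targetpath):
    #           if os.path.exists(targetpath):
    #               self._dbg(1, "tarfile: kept existing %r" % targetpath)
    #               return
    #           tarfile.TarFile.makefile(self, tarinfo, targetpath)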
2214
2215    def makedir(self, tarinfo, targetpath):
2216        """Make a directory called targetpath.
2217        """
2218        try:
2219            # Use a safe mode for the directory, the real mode is set
2220            # later in _extract_member().
2221            os.mkdir(targetpath, 0700)
2222        except EnvironmentError, e:
2223            if e.errno != errno.EEXIST:
2224                raise
2225
2226    def makefile(self, tarinfo, targetpath):
2227        """Make a file called targetpath.
2228        """
2229        source = self.extractfile(tarinfo)
2230        try:
2231            with bltn_open(targetpath, "wb") as target:
2232                copyfileobj(source, target)
2233        finally:
2234            source.close()
2235
2236    def makeunknown(self, tarinfo, targetpath):
2237        """Make a file from a TarInfo object with an unknown type
2238           at targetpath.
2239        """
2240        self.makefile(tarinfo, targetpath)
2241        self._dbg(1, "tarfile: Unknown file type %r, " \
2242                     "extracted as regular file." % tarinfo.type)
2243
2244    def makefifo(self, tarinfo, targetpath):
2245        """Make a fifo called targetpath.
2246        """
2247        if hasattr(os, "mkfifo"):
2248            os.mkfifo(targetpath)
2249        else:
2250            raise ExtractError("fifo not supported by system")
2251
2252    def makedev(self, tarinfo, targetpath):
2253        """Make a character or block device called targetpath.
2254        """
2255        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2256            raise ExtractError("special devices not supported by system")
2257
2258        mode = tarinfo.mode
2259        if tarinfo.isblk():
2260            mode |= stat.S_IFBLK
2261        else:
2262            mode |= stat.S_IFCHR
2263
2264        os.mknod(targetpath, mode,
2265                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2266
2267    def makelink(self, tarinfo, targetpath):
2268        """Make a (symbolic) link called targetpath. If it cannot be created
2269           (platform limitation), we try to make a copy of the referenced file
2270           instead of a link.
2271        """
2272        if hasattr(os, "symlink") and hasattr(os, "link"):
2273            # For systems that support symbolic and hard links.
2274            if tarinfo.issym():
2275                if os.path.lexists(targetpath):
2276                    os.unlink(targetpath)
2277                os.symlink(tarinfo.linkname, targetpath)
2278            else:
2279                # See extract().
2280                if os.path.exists(tarinfo._link_target):
2281                    if os.path.lexists(targetpath):
2282                        os.unlink(targetpath)
2283                    os.link(tarinfo._link_target, targetpath)
2284                else:
2285                    self._extract_member(self._find_link_target(tarinfo), targetpath)
2286        else:
2287            try:
2288                self._extract_member(self._find_link_target(tarinfo), targetpath)
2289            except KeyError:
2290                raise ExtractError("unable to resolve link inside archive")
2291
2292    def chown(self, tarinfo, targetpath):
2293        """Set owner of targetpath according to tarinfo.
2294        """
2295        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2296            # We have to be root to do so.
2297            try:
2298                g = grp.getgrnam(tarinfo.gname)[2]
2299            except KeyError:
2300                g = tarinfo.gid
2301            try:
2302                u = pwd.getpwnam(tarinfo.uname)[2]
2303            except KeyError:
2304                u = tarinfo.uid
2305            try:
2306                if tarinfo.issym() and hasattr(os, "lchown"):
2307                    os.lchown(targetpath, u, g)
2308                else:
2309                    if sys.platform != "os2emx":
2310                        os.chown(targetpath, u, g)
2311            except EnvironmentError, e:
2312                raise ExtractError("could not change owner")
2313
2314    def chmod(self, tarinfo, targetpath):
2315        """Set file permissions of targetpath according to tarinfo.
2316        """
2317        if hasattr(os, 'chmod'):
2318            try:
2319                os.chmod(targetpath, tarinfo.mode)
2320            except EnvironmentError, e:
2321                raise ExtractError("could not change mode")
2322
2323    def utime(self, tarinfo, targetpath):
2324        """Set modification time of targetpath according to tarinfo.
2325        """
2326        if not hasattr(os, 'utime'):
2327            return
2328        try:
2329            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2330        except EnvironmentError, e:
2331            raise ExtractError("could not change modification time")
2332
2333    #--------------------------------------------------------------------------
2334    def next(self):
2335        """Return the next member of the archive as a TarInfo object, when
2336           TarFile is opened for reading. Return None if there are no more
2337           members available.
2338        """
2339        self._check("ra")
2340        if self.firstmember is not None:
2341            m = self.firstmember
2342            self.firstmember = None
2343            return m
2344
2345        # Advance the file pointer.
2346        if self.offset != self.fileobj.tell():
2347            self.fileobj.seek(self.offset - 1)
2348            if not self.fileobj.read(1):
2349                raise ReadError("unexpected end of data")
2350
2351        # Read the next block.
2352        tarinfo = None
2353        while True:
2354            try:
2355                tarinfo = self.tarinfo.fromtarfile(self)
2356            except EOFHeaderError, e:
2357                if self.ignore_zeros:
2358                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2359                    self.offset += BLOCKSIZE
2360                    continue
2361            except InvalidHeaderError, e:
2362                if self.ignore_zeros:
2363                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2364                    self.offset += BLOCKSIZE
2365                    continue
2366                elif self.offset == 0:
2367                    raise ReadError(str(e))
2368            except EmptyHeaderError:
2369                if self.offset == 0:
2370                    raise ReadError("empty file")
2371            except TruncatedHeaderError, e:
2372                if self.offset == 0:
2373                    raise ReadError(str(e))
2374            except SubsequentHeaderError, e:
2375                raise ReadError(str(e))
2376            break
2377
2378        if tarinfo is not None:
2379            self.members.append(tarinfo)
2380        else:
2381            self._loaded = True
2382
2383        return tarinfo
2384
2385    #--------------------------------------------------------------------------
2386    # Little helper methods:
2387
2388    def _getmember(self, name, tarinfo=None, normalize=False):
2389        """Find an archive member by name from bottom to top.
2390           If tarinfo is given, it is used as the starting point.
2391        """
2392        # Ensure that all members have been loaded.
2393        members = self.getmembers()
2394
2395        # Limit the member search list up to tarinfo.
2396        if tarinfo is not None:
2397            members = members[:members.index(tarinfo)]
2398
2399        if normalize:
2400            name = os.path.normpath(name)
2401
2402        for member in reversed(members):
2403            if normalize:
2404                member_name = os.path.normpath(member.name)
2405            else:
2406                member_name = member.name
2407
2408            if name == member_name:
2409                return member
2410
2411    def _load(self):
2412        """Read through the entire archive file and look for readable
2413           members.
2414        """
2415        while True:
2416            tarinfo = self.next()
2417            if tarinfo is None:
2418                break
2419        self._loaded = True
2420
2421    def _check(self, mode=None):
2422        """Check if TarFile is still open, and if the operation's mode
2423           corresponds to TarFile's mode.
2424        """
2425        if self.closed:
2426            raise IOError("%s is closed" % self.__class__.__name__)
2427        if mode is not None and self.mode not in mode:
2428            raise IOError("bad operation for mode %r" % self.mode)
2429
2430    def _find_link_target(self, tarinfo):
2431        """Find the target member of a symlink or hardlink member in the
2432           archive.
2433        """
2434        if tarinfo.issym():
2435            # Always search the entire archive.
2436            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2437            limit = None
2438        else:
2439            # Search the archive before the link, because a hard link is
2440            # just a reference to an already archived file.
2441            linkname = tarinfo.linkname
2442            limit = tarinfo
2443
2444        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2445        if member is None:
2446            raise KeyError("linkname %r not found" % linkname)
2447        return member
2448
2449    def __iter__(self):
2450        """Provide an iterator object.
2451        """
2452        if self._loaded:
2453            return iter(self.members)
2454        else:
2455            return TarIter(self)
2456
2457    def _dbg(self, level, msg):
2458        """Write debugging output to sys.stderr.
2459        """
2460        if level <= self.debug:
2461            print >> sys.stderr, msg
2462
2463    def __enter__(self):
2464        self._check()
2465        return self
2466
2467    def __exit__(self, type, value, traceback):
2468        if type is None:
2469            self.close()
2470        else:
2471            # An exception occurred. We must not call close() because
2472            # it would try to write end-of-archive blocks and padding.
2473            if not self._extfileobj:
2474                self.fileobj.close()
2475            self.closed = True
2476# class TarFile
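
# Illustrative sketch (not part of the original module): TarFile implements the
# context manager protocol above, so archives can be handled with a with-statement.
# The file name is hypothetical.
#
#   import tarfile
#   with tarfile.open("backup.tar.gz", "r:gz") as tf:
#       tf.extractall("restore")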
2477
2478class TarIter:
2479    """Iterator Class.
2480
2481       for tarinfo in TarFile(...):
2482           suite...
2483    """
2484
2485    def __init__(self, tarfile):
2486        """Construct a TarIter object.
2487        """
2488        self.tarfile = tarfile
2489        self.index = 0
2490    def __iter__(self):
2491        """Return iterator object.
2492        """
2493        return self
2494    def next(self):
2495        """Return the next item using TarFile's next() method.
2496           When all members have been read, set TarFile as _loaded.
2497        """
2498        # Fix for SF #1100429: Under rare circumstances it can
2499        # happen that getmembers() is called during iteration,
2500        # which will cause TarIter to stop prematurely.
2501
2502        if self.index == 0 and self.tarfile.firstmember is not None:
2503            tarinfo = self.tarfile.next()
2504        elif self.index < len(self.tarfile.members):
2505            tarinfo = self.tarfile.members[self.index]
2506        elif not self.tarfile._loaded:
2507            tarinfo = self.tarfile.next()
2508            if not tarinfo:
2509                self.tarfile._loaded = True
2510                raise StopIteration
2511        else:
2512            raise StopIteration
2513        self.index += 1
2514        return tarinfo
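
# Illustrative sketch (not part of the original module): iterating over a TarFile
# uses TarIter, which fetches members lazily; this also works for non-seekable
# streams opened with the "r|*" modes.  Reading from stdin here is hypothetical.
#
#   import sys, tarfile
#   tf = tarfile.open(mode="r|gz", fileobj=sys.stdin)
#   for info in tf:
#       print info.name
#   tf.close()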
2515
2516# Helper classes for sparse file support
2517class _section:
2518    """Base class for _data and _hole.
2519    """
2520    def __init__(self, offset, size):
2521        self.offset = offset
2522        self.size = size
2523    def __contains__(self, offset):
2524        return self.offset <= offset < self.offset + self.size
2525
2526class _data(_section):
2527    """Represent a data section in a sparse file.
2528    """
2529    def __init__(self, offset, size, realpos):
2530        _section.__init__(self, offset, size)
2531        self.realpos = realpos
2532
2533class _hole(_section):
2534    """Represent a hole section in a sparse file.
2535    """
2536    pass
2537
2538class _ringbuffer(list):
2539    """Ring buffer that remembers the position of the last match, so that
2540       successive lookups of increasing offsets avoid rescanning the list.
2541    """
2542    def __init__(self):
2543        self.idx = 0
2544    def find(self, offset):
2545        idx = self.idx
2546        while True:
2547            item = self[idx]
2548            if offset in item:
2549                break
2550            idx += 1
2551            if idx == len(self):
2552                idx = 0
2553            if idx == self.idx:
2554                # End of File
2555                return None
2556        self.idx = idx
2557        return item
2558
2559#---------------------------------------------
2560# zipfile compatible TarFile class
2561#---------------------------------------------
2562TAR_PLAIN = 0           # zipfile.ZIP_STORED
2563TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
2564class TarFileCompat:
2565    """TarFile class compatible with standard module zipfile's
2566       ZipFile class.
2567    """
2568    def __init__(self, file, mode="r", compression=TAR_PLAIN):
2569        from warnings import warnpy3k
2570        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2571                stacklevel=2)
2572        if compression == TAR_PLAIN:
2573            self.tarfile = TarFile.taropen(file, mode)
2574        elif compression == TAR_GZIPPED:
2575            self.tarfile = TarFile.gzopen(file, mode)
2576        else:
2577            raise ValueError("unknown compression constant")
2578        if mode[0:1] == "r":
2579            members = self.tarfile.getmembers()
2580            for m in members:
2581                m.filename = m.name
2582                m.file_size = m.size
2583                m.date_time = time.gmtime(m.mtime)[:6]
2584    def namelist(self):
2585        return map(lambda m: m.name, self.infolist())
2586    def infolist(self):
2587        return filter(lambda m: m.type in REGULAR_TYPES,
2588                      self.tarfile.getmembers())
2589    def printdir(self):
2590        self.tarfile.list()
2591    def testzip(self):
2592        return
2593    def getinfo(self, name):
2594        return self.tarfile.getmember(name)
2595    def read(self, name):
2596        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2597    def write(self, filename, arcname=None, compress_type=None):
2598        self.tarfile.add(filename, arcname)
2599    def writestr(self, zinfo, bytes):
2600        try:
2601            from cStringIO import StringIO
2602        except ImportError:
2603            from StringIO import StringIO
2604        import calendar
2605        tinfo = TarInfo(zinfo.filename)
2606        tinfo.size = len(bytes)
2607        tinfo.mtime = calendar.timegm(zinfo.date_time)
2608        self.tarfile.addfile(tinfo, StringIO(bytes))
2609    def close(self):
2610        self.tarfile.close()
2611#class TarFileCompat
2612
2613#--------------------
2614# exported functions
2615#--------------------
2616def is_tarfile(name):
2617    """Return True if name points to a tar archive that we
2618       are able to handle, else return False.
2619    """
2620    try:
2621        t = open(name)
2622        t.close()
2623        return True
2624    except TarError:
2625        return False
2626
2627open = TarFile.open
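
# Illustrative sketch (not part of the original module): checking a path with
# is_tarfile() before opening it.  The file name is hypothetical.
#
#   import tarfile
#   if tarfile.is_tarfile("download.bin"):
#       tf = tarfile.open("download.bin")
#       print tf.getnames()
#       tf.close()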