Back to index

moin  1.9.0~rc2
tarfile.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: iso-8859-1 -*-
00003 #-------------------------------------------------------------------
00004 # tarfile.py (from Python 2.5.4, some 2.3/2.4 compat hacks added,
00005 # fix for mode='a' [from 2.6 trunk])
00006 # tarfile was broken in various earlier Python versions, so we use
00007 # this (sane) version because otherwise our tests fail.
00008 #-------------------------------------------------------------------
00009 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
00010 # All rights reserved.
00011 #
00012 # Permission  is  hereby granted,  free  of charge,  to  any person
00013 # obtaining a  copy of  this software  and associated documentation
00014 # files  (the  "Software"),  to   deal  in  the  Software   without
00015 # restriction,  including  without limitation  the  rights to  use,
00016 # copy, modify, merge, publish, distribute, sublicense, and/or sell
00017 # copies  of  the  Software,  and to  permit  persons  to  whom the
00018 # Software  is  furnished  to  do  so,  subject  to  the  following
00019 # conditions:
00020 #
00021 # The above copyright  notice and this  permission notice shall  be
00022 # included in all copies or substantial portions of the Software.
00023 #
00024 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
00025 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
00026 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
00027 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
00028 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
00029 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
00030 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
00031 # OTHER DEALINGS IN THE SOFTWARE.
00032 #
00033 """Read from and write to tar format archives.
00034 """
00035 
00036 __version__ = "$Revision: 53162 $"
00037 # $Source$
00038 
00039 version     = "0.8.0"
00040 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
00041 __date__    = "$Date: 2006-12-27 21:36:58 +1100 (Wed, 27 Dec 2006) $"
00042 __cvsid__   = "$Id: tarfile.py 53162 2006-12-27 10:36:58Z lars.gustaebel $"
00043 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
00044 
00045 #---------
00046 # Imports
00047 #---------
00048 import sys
00049 import os
00050 
00051 # the 2.5.1 tarfile needs those and python 2.3/2.4 does not have them:
00052 try:
00053     os.SEEK_SET
00054 except AttributeError:
00055     os.SEEK_SET, os.SEEK_CUR, os.SEEK_END = range(3)
00056 
00057 import shutil
00058 import stat
00059 import errno
00060 import time
00061 import struct
00062 import copy
00063 
00064 if sys.platform == 'mac':
00065     # This module needs work for MacOS9, especially in the area of pathname
00066     # handling. In many places it is assumed a simple substitution of / by the
00067     # local os.path.sep is good enough to convert pathnames, but this does not
00068     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
00069     raise ImportError, "tarfile does not work for platform==mac"
00070 
00071 try:
00072     import grp, pwd
00073 except ImportError:
00074     grp = pwd = None
00075 
00076 # from tarfile import *
00077 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
00078 
00079 #---------------------------------------------------------
00080 # tar constants
00081 #---------------------------------------------------------
00082 NUL        = "\0"               # the null character
00083 BLOCKSIZE  = 512                # length of processing blocks, in bytes
00084 RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks = 10240 bytes)
00085 MAGIC      = "ustar"            # magic tar string
00086 VERSION    = "00"               # version number
00087 
00088 LENGTH_NAME    = 100            # maximum length of a filename
00089 LENGTH_LINK    = 100            # maximum length of a linkname
00090 LENGTH_PREFIX  = 155            # maximum length of the prefix field
00091 MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits, i.e. 8**11 - 1)
00092 
00093 REGTYPE  = "0"                  # regular file
00094 AREGTYPE = "\0"                 # regular file (type flag is a NUL byte)
00095 LNKTYPE  = "1"                  # link (inside tarfile)
00096 SYMTYPE  = "2"                  # symbolic link
00097 CHRTYPE  = "3"                  # character special device
00098 BLKTYPE  = "4"                  # block special device
00099 DIRTYPE  = "5"                  # directory
00100 FIFOTYPE = "6"                  # fifo special device
00101 CONTTYPE = "7"                  # contiguous file
00102 
00103 GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
00104 GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
00105 GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
00106 
00107 #---------------------------------------------------------
00108 # tarfile constants
00109 #---------------------------------------------------------
00110 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
00111                    SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
00112                    CONTTYPE, CHRTYPE, BLKTYPE,
00113                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
00114                    GNUTYPE_SPARSE)
00115 
00116 REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
00117                  CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
00118 
00119 #---------------------------------------------------------
00120 # Bits used in the mode field, values in octal.
00121 #---------------------------------------------------------
00122 S_IFLNK = 0120000        # symbolic link
00123 S_IFREG = 0100000        # regular file
00124 S_IFBLK = 0060000        # block device
00125 S_IFDIR = 0040000        # directory
00126 S_IFCHR = 0020000        # character device
00127 S_IFIFO = 0010000        # fifo
00128 
00129 TSUID   = 04000          # set UID on execution
00130 TSGID   = 02000          # set GID on execution
00131 TSVTX   = 01000          # reserved; rendered as "t"/"T" by filemode_table
00132 
00133 TUREAD  = 0400           # read by owner
00134 TUWRITE = 0200           # write by owner
00135 TUEXEC  = 0100           # execute/search by owner
00136 TGREAD  = 0040           # read by group
00137 TGWRITE = 0020           # write by group
00138 TGEXEC  = 0010           # execute/search by group
00139 TOREAD  = 0004           # read by other
00140 TOWRITE = 0002           # write by other
00141 TOEXEC  = 0001           # execute/search by other
00142 
00143 #---------------------------------------------------------
00144 # Some useful functions
00145 #---------------------------------------------------------
00146 
def stn(s, length):
    """Return s as a fixed-width field of exactly length characters.

       Strings longer than length are cut off, shorter ones are
       padded on the right with NUL characters.
    """
    padding = NUL * (length - len(s))
    return s[:length] + padding
00151 
def nts(s):
    """Return the part of field s that precedes its first null char.

       A field without any null char is returned in full.
    """
    # Splitting once on NUL keeps everything before the terminator; if
    # there is no terminator the single piece is the whole field.
    return s.split("\0", 1)[0]
00160 
00161 def nti(s):
00162     """Convert a number field to a python number.
00163     """
00164     # There are two possible encodings for a number field, see
00165     # itn() below.
00166     if s[0] != chr(0200):
00167         n = int(nts(s) or "0", 8)
00168     else:
00169         n = 0L
00170         for i in xrange(len(s) - 1):
00171             n <<= 8
00172             n += ord(s[i + 1])
00173     return n
00174 
def itn(n, digits=8, posix=False):
    """Encode a python number as a tar header number field.

       The result is digits characters long.  Values that fit are
       written the POSIX 1003.1-1988 way: digits-1 octal digits
       followed by a null byte, which covers 0 .. 8**(digits-1) - 1.
       Anything else is written the GNU tar way: a leading 0200
       (0x80) byte followed by a big-endian representation in
       digits-1 bytes, which covers values up to 256**(digits-1) - 1.
       With posix=True the GNU form is refused and ValueError is
       raised instead.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the low byte each round and put it in front, so the
    # list ends up in big-endian order.
    octets = []
    while len(octets) < width:
        octets.insert(0, chr(n & 0xff))
        n >>= 8
    return chr(0x80) + "".join(octets)
00201 
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

       All characters of the 512 byte header are summed up, except
       for the 8 byte chksum field (offsets 148-155) which counts as
       if it was filled with spaces.  According to the GNU tar
       sources, some tars (Sun and NeXT) sum signed chars, which
       gives a different result once a char has its high bit set,
       so both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field; the constant 256 is what eight
    # spaces (8 * 32) would have contributed.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
00214 
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       With length None everything up to EOF is copied (through
       shutil.copyfileobj); with length 0 nothing is done.  If src
       runs dry before length bytes were copied, IOError is raised.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes, a short read means premature EOF.
        buf = src.read(count)
        if len(buf) < count:
            raise IOError("end of file reached")
        dst.write(buf)

    blocks, remainder = divmod(length, BUFSIZE)
    copied = 0
    while copied < blocks:
        transfer(BUFSIZE)
        copied += 1

    if remainder != 0:
        transfer(remainder)
    return
00239 
# One entry per character of the mode string.  Each entry lists
# (bits, char) candidates in order of precedence; the first candidate
# whose bits are all set in the mode supplies the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"), (S_IFREG, "-"), (S_IFBLK, "b"),
     (S_IFDIR, "d"), (S_IFCHR, "c"), (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"), (TSUID, "S"), (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"), (TSGID, "S"), (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"), (TSVTX, "T"), (TOEXEC, "x")),
)

def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" stays in place when none of the candidates match.
        symbol = "-"
        for bit, char in candidates:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
00281 
# normpath() always hands back "/" separated paths: where the platform
# separator already is "/", os.path.normpath can be used as it is,
# otherwise its result has the separators swapped afterwards.
if os.sep == "/":
    normpath = os.path.normpath
else:
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
00286 
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """Raised for general errors during extraction."""

class ReadError(TarError):
    """Raised for tar archives that cannot be read."""

class CompressionError(TarError):
    """Raised when a compression method is not available."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
00302 
00303 #---------------------------
00304 # internal stream interface
00305 #---------------------------
00306 class _LowLevelFile:
00307     """Low-level file object. Supports reading and writing.
00308        It is used instead of a regular file object for streaming
00309        access.
00310     """
00311 
00312     def __init__(self, name, mode):
00313         mode = {
00314             "r": os.O_RDONLY,
00315             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
00316         }[mode]
00317         if hasattr(os, "O_BINARY"):
00318             mode |= os.O_BINARY
00319         self.fd = os.open(name, mode)
00320 
00321     def close(self):
00322         os.close(self.fd)
00323 
00324     def read(self, size):
00325         return os.read(self.fd, size)
00326 
00327     def write(self, s):
00328         os.write(self.fd, s)
00329 
00330 class _Stream:
00331     """Class that serves as an adapter between TarFile and
00332        a stream-like object.  The stream-like object only
00333        needs to have a read() or write() method and is accessed
00334        blockwise.  Use of gzip or bzip2 compression is possible.
00335        A stream-like object could be for example: sys.stdin,
00336        sys.stdout, a socket, a tape device etc.
00337 
00338        _Stream is intended to be used only internally.
00339     """
00340 
00341     def __init__(self, name, mode, comptype, fileobj, bufsize):
00342         """Construct a _Stream object.
00343         """
00344         self._extfileobj = True
00345         if fileobj is None:
00346             fileobj = _LowLevelFile(name, mode)
00347             self._extfileobj = False
00348 
00349         if comptype == '*':
00350             # Enable transparent compression detection for the
00351             # stream interface
00352             fileobj = _StreamProxy(fileobj)
00353             comptype = fileobj.getcomptype()
00354 
00355         self.name     = name or ""
00356         self.mode     = mode
00357         self.comptype = comptype
00358         self.fileobj  = fileobj
00359         self.bufsize  = bufsize
00360         self.buf      = ""
00361         self.pos      = 0L
00362         self.closed   = False
00363 
00364         if comptype == "gz":
00365             try:
00366                 import zlib
00367             except ImportError:
00368                 raise CompressionError("zlib module is not available")
00369             self.zlib = zlib
00370             self.crc = zlib.crc32("")
00371             if mode == "r":
00372                 self._init_read_gz()
00373             else:
00374                 self._init_write_gz()
00375 
00376         if comptype == "bz2":
00377             try:
00378                 import bz2
00379             except ImportError:
00380                 raise CompressionError("bz2 module is not available")
00381             if mode == "r":
00382                 self.dbuf = ""
00383                 self.cmp = bz2.BZ2Decompressor()
00384             else:
00385                 self.cmp = bz2.BZ2Compressor()
00386 
00387     def __del__(self):
00388         if hasattr(self, "closed") and not self.closed:
00389             self.close()
00390 
00391     def _init_write_gz(self):
00392         """Initialize for writing with gzip compression.
00393         """
00394         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
00395                                             -self.zlib.MAX_WBITS,
00396                                             self.zlib.DEF_MEM_LEVEL,
00397                                             0)
00398         timestamp = struct.pack("<L", long(time.time()))
00399         self.__write("\037\213\010\010%s\002\377" % timestamp)
00400         if self.name.endswith(".gz"):
00401             self.name = self.name[:-3]
00402         self.__write(self.name + NUL)
00403 
00404     def write(self, s):
00405         """Write string s to the stream.
00406         """
00407         if self.comptype == "gz":
00408             self.crc = self.zlib.crc32(s, self.crc)
00409         self.pos += len(s)
00410         if self.comptype != "tar":
00411             s = self.cmp.compress(s)
00412         self.__write(s)
00413 
00414     def __write(self, s):
00415         """Write string s to the stream if a whole new block
00416            is ready to be written.
00417         """
00418         self.buf += s
00419         while len(self.buf) > self.bufsize:
00420             self.fileobj.write(self.buf[:self.bufsize])
00421             self.buf = self.buf[self.bufsize:]
00422 
00423     def close(self):
00424         """Close the _Stream object. No operation should be
00425            done on it afterwards.
00426         """
00427         if self.closed:
00428             return
00429 
00430         if self.mode == "w" and self.comptype != "tar":
00431             self.buf += self.cmp.flush()
00432 
00433         if self.mode == "w" and self.buf:
00434             self.fileobj.write(self.buf)
00435             self.buf = ""
00436             if self.comptype == "gz":
00437                 # The native zlib crc is an unsigned 32-bit integer, but
00438                 # the Python wrapper implicitly casts that to a signed C
00439                 # long.  So, on a 32-bit box self.crc may "look negative",
00440                 # while the same crc on a 64-bit box may "look positive".
00441                 # To avoid irksome warnings from the `struct` module, force
00442                 # it to look positive on all boxes.
00443                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
00444                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
00445 
00446         if not self._extfileobj:
00447             self.fileobj.close()
00448 
00449         self.closed = True
00450 
00451     def _init_read_gz(self):
00452         """Initialize for reading a gzip compressed fileobj.
00453         """
00454         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
00455         self.dbuf = ""
00456 
00457         # taken from gzip.GzipFile with some alterations
00458         if self.__read(2) != "\037\213":
00459             raise ReadError("not a gzip file")
00460         if self.__read(1) != "\010":
00461             raise CompressionError("unsupported compression method")
00462 
00463         flag = ord(self.__read(1))
00464         self.__read(6)
00465 
00466         if flag & 4:
00467             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
00468             self.read(xlen)
00469         if flag & 8:
00470             while True:
00471                 s = self.__read(1)
00472                 if not s or s == NUL:
00473                     break
00474         if flag & 16:
00475             while True:
00476                 s = self.__read(1)
00477                 if not s or s == NUL:
00478                     break
00479         if flag & 2:
00480             self.__read(2)
00481 
00482     def tell(self):
00483         """Return the stream's file pointer position.
00484         """
00485         return self.pos
00486 
00487     def seek(self, pos=0):
00488         """Set the stream's file pointer to pos. Negative seeking
00489            is forbidden.
00490         """
00491         if pos - self.pos >= 0:
00492             blocks, remainder = divmod(pos - self.pos, self.bufsize)
00493             for i in xrange(blocks):
00494                 self.read(self.bufsize)
00495             self.read(remainder)
00496         else:
00497             raise StreamError("seeking backwards is not allowed")
00498         return self.pos
00499 
00500     def read(self, size=None):
00501         """Return the next size number of bytes from the stream.
00502            If size is not defined, return all bytes of the stream
00503            up to EOF.
00504         """
00505         if size is None:
00506             t = []
00507             while True:
00508                 buf = self._read(self.bufsize)
00509                 if not buf:
00510                     break
00511                 t.append(buf)
00512             buf = "".join(t)
00513         else:
00514             buf = self._read(size)
00515         self.pos += len(buf)
00516         return buf
00517 
00518     def _read(self, size):
00519         """Return size bytes from the stream.
00520         """
00521         if self.comptype == "tar":
00522             return self.__read(size)
00523 
00524         c = len(self.dbuf)
00525         t = [self.dbuf]
00526         while c < size:
00527             buf = self.__read(self.bufsize)
00528             if not buf:
00529                 break
00530             buf = self.cmp.decompress(buf)
00531             t.append(buf)
00532             c += len(buf)
00533         t = "".join(t)
00534         self.dbuf = t[size:]
00535         return t[:size]
00536 
00537     def __read(self, size):
00538         """Return size bytes from stream. If internal buffer is empty,
00539            read another block from the stream.
00540         """
00541         c = len(self.buf)
00542         t = [self.buf]
00543         while c < size:
00544             buf = self.fileobj.read(self.bufsize)
00545             if not buf:
00546                 break
00547             t.append(buf)
00548             c += len(buf)
00549         t = "".join(t)
00550         self.buf = t[size:]
00551         return t[:size]
00552 # class _Stream
00553 
00554 class _StreamProxy(object):
00555     """Small proxy class that enables transparent compression
00556        detection for the Stream interface (mode 'r|*').
00557     """
00558 
00559     def __init__(self, fileobj):
00560         self.fileobj = fileobj
00561         self.buf = self.fileobj.read(BLOCKSIZE)
00562 
00563     def read(self, size):
00564         self.read = self.fileobj.read
00565         return self.buf
00566 
00567     def getcomptype(self):
00568         if self.buf.startswith("\037\213\010"):
00569             return "gz"
00570         if self.buf.startswith("BZh91"):
00571             return "bz2"
00572         return "tar"
00573 
00574     def close(self):
00575         self.fileobj.close()
00576 # class StreamProxy
00577 
00578 class _BZ2Proxy(object):
00579     """Small proxy class that enables external file object
00580        support for "r:bz2" and "w:bz2" modes. This is actually
00581        a workaround for a limitation in bz2 module's BZ2File
00582        class which (unlike gzip.GzipFile) has no support for
00583        a file object argument.
00584     """
00585 
00586     blocksize = 16 * 1024
00587 
00588     def __init__(self, fileobj, mode):
00589         self.fileobj = fileobj
00590         self.mode = mode
00591         self.init()
00592 
00593     def init(self):
00594         import bz2
00595         self.pos = 0
00596         if self.mode == "r":
00597             self.bz2obj = bz2.BZ2Decompressor()
00598             self.fileobj.seek(0)
00599             self.buf = ""
00600         else:
00601             self.bz2obj = bz2.BZ2Compressor()
00602 
00603     def read(self, size):
00604         b = [self.buf]
00605         x = len(self.buf)
00606         while x < size:
00607             try:
00608                 raw = self.fileobj.read(self.blocksize)
00609                 data = self.bz2obj.decompress(raw)
00610                 b.append(data)
00611             except EOFError:
00612                 break
00613             x += len(data)
00614         self.buf = "".join(b)
00615 
00616         buf = self.buf[:size]
00617         self.buf = self.buf[size:]
00618         self.pos += len(buf)
00619         return buf
00620 
00621     def seek(self, pos):
00622         if pos < self.pos:
00623             self.init()
00624         self.read(pos - self.pos)
00625 
00626     def tell(self):
00627         return self.pos
00628 
00629     def write(self, data):
00630         self.pos += len(data)
00631         raw = self.bz2obj.compress(data)
00632         self.fileobj.write(raw)
00633 
00634     def close(self):
00635         if self.mode == "w":
00636             raw = self.bz2obj.flush()
00637             self.fileobj.write(raw)
00638         self.fileobj.close()
00639 # class _BZ2Proxy
00640 
00641 #------------------------
00642 # Extraction file object
00643 #------------------------
00644 class _FileInFile(object):
00645     """A thin wrapper around an existing file object that
00646        provides a part of its data as an individual file
00647        object.
00648     """
00649 
00650     def __init__(self, fileobj, offset, size, sparse=None):
00651         self.fileobj = fileobj
00652         self.offset = offset
00653         self.size = size
00654         self.sparse = sparse
00655         self.position = 0
00656 
00657     def tell(self):
00658         """Return the current file position.
00659         """
00660         return self.position
00661 
00662     def seek(self, position):
00663         """Seek to a position in the file.
00664         """
00665         self.position = position
00666 
00667     def read(self, size=None):
00668         """Read data from the file.
00669         """
00670         if size is None:
00671             size = self.size - self.position
00672         else:
00673             size = min(size, self.size - self.position)
00674 
00675         if self.sparse is None:
00676             return self.readnormal(size)
00677         else:
00678             return self.readsparse(size)
00679 
00680     def readnormal(self, size):
00681         """Read operation for regular files.
00682         """
00683         self.fileobj.seek(self.offset + self.position)
00684         self.position += size
00685         return self.fileobj.read(size)
00686 
00687     def readsparse(self, size):
00688         """Read operation for sparse files.
00689         """
00690         data = []
00691         while size > 0:
00692             buf = self.readsparsesection(size)
00693             if not buf:
00694                 break
00695             size -= len(buf)
00696             data.append(buf)
00697         return "".join(data)
00698 
00699     def readsparsesection(self, size):
00700         """Read a single section of a sparse file.
00701         """
00702         section = self.sparse.find(self.position)
00703 
00704         if section is None:
00705             return ""
00706 
00707         size = min(size, section.offset + section.size - self.position)
00708 
00709         if isinstance(section, _data):
00710             realpos = section.realpos + self.position - section.offset
00711             self.fileobj.seek(self.offset + realpos)
00712             self.position += size
00713             return self.fileobj.read(size)
00714         else:
00715             self.position += size
00716             return NUL * size
00717 #class _FileInFile
00718 
00719 
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up reading the data of the member tarinfo from the
           file object of tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""        # data read ahead by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.  Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if size is not None and size < 0:
            # Like for regular file objects a negative size means
            # "everything".  Used as a count, it would slice the
            # wrong end of the buffer and ask the underlying reader
            # for a negative number of bytes.
            size = None

        buf = ""
        if self.buffer:
            # Serve the data that readline() has read ahead first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.  Raise ValueError
           if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read ahead blockwise until a newline shows up or the
            # data is exhausted.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line.  A negative one
        # would otherwise be used as a slice index counted from the
        # end of the buffer.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.  Raise ValueError if
           the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.  whence is os.SEEK_SET,
           os.SEEK_CUR or os.SEEK_END, any other value raises
           ValueError, as does seeking in a closed file.  The
           resulting position is clamped to the range 0..size.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Data that was read ahead does not match the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
00847 
00848 #------------------
00849 # Exported Classes
00850 #------------------
00851 class TarInfo(object):
00852     """Informational class which holds the details about an
00853        archive member given by a tar header block.
00854        TarInfo objects are returned by TarFile.getmember(),
00855        TarFile.getmembers() and TarFile.gettarinfo() and are
00856        usually created internally.
00857     """
00858 
00859     def __init__(self, name=""):
00860         """Construct a TarInfo object. name is the optional name
00861            of the member.
00862         """
00863         self.name = name        # member name (dirnames must end with '/')
00864         self.mode = 0666        # file permissions
00865         self.uid = 0            # user id
00866         self.gid = 0            # group id
00867         self.size = 0           # file size
00868         self.mtime = 0          # modification time
00869         self.chksum = 0         # header checksum
00870         self.type = REGTYPE     # member type
00871         self.linkname = ""      # link name
00872         self.uname = "user"     # user name
00873         self.gname = "group"    # group name
00874         self.devmajor = 0       # device major number
00875         self.devminor = 0       # device minor number
00876 
00877         self.offset = 0         # the tar header starts here
00878         self.offset_data = 0    # the file's data starts here
00879 
00880     def __repr__(self):
00881         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
00882 
00883     def frombuf(cls, buf):
00884         """Construct a TarInfo object from a 512 byte string buffer.
00885         """
00886         if len(buf) != BLOCKSIZE:
00887             raise ValueError("truncated header")
00888         if buf.count(NUL) == BLOCKSIZE:
00889             raise ValueError("empty header")
00890 
00891         tarinfo = cls()
00892         tarinfo.buf = buf
00893         tarinfo.name = nts(buf[0:100])
00894         tarinfo.mode = nti(buf[100:108])
00895         tarinfo.uid = nti(buf[108:116])
00896         tarinfo.gid = nti(buf[116:124])
00897         tarinfo.size = nti(buf[124:136])
00898         tarinfo.mtime = nti(buf[136:148])
00899         tarinfo.chksum = nti(buf[148:156])
00900         tarinfo.type = buf[156:157]
00901         tarinfo.linkname = nts(buf[157:257])
00902         tarinfo.uname = nts(buf[265:297])
00903         tarinfo.gname = nts(buf[297:329])
00904         tarinfo.devmajor = nti(buf[329:337])
00905         tarinfo.devminor = nti(buf[337:345])
00906         prefix = nts(buf[345:500])
00907 
00908         if prefix and not tarinfo.issparse():
00909             tarinfo.name = prefix + "/" + tarinfo.name
00910 
00911         if tarinfo.chksum not in calc_chksums(buf):
00912             raise ValueError("invalid header")
00913         return tarinfo
00914     frombuf = classmethod(frombuf)
00915 
00916     def tobuf(self, posix=False):
00917         """Return a tar header as a string of 512 byte blocks.
00918         """
00919         buf = ""
00920         type = self.type
00921         prefix = ""
00922 
00923         if self.name.endswith("/"):
00924             type = DIRTYPE
00925 
00926         if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
00927             # Prevent "././@LongLink" from being normalized.
00928             name = self.name
00929         else:
00930             name = normpath(self.name)
00931 
00932         if type == DIRTYPE:
00933             # directories should end with '/'
00934             name += "/"
00935 
00936         linkname = self.linkname
00937         if linkname:
00938             # if linkname is empty we end up with a '.'
00939             linkname = normpath(linkname)
00940 
00941         if posix:
00942             if self.size > MAXSIZE_MEMBER:
00943                 raise ValueError("file is too large (>= 8 GB)")
00944 
00945             if len(self.linkname) > LENGTH_LINK:
00946                 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
00947 
00948             if len(name) > LENGTH_NAME:
00949                 prefix = name[:LENGTH_PREFIX + 1]
00950                 while prefix and prefix[-1] != "/":
00951                     prefix = prefix[:-1]
00952 
00953                 name = name[len(prefix):]
00954                 prefix = prefix[:-1]
00955 
00956                 if not prefix or len(name) > LENGTH_NAME:
00957                     raise ValueError("name is too long")
00958 
00959         else:
00960             if len(self.linkname) > LENGTH_LINK:
00961                 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
00962 
00963             if len(name) > LENGTH_NAME:
00964                 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
00965 
00966         parts = [
00967             stn(name, 100),
00968             itn(self.mode & 07777, 8, posix),
00969             itn(self.uid, 8, posix),
00970             itn(self.gid, 8, posix),
00971             itn(self.size, 12, posix),
00972             itn(self.mtime, 12, posix),
00973             "        ", # checksum field
00974             type,
00975             stn(self.linkname, 100),
00976             stn(MAGIC, 6),
00977             stn(VERSION, 2),
00978             stn(self.uname, 32),
00979             stn(self.gname, 32),
00980             itn(self.devmajor, 8, posix),
00981             itn(self.devminor, 8, posix),
00982             stn(prefix, 155)
00983         ]
00984 
00985         buf += "".join(parts).ljust(BLOCKSIZE, NUL)
00986         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
00987         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
00988         self.buf = buf
00989         return buf
00990 
00991     def _create_gnulong(self, name, type):
00992         """Create a GNU longname/longlink header from name.
00993            It consists of an extended tar header, with the length
00994            of the longname as size, followed by data blocks,
00995            which contain the longname as a null terminated string.
00996         """
00997         name += NUL
00998 
00999         tarinfo = self.__class__()
01000         tarinfo.name = "././@LongLink"
01001         tarinfo.type = type
01002         tarinfo.mode = 0
01003         tarinfo.size = len(name)
01004 
01005         # create extended header
01006         buf = tarinfo.tobuf()
01007         # create name blocks
01008         buf += name
01009         blocks, remainder = divmod(len(name), BLOCKSIZE)
01010         if remainder > 0:
01011             buf += (BLOCKSIZE - remainder) * NUL
01012         return buf
01013 
01014     def isreg(self):
01015         return self.type in REGULAR_TYPES
01016     def isfile(self):
01017         return self.isreg()
01018     def isdir(self):
01019         return self.type == DIRTYPE
01020     def issym(self):
01021         return self.type == SYMTYPE
01022     def islnk(self):
01023         return self.type == LNKTYPE
01024     def ischr(self):
01025         return self.type == CHRTYPE
01026     def isblk(self):
01027         return self.type == BLKTYPE
01028     def isfifo(self):
01029         return self.type == FIFOTYPE
01030     def issparse(self):
01031         return self.type == GNUTYPE_SPARSE
01032     def isdev(self):
01033         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
01034 # class TarInfo
01035 
01036 class TarFile(object):
01037     """The TarFile Class provides an interface to tar archives.
01038     """
01039 
    # Class-level defaults; they can be overridden on a subclass or on an
    # individual instance.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.
                                # (consulted by gettarinfo())

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.
                                # extract() re-raises EnvironmentError if
                                # errorlevel > 0 and ExtractError if
                                # errorlevel > 1.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)
                                # (handed to TarInfo.tobuf() by addfile())

    fileobject = ExFileObject   # class instantiated by extractfile() as
                                # fileobject(tarfile, tarinfo)
01056 
01057     def __init__(self, name=None, mode="r", fileobj=None):
01058         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
01059            read from an existing archive, 'a' to append data to an existing
01060            file or 'w' to create a new file overwriting an existing one. `mode'
01061            defaults to 'r'.
01062            If `fileobj' is given, it is used for reading or writing data. If it
01063            can be determined, `mode' is overridden by `fileobj's mode.
01064            `fileobj' is not closed, when TarFile is closed.
01065         """
01066         if len(mode) > 1 or mode not in "raw":
01067             raise ValueError("mode must be 'r', 'a' or 'w'")
01068         self._mode = mode
01069         self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
01070 
01071         if not fileobj:
01072             if self._mode == "a" and not os.path.exists(name):
01073                 # Create nonexistent files in append mode.
01074                 self._mode = "w"
01075                 self.mode = "wb"
01076             fileobj = file(name, self.mode)
01077             self._extfileobj = False
01078         else:
01079             if name is None and hasattr(fileobj, "name"):
01080                 name = fileobj.name
01081             if hasattr(fileobj, "mode"):
01082                 self.mode = fileobj.mode
01083             self._extfileobj = True
01084         self.name = name and os.path.abspath(name) or None
01085         self.fileobj = fileobj
01086 
01087         # Init datastructures
01088         self.closed = False
01089         self.members = []       # list of members as TarInfo objects
01090         self._loaded = False    # flag if all members have been read
01091         self.offset = self.fileobj.tell()
01092                                 # current position in the archive file
01093         self.inodes = {}        # dictionary caching the inodes of
01094                                 # archive members already added
01095 
01096         if self._mode == "r":
01097             self.firstmember = None
01098             self.firstmember = self.next()
01099 
01100         if self._mode == "a":
01101             # Move to the end of the archive,
01102             # before the first empty block.
01103             self.firstmember = None
01104             while True:
01105                 try:
01106                     tarinfo = self.next()
01107                 except ReadError:
01108                     self.fileobj.seek(0)
01109                     break
01110                 if tarinfo is None:
01111                     if self.offset > 0:
01112                         self.fileobj.seek(- BLOCKSIZE, 1)
01113                     break
01114 
01115         if self._mode in "aw":
01116             self._loaded = True
01117 
01118     #--------------------------------------------------------------------------
01119     # Below are the classmethods which act as alternate constructors to the
01120     # TarFile class. The open() method is the only one that is needed for
01121     # public use; it is the "super"-constructor and is able to select an
01122     # adequate "sub"-constructor for a particular compression using the mapping
01123     # from OPEN_METH.
01124     #
01125     # This concept allows one to subclass TarFile without losing the comfort of
01126     # the super-constructor. A sub-constructor is registered and made available
01127     # by adding it to the mapping in OPEN_METH.
01128 
01129     def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
01130         """Open a tar archive for reading, writing or appending. Return
01131            an appropriate TarFile class.
01132 
01133            mode:
01134            'r' or 'r:*' open for reading with transparent compression
01135            'r:'         open for reading exclusively uncompressed
01136            'r:gz'       open for reading with gzip compression
01137            'r:bz2'      open for reading with bzip2 compression
01138            'a' or 'a:'  open for appending, creating the file if necessary
01139            'w' or 'w:'  open for writing without compression
01140            'w:gz'       open for writing with gzip compression
01141            'w:bz2'      open for writing with bzip2 compression
01142 
01143            'r|*'        open a stream of tar blocks with transparent compression
01144            'r|'         open an uncompressed stream of tar blocks for reading
01145            'r|gz'       open a gzip compressed stream of tar blocks
01146            'r|bz2'      open a bzip2 compressed stream of tar blocks
01147            'w|'         open an uncompressed stream for writing
01148            'w|gz'       open a gzip compressed stream for writing
01149            'w|bz2'      open a bzip2 compressed stream for writing
01150         """
01151 
01152         if not name and not fileobj:
01153             raise ValueError("nothing to open")
01154 
01155         if mode in ("r", "r:*"):
01156             # Find out which *open() is appropriate for opening the file.
01157             for comptype in cls.OPEN_METH:
01158                 func = getattr(cls, cls.OPEN_METH[comptype])
01159                 if fileobj is not None:
01160                     saved_pos = fileobj.tell()
01161                 try:
01162                     return func(name, "r", fileobj)
01163                 except (ReadError, CompressionError):
01164                     if fileobj is not None:
01165                         fileobj.seek(saved_pos)
01166                     continue
01167             raise ReadError("file could not be opened successfully")
01168 
01169         elif ":" in mode:
01170             filemode, comptype = mode.split(":", 1)
01171             filemode = filemode or "r"
01172             comptype = comptype or "tar"
01173 
01174             # Select the *open() function according to
01175             # given compression.
01176             if comptype in cls.OPEN_METH:
01177                 func = getattr(cls, cls.OPEN_METH[comptype])
01178             else:
01179                 raise CompressionError("unknown compression type %r" % comptype)
01180             return func(name, filemode, fileobj)
01181 
01182         elif "|" in mode:
01183             filemode, comptype = mode.split("|", 1)
01184             filemode = filemode or "r"
01185             comptype = comptype or "tar"
01186 
01187             if filemode not in "rw":
01188                 raise ValueError("mode must be 'r' or 'w'")
01189 
01190             t = cls(name, filemode,
01191                     _Stream(name, filemode, comptype, fileobj, bufsize))
01192             t._extfileobj = False
01193             return t
01194 
01195         elif mode in "aw":
01196             return cls.taropen(name, mode, fileobj)
01197 
01198         raise ValueError("undiscernible mode")
01199     open = classmethod(open)
01200 
01201     def taropen(cls, name, mode="r", fileobj=None):
01202         """Open uncompressed tar archive name for reading or writing.
01203         """
01204         if len(mode) > 1 or mode not in "raw":
01205             raise ValueError("mode must be 'r', 'a' or 'w'")
01206         return cls(name, mode, fileobj)
01207     taropen = classmethod(taropen)
01208 
01209     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
01210         """Open gzip compressed tar archive name for reading or writing.
01211            Appending is not allowed.
01212         """
01213         if len(mode) > 1 or mode not in "rw":
01214             raise ValueError("mode must be 'r' or 'w'")
01215 
01216         try:
01217             import gzip
01218             gzip.GzipFile
01219         except (ImportError, AttributeError):
01220             raise CompressionError("gzip module is not available")
01221 
01222         if fileobj is None:
01223             fileobj = file(name, mode + "b")
01224 
01225         try:
01226             t = cls.taropen(name, mode,
01227                 gzip.GzipFile(name, mode, compresslevel, fileobj))
01228         except IOError:
01229             raise ReadError("not a gzip file")
01230         t._extfileobj = False
01231         return t
01232     gzopen = classmethod(gzopen)
01233 
01234     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
01235         """Open bzip2 compressed tar archive name for reading or writing.
01236            Appending is not allowed.
01237         """
01238         if len(mode) > 1 or mode not in "rw":
01239             raise ValueError("mode must be 'r' or 'w'.")
01240 
01241         try:
01242             import bz2
01243         except ImportError:
01244             raise CompressionError("bz2 module is not available")
01245 
01246         if fileobj is not None:
01247             fileobj = _BZ2Proxy(fileobj, mode)
01248         else:
01249             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
01250 
01251         try:
01252             t = cls.taropen(name, mode, fileobj)
01253         except IOError:
01254             raise ReadError("not a bzip2 file")
01255         t._extfileobj = False
01256         return t
01257     bz2open = classmethod(bz2open)
01258 
    # All *open() methods are registered here.
    # The keys are the compression names that may follow ':' in the mode
    # given to open(), the values are the names of the classmethods that
    # open() looks up with getattr(). For mode 'r' and 'r:*', open() tries
    # the registered methods in the iteration order of this dictionary.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
01265 
01266     #--------------------------------------------------------------------------
01267     # The public methods which TarFile provides:
01268 
01269     def close(self):
01270         """Close the TarFile. In write-mode, two finishing zero blocks are
01271            appended to the archive.
01272         """
01273         if self.closed:
01274             return
01275 
01276         if self._mode in "aw":
01277             self.fileobj.write(NUL * (BLOCKSIZE * 2))
01278             self.offset += (BLOCKSIZE * 2)
01279             # fill up the end with zero-blocks
01280             # (like option -b20 for tar does)
01281             blocks, remainder = divmod(self.offset, RECORDSIZE)
01282             if remainder > 0:
01283                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
01284 
01285         if not self._extfileobj:
01286             self.fileobj.close()
01287         self.closed = True
01288 
01289     def getmember(self, name):
01290         """Return a TarInfo object for member `name'. If `name' can not be
01291            found in the archive, KeyError is raised. If a member occurs more
01292            than once in the archive, its last occurence is assumed to be the
01293            most up-to-date version.
01294         """
01295         tarinfo = self._getmember(name)
01296         if tarinfo is None:
01297             raise KeyError("filename %r not found" % name)
01298         return tarinfo
01299 
01300     def getmembers(self):
01301         """Return the members of the archive as a list of TarInfo objects. The
01302            list has the same order as the members in the archive.
01303         """
01304         self._check()
01305         if not self._loaded:    # if we want to obtain a list of
01306             self._load()        # all members, we first have to
01307                                 # scan the whole archive.
01308         return self.members
01309 
01310     def getnames(self):
01311         """Return the members of the archive as a list of their names. It has
01312            the same order as the list returned by getmembers().
01313         """
01314         return [tarinfo.name for tarinfo in self.getmembers()]
01315 
01316     def gettarinfo(self, name=None, arcname=None, fileobj=None):
01317         """Create a TarInfo object for either the file `name' or the file
01318            object `fileobj' (using os.fstat on its file descriptor). You can
01319            modify some of the TarInfo's attributes before you add it using
01320            addfile(). If given, `arcname' specifies an alternative name for the
01321            file in the archive.
01322         """
01323         self._check("aw")
01324 
01325         # When fileobj is given, replace name by
01326         # fileobj's real name.
01327         if fileobj is not None:
01328             name = fileobj.name
01329 
01330         # Building the name of the member in the archive.
01331         # Backward slashes are converted to forward slashes,
01332         # Absolute paths are turned to relative paths.
01333         if arcname is None:
01334             arcname = name
01335         arcname = normpath(arcname)
01336         drv, arcname = os.path.splitdrive(arcname)
01337         while arcname[0:1] == "/":
01338             arcname = arcname[1:]
01339 
01340         # Now, fill the TarInfo object with
01341         # information specific for the file.
01342         tarinfo = TarInfo()
01343 
01344         # Use os.stat or os.lstat, depending on platform
01345         # and if symlinks shall be resolved.
01346         if fileobj is None:
01347             if hasattr(os, "lstat") and not self.dereference:
01348                 statres = os.lstat(name)
01349             else:
01350                 statres = os.stat(name)
01351         else:
01352             statres = os.fstat(fileobj.fileno())
01353         linkname = ""
01354 
01355         stmd = statres.st_mode
01356         if stat.S_ISREG(stmd):
01357             inode = (statres.st_ino, statres.st_dev)
01358             if not self.dereference and \
01359                     statres.st_nlink > 1 and inode in self.inodes:
01360                 # Is it a hardlink to an already
01361                 # archived file?
01362                 type = LNKTYPE
01363                 linkname = self.inodes[inode]
01364             else:
01365                 # The inode is added only if its valid.
01366                 # For win32 it is always 0.
01367                 type = REGTYPE
01368                 if inode[0]:
01369                     self.inodes[inode] = arcname
01370         elif stat.S_ISDIR(stmd):
01371             type = DIRTYPE
01372             if arcname[-1:] != "/":
01373                 arcname += "/"
01374         elif stat.S_ISFIFO(stmd):
01375             type = FIFOTYPE
01376         elif stat.S_ISLNK(stmd):
01377             type = SYMTYPE
01378             linkname = os.readlink(name)
01379         elif stat.S_ISCHR(stmd):
01380             type = CHRTYPE
01381         elif stat.S_ISBLK(stmd):
01382             type = BLKTYPE
01383         else:
01384             return None
01385 
01386         # Fill the TarInfo object with all
01387         # information we can get.
01388         tarinfo.name = arcname
01389         tarinfo.mode = stmd
01390         tarinfo.uid = statres.st_uid
01391         tarinfo.gid = statres.st_gid
01392         if stat.S_ISREG(stmd):
01393             tarinfo.size = statres.st_size
01394         else:
01395             tarinfo.size = 0L
01396         tarinfo.mtime = statres.st_mtime
01397         tarinfo.type = type
01398         tarinfo.linkname = linkname
01399         if pwd:
01400             try:
01401                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
01402             except KeyError:
01403                 pass
01404         if grp:
01405             try:
01406                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
01407             except KeyError:
01408                 pass
01409 
01410         if type in (CHRTYPE, BLKTYPE):
01411             if hasattr(os, "major") and hasattr(os, "minor"):
01412                 tarinfo.devmajor = os.major(statres.st_rdev)
01413                 tarinfo.devminor = os.minor(statres.st_rdev)
01414         return tarinfo
01415 
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced.
        """
        self._check()

        # Every member is printed on a line of its own. The print statements
        # ending in a comma keep the output on the current line, the bare
        # print at the end of the loop body terminates it.
        for tarinfo in self:
            if verbose:
                # permission string as produced by filemode()
                print filemode(tarinfo.mode),
                # owner/group: the names, or the numeric ids if a name
                # is empty
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                # character and block devices show "major,minor"
                # in place of the size
                if tarinfo.ischr() or tarinfo.isblk():
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                # modification time as local time, YYYY-MM-DD HH:MM:SS
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            print tarinfo.name,

            if verbose:
                # append the link target for symbolic and hard links
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            print
01444 
01445     def add(self, name, arcname=None, recursive=True):
01446         """Add the file `name' to the archive. `name' may be any type of file
01447            (directory, fifo, symbolic link, etc.). If given, `arcname'
01448            specifies an alternative name for the file in the archive.
01449            Directories are added recursively by default. This can be avoided by
01450            setting `recursive' to False.
01451         """
01452         self._check("aw")
01453 
01454         if arcname is None:
01455             arcname = name
01456 
01457         # Skip if somebody tries to archive the archive...
01458         if self.name is not None and os.path.abspath(name) == self.name:
01459             self._dbg(2, "tarfile: Skipped %r" % name)
01460             return
01461 
01462         # Special case: The user wants to add the current
01463         # working directory.
01464         if name == ".":
01465             if recursive:
01466                 if arcname == ".":
01467                     arcname = ""
01468                 for f in os.listdir("."):
01469                     self.add(f, os.path.join(arcname, f))
01470             return
01471 
01472         self._dbg(1, name)
01473 
01474         # Create a TarInfo object from the file.
01475         tarinfo = self.gettarinfo(name, arcname)
01476 
01477         if tarinfo is None:
01478             self._dbg(1, "tarfile: Unsupported type %r" % name)
01479             return
01480 
01481         # Append the tar header and data to the archive.
01482         if tarinfo.isreg():
01483             f = file(name, "rb")
01484             self.addfile(tarinfo, f)
01485             f.close()
01486 
01487         elif tarinfo.isdir():
01488             self.addfile(tarinfo)
01489             if recursive:
01490                 for f in os.listdir(name):
01491                     self.add(os.path.join(name, f), os.path.join(arcname, f))
01492 
01493         else:
01494             self.addfile(tarinfo)
01495 
01496     def addfile(self, tarinfo, fileobj=None):
01497         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
01498            given, tarinfo.size bytes are read from it and added to the archive.
01499            You can create TarInfo objects using gettarinfo().
01500            On Windows platforms, `fileobj' should always be opened with mode
01501            'rb' to avoid irritation about the file size.
01502         """
01503         self._check("aw")
01504 
01505         tarinfo = copy.copy(tarinfo)
01506 
01507         buf = tarinfo.tobuf(self.posix)
01508         self.fileobj.write(buf)
01509         self.offset += len(buf)
01510 
01511         # If there's data to follow, append it.
01512         if fileobj is not None:
01513             copyfileobj(fileobj, self.fileobj, tarinfo.size)
01514             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
01515             if remainder > 0:
01516                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
01517                 blocks += 1
01518             self.offset += blocks * BLOCKSIZE
01519 
01520         self.members.append(tarinfo)
01521 
01522     def extractall(self, path=".", members=None):
01523         """Extract all members from the archive to the current working
01524            directory and set owner, modification time and permissions on
01525            directories afterwards. `path' specifies a different directory
01526            to extract to. `members' is optional and must be a subset of the
01527            list returned by getmembers().
01528         """
01529         directories = []
01530 
01531         if members is None:
01532             members = self
01533 
01534         for tarinfo in members:
01535             if tarinfo.isdir():
01536                 # Extract directories with a safe mode.
01537                 directories.append(tarinfo)
01538                 tarinfo = copy.copy(tarinfo)
01539                 tarinfo.mode = 0700
01540             self.extract(tarinfo, path)
01541 
01542         # Reverse sort directories.
01543         directories.sort(lambda a, b: cmp(a.name, b.name))
01544         directories.reverse()
01545 
01546         # Set correct owner, mtime and filemode on directories.
01547         for tarinfo in directories:
01548             dirpath = os.path.join(path, tarinfo.name)
01549             try:
01550                 self.chown(tarinfo, dirpath)
01551                 self.utime(tarinfo, dirpath)
01552                 self.chmod(tarinfo, dirpath)
01553             except ExtractError, e:
01554                 if self.errorlevel > 1:
01555                     raise
01556                 else:
01557                     self._dbg(1, "tarfile: %s" % e)
01558 
01559     def extract(self, member, path=""):
01560         """Extract a member from the archive to the current working directory,
01561            using its full name. Its file information is extracted as accurately
01562            as possible. `member' may be a filename or a TarInfo object. You can
01563            specify a different directory using `path'.
01564         """
01565         self._check("r")
01566 
01567         if isinstance(member, TarInfo):
01568             tarinfo = member
01569         else:
01570             tarinfo = self.getmember(member)
01571 
01572         # Prepare the link target for makelink().
01573         if tarinfo.islnk():
01574             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
01575 
01576         try:
01577             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
01578         except EnvironmentError, e:
01579             if self.errorlevel > 0:
01580                 raise
01581             else:
01582                 if e.filename is None:
01583                     self._dbg(1, "tarfile: %s" % e.strerror)
01584                 else:
01585                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
01586         except ExtractError, e:
01587             if self.errorlevel > 1:
01588                 raise
01589             else:
01590                 self._dbg(1, "tarfile: %s" % e)
01591 
01592     def extractfile(self, member):
01593         """Extract a member from the archive as a file object. `member' may be
01594            a filename or a TarInfo object. If `member' is a regular file, a
01595            file-like object is returned. If `member' is a link, a file-like
01596            object is constructed from the link's target. If `member' is none of
01597            the above, None is returned.
01598            The file-like object is read-only and provides the following
01599            methods: read(), readline(), readlines(), seek() and tell()
01600         """
01601         self._check("r")
01602 
01603         if isinstance(member, TarInfo):
01604             tarinfo = member
01605         else:
01606             tarinfo = self.getmember(member)
01607 
01608         if tarinfo.isreg():
01609             return self.fileobject(self, tarinfo)
01610 
01611         elif tarinfo.type not in SUPPORTED_TYPES:
01612             # If a member's type is unknown, it is treated as a
01613             # regular file.
01614             return self.fileobject(self, tarinfo)
01615 
01616         elif tarinfo.islnk() or tarinfo.issym():
01617             if isinstance(self.fileobj, _Stream):
01618                 # A small but ugly workaround for the case that someone tries
01619                 # to extract a (sym)link as a file-object from a non-seekable
01620                 # stream of tar blocks.
01621                 raise StreamError("cannot extract (sym)link as file object")
01622             else:
01623                 # A (sym)link's file object is its target's file object.
01624                 return self.extractfile(self._getmember(tarinfo.linkname,
01625                                                         tarinfo))
01626         else:
01627             # If there's no data associated with the member (directory, chrdev,
01628             # blkdev, etc.), return None instead of a file object.
01629             return None
01630 
01631     def _extract_member(self, tarinfo, targetpath):
01632         """Extract the TarInfo object tarinfo to a physical
01633            file called targetpath.
01634         """
01635         # Fetch the TarInfo object for the given name
01636         # and build the destination pathname, replacing
01637         # forward slashes to platform specific separators.
01638         if targetpath[-1:] == "/":
01639             targetpath = targetpath[:-1]
01640         targetpath = os.path.normpath(targetpath)
01641 
01642         # Create all upper directories.
01643         upperdirs = os.path.dirname(targetpath)
01644         if upperdirs and not os.path.exists(upperdirs):
01645             # Create directories that are not part of the archive with
01646             # default permissions.
01647             os.makedirs(upperdirs)
01648 
01649         if tarinfo.islnk() or tarinfo.issym():
01650             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
01651         else:
01652             self._dbg(1, tarinfo.name)
01653 
01654         if tarinfo.isreg():
01655             self.makefile(tarinfo, targetpath)
01656         elif tarinfo.isdir():
01657             self.makedir(tarinfo, targetpath)
01658         elif tarinfo.isfifo():
01659             self.makefifo(tarinfo, targetpath)
01660         elif tarinfo.ischr() or tarinfo.isblk():
01661             self.makedev(tarinfo, targetpath)
01662         elif tarinfo.islnk() or tarinfo.issym():
01663             self.makelink(tarinfo, targetpath)
01664         elif tarinfo.type not in SUPPORTED_TYPES:
01665             self.makeunknown(tarinfo, targetpath)
01666         else:
01667             self.makefile(tarinfo, targetpath)
01668 
01669         self.chown(tarinfo, targetpath)
01670         if not tarinfo.issym():
01671             self.chmod(tarinfo, targetpath)
01672             self.utime(tarinfo, targetpath)
01673 
01674     #--------------------------------------------------------------------------
01675     # Below are the different file methods. They are called via
01676     # _extract_member() when extract() is called. They can be replaced in a
01677     # subclass to implement other functionality.
01678 
01679     def makedir(self, tarinfo, targetpath):
01680         """Make a directory called targetpath.
01681         """
01682         try:
01683             # Use a safe mode for the directory, the real mode is set
01684             # later in _extract_member().
01685             os.mkdir(targetpath, 0700)
01686         except EnvironmentError, e:
01687             if e.errno != errno.EEXIST:
01688                 raise
01689 
01690     def makefile(self, tarinfo, targetpath):
01691         """Make a file called targetpath.
01692         """
01693         source = self.extractfile(tarinfo)
01694         target = file(targetpath, "wb")
01695         copyfileobj(source, target)
01696         source.close()
01697         target.close()
01698 
01699     def makeunknown(self, tarinfo, targetpath):
01700         """Make a file from a TarInfo object with an unknown type
01701            at targetpath.
01702         """
01703         self.makefile(tarinfo, targetpath)
01704         self._dbg(1, "tarfile: Unknown file type %r, " \
01705                      "extracted as regular file." % tarinfo.type)
01706 
01707     def makefifo(self, tarinfo, targetpath):
01708         """Make a fifo called targetpath.
01709         """
01710         if hasattr(os, "mkfifo"):
01711             os.mkfifo(targetpath)
01712         else:
01713             raise ExtractError("fifo not supported by system")
01714 
01715     def makedev(self, tarinfo, targetpath):
01716         """Make a character or block device called targetpath.
01717         """
01718         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
01719             raise ExtractError("special devices not supported by system")
01720 
01721         mode = tarinfo.mode
01722         if tarinfo.isblk():
01723             mode |= stat.S_IFBLK
01724         else:
01725             mode |= stat.S_IFCHR
01726 
01727         os.mknod(targetpath, mode,
01728                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
01729 
01730     def makelink(self, tarinfo, targetpath):
01731         """Make a (symbolic) link called targetpath. If it cannot be created
01732           (platform limitation), we try to make a copy of the referenced file
01733           instead of a link.
01734         """
01735         linkpath = tarinfo.linkname
01736         try:
01737             if tarinfo.issym():
01738                 os.symlink(linkpath, targetpath)
01739             else:
01740                 # See extract().
01741                 os.link(tarinfo._link_target, targetpath)
01742         except AttributeError:
01743             if tarinfo.issym():
01744                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
01745                                         linkpath)
01746                 linkpath = normpath(linkpath)
01747 
01748             try:
01749                 self._extract_member(self.getmember(linkpath), targetpath)
01750             except (EnvironmentError, KeyError), e:
01751                 linkpath = os.path.normpath(linkpath)
01752                 try:
01753                     shutil.copy2(linkpath, targetpath)
01754                 except EnvironmentError, e:
01755                     raise IOError("link could not be created")
01756 
01757     def chown(self, tarinfo, targetpath):
01758         """Set owner of targetpath according to tarinfo.
01759         """
01760         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
01761             # We have to be root to do so.
01762             try:
01763                 g = grp.getgrnam(tarinfo.gname)[2]
01764             except KeyError:
01765                 try:
01766                     g = grp.getgrgid(tarinfo.gid)[2]
01767                 except KeyError:
01768                     g = os.getgid()
01769             try:
01770                 u = pwd.getpwnam(tarinfo.uname)[2]
01771             except KeyError:
01772                 try:
01773                     u = pwd.getpwuid(tarinfo.uid)[2]
01774                 except KeyError:
01775                     u = os.getuid()
01776             try:
01777                 if tarinfo.issym() and hasattr(os, "lchown"):
01778                     os.lchown(targetpath, u, g)
01779                 else:
01780                     if sys.platform != "os2emx":
01781                         os.chown(targetpath, u, g)
01782             except EnvironmentError, e:
01783                 raise ExtractError("could not change owner")
01784 
01785     def chmod(self, tarinfo, targetpath):
01786         """Set file permissions of targetpath according to tarinfo.
01787         """
01788         if hasattr(os, 'chmod'):
01789             try:
01790                 os.chmod(targetpath, tarinfo.mode)
01791             except EnvironmentError, e:
01792                 raise ExtractError("could not change mode")
01793 
01794     def utime(self, tarinfo, targetpath):
01795         """Set modification time of targetpath according to tarinfo.
01796         """
01797         if not hasattr(os, 'utime'):
01798             return
01799         if sys.platform == "win32" and tarinfo.isdir():
01800             # According to msdn.microsoft.com, it is an error (EACCES)
01801             # to use utime() on directories.
01802             return
01803         try:
01804             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
01805         except EnvironmentError, e:
01806             raise ExtractError("could not change modification time")
01807 
01808     #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.

           Every member returned from the archive itself is appended to
           self.members. ReadError is raised when the block at offset 0
           cannot be parsed and ignore_zeros is not set.
        """
        self._check("ra")
        # If a member is pending in self.firstmember, hand it out once
        # and clear the slot.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        while True:
            buf = self.fileobj.read(BLOCKSIZE)
            if not buf:
                # Nothing left to read: end of archive.
                return None

            try:
                tarinfo = TarInfo.frombuf(buf)

                # Set the TarInfo object's offset to the current position of the
                # TarFile and set self.offset to the position where the data blocks
                # should begin.
                tarinfo.offset = self.offset
                self.offset += BLOCKSIZE

                # proc_member() advances self.offset past the member's
                # data and may return a different TarInfo object.
                tarinfo = self.proc_member(tarinfo)

            except ValueError, e:
                if self.ignore_zeros:
                    # Skip the unparsable block and try the next one.
                    self._dbg(2, "0x%X: empty or invalid block: %s" %
                              (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                else:
                    # An unparsable block at the very start means this is
                    # not a readable tar archive; anywhere else it is
                    # treated as the end of the archive.
                    if self.offset == 0:
                        raise ReadError("empty, unreadable or compressed "
                                        "file: %s" % e)
                    return None
            break

        # Some old tar programs represent a directory as a regular
        # file with a trailing slash.
        if tarinfo.isreg() and tarinfo.name.endswith("/"):
            tarinfo.type = DIRTYPE

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and not tarinfo.name.endswith("/"):
            tarinfo.name += "/"

        self.members.append(tarinfo)
        return tarinfo
01862 
01863     #--------------------------------------------------------------------------
01864     # The following are methods that are called depending on the type of a
01865     # member. The entry point is proc_member() which is called with a TarInfo
01866     # object created from the header block from the current offset. The
01867     # proc_member() method can be overridden in a subclass to add custom
01868     # proc_*() methods. A proc_*() method MUST implement the following
01869     # operations:
01870     # 1. Set tarinfo.offset_data to the position where the data blocks begin,
01871     #    if there is data that follows.
01872     # 2. Set self.offset to the position where the next member's header will
01873     #    begin.
01874     # 3. Return tarinfo or another valid TarInfo object.
01875     def proc_member(self, tarinfo):
01876         """Choose the right processing method for tarinfo depending
01877            on its type and call it.
01878         """
01879         if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
01880             return self.proc_gnulong(tarinfo)
01881         elif tarinfo.type == GNUTYPE_SPARSE:
01882             return self.proc_sparse(tarinfo)
01883         else:
01884             return self.proc_builtin(tarinfo)
01885 
01886     def proc_builtin(self, tarinfo):
01887         """Process a builtin type member or an unknown member
01888            which will be treated as a regular file.
01889         """
01890         tarinfo.offset_data = self.offset
01891         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
01892             # Skip the following data blocks.
01893             self.offset += self._block(tarinfo.size)
01894         return tarinfo
01895 
01896     def proc_gnulong(self, tarinfo):
01897         """Process the blocks that hold a GNU longname
01898            or longlink member.
01899         """
01900         buf = ""
01901         count = tarinfo.size
01902         while count > 0:
01903             block = self.fileobj.read(BLOCKSIZE)
01904             buf += block
01905             self.offset += BLOCKSIZE
01906             count -= BLOCKSIZE
01907 
01908         # Fetch the next header and process it.
01909         b = self.fileobj.read(BLOCKSIZE)
01910         t = TarInfo.frombuf(b)
01911         t.offset = self.offset
01912         self.offset += BLOCKSIZE
01913         next = self.proc_member(t)
01914 
01915         # Patch the TarInfo object from the next header with
01916         # the longname information.
01917         next.offset = tarinfo.offset
01918         if tarinfo.type == GNUTYPE_LONGNAME:
01919             next.name = nts(buf)
01920         elif tarinfo.type == GNUTYPE_LONGLINK:
01921             next.linkname = nts(buf)
01922 
01923         return next
01924 
01925     def proc_sparse(self, tarinfo):
01926         """Process a GNU sparse header plus extra headers.
01927         """
01928         buf = tarinfo.buf
01929         sp = _ringbuffer()
01930         pos = 386
01931         lastpos = 0L
01932         realpos = 0L
01933         # There are 4 possible sparse structs in the
01934         # first header.
01935         for i in xrange(4):
01936             try:
01937                 offset = nti(buf[pos:pos + 12])
01938                 numbytes = nti(buf[pos + 12:pos + 24])
01939             except ValueError:
01940                 break
01941             if offset > lastpos:
01942                 sp.append(_hole(lastpos, offset - lastpos))
01943             sp.append(_data(offset, numbytes, realpos))
01944             realpos += numbytes
01945             lastpos = offset + numbytes
01946             pos += 24
01947 
01948         isextended = ord(buf[482])
01949         origsize = nti(buf[483:495])
01950 
01951         # If the isextended flag is given,
01952         # there are extra headers to process.
01953         while isextended == 1:
01954             buf = self.fileobj.read(BLOCKSIZE)
01955             self.offset += BLOCKSIZE
01956             pos = 0
01957             for i in xrange(21):
01958                 try:
01959                     offset = nti(buf[pos:pos + 12])
01960                     numbytes = nti(buf[pos + 12:pos + 24])
01961                 except ValueError:
01962                     break
01963                 if offset > lastpos:
01964                     sp.append(_hole(lastpos, offset - lastpos))
01965                 sp.append(_data(offset, numbytes, realpos))
01966                 realpos += numbytes
01967                 lastpos = offset + numbytes
01968                 pos += 24
01969             isextended = ord(buf[504])
01970 
01971         if lastpos < origsize:
01972             sp.append(_hole(lastpos, origsize - lastpos))
01973 
01974         tarinfo.sparse = sp
01975 
01976         tarinfo.offset_data = self.offset
01977         self.offset += self._block(tarinfo.size)
01978         tarinfo.size = origsize
01979 
01980         return tarinfo
01981 
01982     #--------------------------------------------------------------------------
01983     # Little helper methods:
01984 
01985     def _block(self, count):
01986         """Round up a byte count by BLOCKSIZE and return it,
01987            e.g. _block(834) => 1024.
01988         """
01989         blocks, remainder = divmod(count, BLOCKSIZE)
01990         if remainder:
01991             blocks += 1
01992         return blocks * BLOCKSIZE
01993 
01994     def _getmember(self, name, tarinfo=None):
01995         """Find an archive member by name from bottom to top.
01996            If tarinfo is given, it is used as the starting point.
01997         """
01998         # Ensure that all members have been loaded.
01999         members = self.getmembers()
02000 
02001         if tarinfo is None:
02002             end = len(members)
02003         else:
02004             end = members.index(tarinfo)
02005 
02006         for i in xrange(end - 1, -1, -1):
02007             if name == members[i].name:
02008                 return members[i]
02009 
02010     def _load(self):
02011         """Read through the entire archive file and look for readable
02012            members.
02013         """
02014         while True:
02015             tarinfo = self.next()
02016             if tarinfo is None:
02017                 break
02018         self._loaded = True
02019 
02020     def _check(self, mode=None):
02021         """Check if TarFile is still open, and if the operation's mode
02022            corresponds to TarFile's mode.
02023         """
02024         if self.closed:
02025             raise IOError("%s is closed" % self.__class__.__name__)
02026         if mode is not None and self._mode not in mode:
02027             raise IOError("bad operation for mode %r" % self._mode)
02028 
02029     def __iter__(self):
02030         """Provide an iterator object.
02031         """
02032         if self._loaded:
02033             return iter(self.members)
02034         else:
02035             return TarIter(self)
02036 
02037     def _dbg(self, level, msg):
02038         """Write debugging output to sys.stderr.
02039         """
02040         if level <= self.debug:
02041             print >> sys.stderr, msg
02042 # class TarFile
02043 
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to tarfile and start at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not completely
           loaded its next() method is used and, once that is exhausted,
           the TarFile is marked as _loaded. Afterwards members are taken
           from the TarFile's member list by index.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Therefore the
        # position is tracked in self.index and the member list is used
        # as soon as the archive is loaded.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
02079 
02080 # Helper classes for sparse file support
02081 class _section:
02082     """Base class for _data and _hole.
02083     """
02084     def __init__(self, offset, size):
02085         self.offset = offset
02086         self.size = size
02087     def __contains__(self, offset):
02088         return self.offset <= offset < self.offset + self.size
02089 
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       next to the offset and size kept by _section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
02096 
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and only serves to tell holes apart from data.
    """
02101 
02102 class _ringbuffer(list):
02103     """Ringbuffer class which increases performance
02104        over a regular list.
02105     """
02106     def __init__(self):
02107         self.idx = 0
02108     def find(self, offset):
02109         idx = self.idx
02110         while True:
02111             item = self[idx]
02112             if offset in item:
02113                 break
02114             idx += 1
02115             if idx == len(self):
02116                 idx = 0
02117             if idx == self.idx:
02118                 # End of File
02119                 return None
02120         self.idx = idx
02121         return item
02122 
02123 #---------------------------------------------
02124 # zipfile compatible TarFile class
02125 #---------------------------------------------
# Compression selectors understood by TarFileCompat. The trailing comments
# name the zipfile constant each value corresponds to.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with mode, either as plain tar (TAR_PLAIN) or as
           gzipped tar (TAR_GZIPPED). Any other compression value
           raises ValueError.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names used by ZipInfo.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        """Return the names of all members listed by infolist().
        """
        return [member.name for member in self.infolist()]
    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        """Print a listing of the archive via TarFile.list().
        """
        self.tarfile.list()
    def testzip(self):
        """Present for ZipFile compatibility only; always returns None.
        """
        return
    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the complete data of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted for compatibility and ignored.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, described by zinfo whose
           filename, file_size and date_time attributes are copied to
           the names TarFile works with.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
02172 #class TarFileCompat
02173 
02174 #--------------------
02175 # exported functions
02176 #--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a tar
       archive by this module's open(), False if that raises TarError.
       Other exceptions are not caught.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
02187 
# Module-level alias for TarFile.open. Note that from here on the name
# `open' in this module refers to TarFile.open, not to the builtin.
open = TarFile.open