Back to index

obnam  1.1
metadata.py
Go to the documentation of this file.
00001 # Copyright (C) 2009  Lars Wirzenius
00002 #
00003 # This program is free software: you can redistribute it and/or modify
00004 # it under the terms of the GNU General Public License as published by
00005 # the Free Software Foundation, either version 3 of the License, or
00006 # (at your option) any later version.
00007 #
00008 # This program is distributed in the hope that it will be useful,
00009 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 # GNU General Public License for more details.
00012 #
00013 # You should have received a copy of the GNU General Public License
00014 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00015 
00016 
00017 import grp
00018 import os
00019 import pwd
00020 import stat
00021 import struct
00022 
00023 import obnamlib
00024 
00025 
# NOTE(review): the name suggests these are the fields compared when a
# backup is verified -- confirm against the callers.
metadata_verify_fields = (
    'st_mode',
    'st_mtime_sec',
    'st_mtime_nsec',
    'st_nlink',
    'st_size',
    'st_uid',
    'groupname',
    'username',
    'target',
    'xattr',
)

# Every field a Metadata object carries.  The order matters:
# encode_metadata and decode_metadata turn a field's position in
# obnamlib.metadata_fields (presumably this very tuple -- confirm) into a
# bit of the flags word, so reordering would change the encoding.
metadata_fields = metadata_verify_fields + (
    'st_blocks',
    'st_dev',
    'st_gid',
    'st_ino',
    'st_atime_sec',
    'st_atime_nsec',
    'md5',
)


class Metadata(object):

    '''Represent metadata for a filesystem entry.

    The metadata for a filesystem entry (file, directory, device, ...)
    consists of its stat(2) result, plus ACL and xattr.

    This class represents them as fields.

    We do not store all stat(2) fields. Here's a commentary on all fields:

        field?          stored? why

        st_atime_sec    yes     mutt compares atime, mtime to see if msg is new
        st_atime_nsec   yes     mutt compares atime, mtime to see if msg is new
        st_blksize      no      no way to restore, not useful backed up
        st_blocks       yes     should restore create holes in file?
        st_ctime        no      no way to restore, not useful backed up
        st_dev          yes     used to restore hardlinks
        st_gid          yes     used to restore group ownership
        st_ino          yes     used to restore hardlinks
        st_mode         yes     used to restore permissions
        st_mtime_sec    yes     used to restore mtime
        st_mtime_nsec   yes     used to restore mtime
        st_nlink        yes     used to restore hardlinks
        st_rdev         no      no use (correct me if I'm wrong about this)
        st_size         yes     user needs it to see size of file in backup
        st_uid          yes     used to restore ownership

    The field 'target' stores the target of a symlink.

    Additionally, the fields 'groupname' and 'username' are stored. They
    contain the textual names that correspond to st_gid and st_uid. When
    restoring, the names will be preferred by default.

    The 'md5' field optionally stores the whole-file checksum for the file.

    The 'xattr' field optionally stores extended attributes encoded as
    a binary blob.

    Every field listed in metadata_fields exists on every instance and
    is None until it is given a value.

    '''

    def __init__(self, **kwargs):
        '''Create an entry with all fields unset, then apply kwargs.

        Each keyword argument is stored as an attribute of the same
        name; names are not checked against metadata_fields.

        '''
        for field in metadata_fields:
            setattr(self, field, None)
        # items() instead of iteritems(): same result on Python 2, and
        # unlike iteritems() it also exists on Python 3.
        for field, value in kwargs.items():
            setattr(self, field, value)

    def isdir(self):
        '''Return true if st_mode is set and describes a directory.'''
        return self.st_mode is not None and stat.S_ISDIR(self.st_mode)

    def islink(self):
        '''Return true if st_mode is set and describes a symlink.'''
        return self.st_mode is not None and stat.S_ISLNK(self.st_mode)

    def isfile(self):
        '''Return true if st_mode is set and describes a regular file.'''
        return self.st_mode is not None and stat.S_ISREG(self.st_mode)

    def __repr__(self): # pragma: no cover
        fields = ', '.join('%s=%s' % (k, getattr(self, k))
                           for k in metadata_fields)
        return 'Metadata(%s)' % fields

    def __cmp__(self, other):
        '''Compare field by field, in metadata_fields order.

        The first field whose values differ decides the result (-1 or
        +1); 0 means no field ordered the two objects apart.

        Only Python 2 consults __cmp__ for ==, < and sorting; Python 3
        ignores this method unless it is called explicitly.

        '''
        for field in metadata_fields:
            ours = getattr(self, field)
            theirs = getattr(other, field)
            if ours == theirs:
                continue
            if ours < theirs:
                return -1
            if ours > theirs:
                return +1
        return 0
00110 
00111 
00112 # Caching versions of username/groupname lookups.
00113 # These work on the assumption that the mappings from uid/gid do not
00114 # change during the runtime of the backup.
00115 
_uid_to_username = {}
def _cached_getpwuid(uid): # pragma: no cover
    '''Return pwd.getpwuid(uid), remembering the answer per uid.

    Note that the cache holds the whole entry returned by
    pwd.getpwuid, not just the user name.  A failed lookup is not
    cached: whatever pwd.getpwuid raises is passed on to the caller.

    '''
    try:
        return _uid_to_username[uid]
    except KeyError:
        # First time this uid is seen; fall through to the real lookup.
        pass
    entry = pwd.getpwuid(uid)
    _uid_to_username[uid] = entry
    return entry
00121     
_gid_to_groupname = {}
def _cached_getgrgid(gid): # pragma: no cover
    '''Return grp.getgrgid(gid), remembering the answer per gid.

    Note that the cache holds the whole entry returned by
    grp.getgrgid, not just the group name.  A failed lookup is not
    cached: whatever grp.getgrgid raises is passed on to the caller.

    '''
    try:
        return _gid_to_groupname[gid]
    except KeyError:
        # First time this gid is seen; fall through to the real lookup.
        pass
    entry = grp.getgrgid(gid)
    _gid_to_groupname[gid] = entry
    return entry
00127 
00128 
def get_xattrs_as_blob(fs, filename): # pragma: no cover
    '''Pack the extended attributes of filename into one string.

    Returns None when fs.llistxattr reports no attribute names.
    Otherwise the result is laid out as:

        * the length of the name section, as one '!Q' integer
        * the name section: every name followed by a NUL byte
        * one '!Q' length per value, in the same order as the names
        * all the values, concatenated

    '''
    attr_names = fs.llistxattr(filename)
    if not attr_names:
        return None

    attr_values = []
    for attr_name in attr_names:
        attr_values.append(fs.lgetxattr(filename, attr_name))

    # Names are NUL-terminated so the reader can split them apart again.
    names_part = ''.join(['%s\0' % attr_name for attr_name in attr_names])
    names_size = struct.pack('!Q', len(names_part))

    # A repeat count ('!3Q') means the same as spelling it out ('!QQQ').
    size_table = struct.pack('!%dQ' % len(attr_values),
                             *[len(value) for value in attr_values])
    values_part = size_table + ''.join(attr_values)

    return '%s%s%s' % (names_size, names_part, values_part)
00145 
00146 
def set_xattrs_from_blob(fs, filename, blob): # pragma: no cover
    '''Apply extended attributes from blob to filename.

    blob is expected in the layout get_xattrs_as_blob produces: a '!Q'
    length of the name section, the NUL-terminated names, one '!Q'
    length per value, then the values themselves.  Each attribute is
    set with fs.lsetxattr, in the order the names appear.

    '''
    header_size = struct.calcsize('!Q')
    (names_size,) = struct.unpack('!Q', blob[:header_size])
    names_end = header_size + names_size
    packed_names = blob[header_size:names_end]
    packed_values = blob[names_end:]

    # Every name ends in NUL, so the final piece of the split is empty.
    attr_names = packed_names.split('\0')[:-1]

    table_size = header_size * len(attr_names)
    value_sizes = struct.unpack('!' + 'Q' * len(attr_names),
                                packed_values[:table_size])

    # Values follow the length table back to back.
    start = table_size
    for attr_name, value_size in zip(attr_names, value_sizes):
        end = start + value_size
        fs.lsetxattr(filename, attr_name, packed_values[start:end])
        start = end
00163 
00164 
def read_metadata(fs, filename, st=None, getpwuid=None, getgrgid=None):
    '''Collect the metadata of a filesystem entry into a Metadata object.

    fs is the filesystem object used for lstat, readlink and the xattr
    calls.  st may be an already obtained stat result; when it is
    missing (or false) fs.lstat(filename) is called instead.

    getpwuid and getgrgid replace the default cached lookups.  Each is
    called with the numeric id and must return a sequence whose first
    item is the name; a KeyError from them results in the name being
    recorded as None.

    '''
    lookup_user = getpwuid or _cached_getpwuid
    lookup_group = getgrgid or _cached_getgrgid

    result = Metadata()
    info = st or fs.lstat(filename)

    # Copy across only those st_* fields the stat result really has.
    for name in metadata_fields:
        if not name.startswith('st_'):
            continue
        if hasattr(info, name):
            setattr(result, name, getattr(info, name))

    # Only symlinks have a target; everything else gets an empty string.
    if not stat.S_ISLNK(info.st_mode):
        result.target = ''
    else:
        result.target = fs.readlink(filename)

    try:
        result.groupname = lookup_group(result.st_gid)[0]
    except KeyError:
        result.groupname = None

    try:
        result.username = lookup_user(result.st_uid)[0]
    except KeyError:
        result.username = None

    result.xattr = get_xattrs_as_blob(fs, filename)
    return result
00193 
00194 
def set_metadata(fs, filename, metadata, getuid=None):
    '''Apply stored metadata to a filesystem entry.

    What gets restored:

        * for a symlink, the link itself is created from metadata.target;
          for anything else the permission bits (st_mode) are set
        * extended attributes, if metadata.xattr is non-empty
        * atime and mtime
        * ownership (st_uid, st_gid), but only when running as root

    The username and groupname fields are ignored here. A caller who
    wants names to win must translate them into st_uid and st_gid
    beforehand; that leaves error handling and user preferences with
    the caller.

    getuid replaces os.getuid when given.

    '''

    if not stat.S_ISLNK(metadata.st_mode):
        fs.chmod(filename, metadata.st_mode)
    else:
        fs.symlink(metadata.target, filename)

    xattr_blob = metadata.xattr
    if xattr_blob: # pragma: no cover
        set_xattrs_from_blob(fs, filename, xattr_blob)

    atime = (metadata.st_atime_sec, metadata.st_atime_nsec)
    mtime = (metadata.st_mtime_sec, metadata.st_mtime_nsec)
    fs.lutimes(filename, atime[0], atime[1], mtime[0], mtime[1])

    # Only root is allowed to hand files over to other owners.
    current_uid = (getuid or os.getuid)()
    if current_uid == 0:
        fs.lchown(filename, metadata.st_uid, metadata.st_gid)
00221     
00222     
# Fixed-size header that starts every encoded Metadata; the variable
# length strings follow it, in the order their lengths appear here.
metadata_format = struct.Struct(''.join([
    '!',    # network byte order, standard sizes
    'Q',    # flags
    'Q',    # st_mode
    'q',    # st_mtime_sec (signed)
    'Q',    # st_mtime_nsec
    'q',    # st_atime_sec (signed)
    'Q',    # st_atime_nsec
    'Q',    # st_nlink
    'Q',    # st_size
    'Q',    # st_uid
    'Q',    # st_gid
    'Q',    # st_dev
    'Q',    # st_ino
    'Q',    # st_blocks
    'Q',    # len of groupname
    'Q',    # len of username
    'Q',    # len of symlink target
    'Q',    # len of md5
    'Q',    # len of xattr
]))
00240 
def encode_metadata(metadata):
    '''Serialise a Metadata object into a single string.

    The result is a metadata_format header followed by the groupname,
    username, target, md5 and xattr strings, in that order.  The first
    header item is a flags word: bit i is set when field number i of
    obnamlib.metadata_fields is not None.  Unset integer fields are
    packed as 0 and unset strings as the empty string, so the flags
    are what tells "unset" apart from zero or empty.

    '''
    flags = sum(1 << bit
                for bit, name in enumerate(obnamlib.metadata_fields)
                if getattr(metadata, name) is not None)

    integer_names = ('st_mode', 'st_mtime_sec', 'st_mtime_nsec',
                     'st_atime_sec', 'st_atime_nsec', 'st_nlink',
                     'st_size', 'st_uid', 'st_gid', 'st_dev', 'st_ino',
                     'st_blocks')
    string_names = ('groupname', 'username', 'target', 'md5', 'xattr')

    numbers = [getattr(metadata, name) or 0 for name in integer_names]
    strings = [getattr(metadata, name) or '' for name in string_names]
    lengths = [len(text) for text in strings]

    encoded = metadata_format.pack(flags, *(numbers + lengths))
    for text in strings:
        encoded = encoded + text
    return encoded
00271 
00272 
def decode_metadata(encoded):
    '''Rebuild a Metadata object from the output of encode_metadata.

    The fixed-size header is unpacked first.  A field is assigned only
    when its bit in the flags word is set; otherwise it keeps whatever
    value obnamlib.Metadata() gave it.  String fields are sliced out of
    the data that follows the header, using the lengths stored in the
    header.

    '''
    integer_fields = ('st_mode', 'st_mtime_sec', 'st_mtime_nsec',
                      'st_atime_sec', 'st_atime_nsec', 'st_nlink',
                      'st_size', 'st_uid', 'st_gid', 'st_dev', 'st_ino',
                      'st_blocks')
    string_fields = ('groupname', 'username', 'target', 'md5', 'xattr')

    items = metadata_format.unpack_from(encoded)
    flags = items[0]
    metadata = obnamlib.Metadata()

    def has_value(field):
        bit = 1 << obnamlib.metadata_fields.index(field)
        return (flags & bit) != 0

    # items[0] is the flags word, so the real fields start at slot 1.
    slot = 1
    for field in integer_fields:
        if has_value(field):
            setattr(metadata, field, items[slot])
        slot += 1

    # The strings start right after the fixed-size header.  The offset
    # only moves past strings that are flagged as present.
    offset = metadata_format.size
    for field in string_fields:
        if has_value(field):
            text = encoded[offset:offset + items[slot]]
            setattr(metadata, field, text)
            offset += len(text)
        slot += 1

    return metadata
00317