Back to index

obnam  1.1
repo.py
Go to the documentation of this file.
00001 # Copyright (C) 2009-2011  Lars Wirzenius
00002 #
00003 # This program is free software: you can redistribute it and/or modify
00004 # it under the terms of the GNU General Public License as published by
00005 # the Free Software Foundation, either version 3 of the License, or
00006 # (at your option) any later version.
00007 #
00008 # This program is distributed in the hope that it will be useful,
00009 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 # GNU General Public License for more details.
00012 #
00013 # You should have received a copy of the GNU General Public License
00014 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00015 
00016 
00017 import errno
00018 import hashlib
00019 import larch
00020 import logging
00021 import os
00022 import random
00023 import re
00024 import stat
00025 import struct
00026 import time
00027 import tracing
00028 
00029 import obnamlib
00030 
00031 
class LockFail(obnamlib.Error):

    '''Raised when a needed repository lock is not held or not obtainable.'''
00035 
00036 
class BadFormat(obnamlib.Error):

    '''Raised when the on-disk repository format is not one we can use.'''
00040 
00041 
class HookedFS(object):

    '''Wrap a filesystem object and pass file contents through hooks.

    Data read with cat() goes through the 'repository-data' read filter,
    data written with write_file() or overwrite_file() goes through the
    matching write filter. Any attribute not defined here is looked up
    on the wrapped filesystem object.

    '''

    def __init__(self, repo, fs, hooks):
        self.repo = repo
        self.fs = fs
        self.hooks = hooks

    def __getattr__(self, name):
        # Only reached for names this wrapper does not define itself.
        return getattr(self.fs, name)

    def _get_toplevel(self, filename):
        '''Return the first path component of filename.

        Raise obnamlib.Error if filename has no directory part at all.

        '''
        components = filename.split(os.sep)
        if len(components) <= 1: # pragma: no cover
            raise obnamlib.Error('File at repository root: %s' % filename)
        return components[0]

    def _filtered_for_write(self, filename, data, runfilters):
        '''Return data as it should be stored in filename.'''
        # The toplevel is worked out even when filters are skipped, so
        # that a file at the repository root is always rejected.
        toplevel = self._get_toplevel(filename)
        if not runfilters:
            return data
        return self.hooks.filter_write('repository-data', data,
                                       repo=self.repo, toplevel=toplevel)

    def cat(self, filename, runfilters=True):
        '''Return contents of filename, filtered unless runfilters is false.'''
        raw = self.fs.cat(filename)
        toplevel = self._get_toplevel(filename)
        if runfilters:
            return self.hooks.filter_read('repository-data', raw,
                                          repo=self.repo, toplevel=toplevel)
        return raw

    def lock(self, filename, data):
        '''Create a lock file; its contents are never filtered.'''
        self.fs.lock(filename, data)

    def write_file(self, filename, data, runfilters=True):
        '''Write a new file, filtering data unless runfilters is false.'''
        tracing.trace('writing hooked %s' % filename)
        stored = self._filtered_for_write(filename, data, runfilters)
        self.fs.write_file(filename, stored)

    def overwrite_file(self, filename, data, runfilters=True):
        '''Overwrite a file, filtering data unless runfilters is false.'''
        tracing.trace('overwriting hooked %s' % filename)
        stored = self._filtered_for_write(filename, data, runfilters)
        self.fs.overwrite_file(filename, stored)
00087         
00088 
00089 class Repository(object):
00090 
00091     '''Repository for backup data.
00092     
00093     Backup data is put on a virtual file system
00094     (obnamlib.VirtualFileSystem instance), in some form that
00095     the API of this class does not care about.
00096     
00097     The repository may contain data for several clients that share 
00098     encryption keys. Each client is identified by a name.
00099     
00100     The repository has a "root" object, which is conceptually a list of
00101     client names.
00102     
00103     Each client in turn is conceptually a list of generations,
00104     which correspond to snapshots of the user data that existed
00105     when the generation was created.
00106     
00107     Read-only access to the repository does not require locking.
00108     Write access may affect only the root object, or only a client's
00109     own data, and thus locking may affect only the root, or only
00110     the client.
00111     
00112     When a new generation is started, it is a copy-on-write clone
00113     of the previous generation, and the caller needs to modify
00114     the new generation to match the current state of user data.
00115     
00116     The file 'metadata/format' at the root of the repository contains the
00117     version of the repository format it uses. The version is
00118     specified using a single integer.
00119 
00120     '''
00121     
00122     format_version = 6
00123 
00124     def __init__(self, fs, node_size, upload_queue_size, lru_size, hooks,
00125                  idpath_depth, idpath_bits, idpath_skip, current_time,
00126                  lock_timeout, client_name):
00127 
00128         self.current_time = current_time
00129         self.setup_hooks(hooks or obnamlib.HookManager())
00130         self.fs = HookedFS(self, fs, self.hooks)
00131         self.node_size = node_size
00132         self.upload_queue_size = upload_queue_size
00133         self.lru_size = lru_size
00134         
00135         hider = hashlib.md5()
00136         hider.update(client_name)
00137 
00138         self.lockmgr = obnamlib.LockManager(self.fs, lock_timeout, 
00139                                             hider.hexdigest())
00140 
00141         self.got_root_lock = False
00142         self._open_client_list()
00143         self.got_shared_lock = False
00144         self.got_client_lock = False
00145         self.current_client = None
00146         self.current_client_id = None
00147         self.new_generation = None
00148         self.added_clients = []
00149         self.removed_clients = []
00150         self.removed_generations = []
00151         self.client = None
00152         self._open_shared()
00153         self.prev_chunkid = None
00154         self.chunk_idpath = larch.IdPath('chunks', idpath_depth, 
00155                                          idpath_bits, idpath_skip)
00156         self._chunks_exists = False
00157 
00158     def _open_client_list(self):
00159         self.clientlist = obnamlib.ClientList(self.fs, self.node_size, 
00160                                               self.upload_queue_size, 
00161                                               self.lru_size, self)
00162 
00163     def _open_shared(self):
00164         self.chunklist = obnamlib.ChunkList(self.fs, self.node_size, 
00165                                             self.upload_queue_size, 
00166                                             self.lru_size, self)
00167         self.chunksums = obnamlib.ChecksumTree(self.fs, 'chunksums', 
00168                                                len(self.checksum('')),
00169                                                self.node_size, 
00170                                                self.upload_queue_size, 
00171                                                self.lru_size, self)
00172 
00173     def setup_hooks(self, hooks):
00174         self.hooks = hooks
00175         
00176         self.hooks.new('repository-toplevel-init')
00177         self.hooks.new_filter('repository-data')
00178         self.hooks.new('repository-add-client')
00179         
00180     def checksum(self, data):
00181         '''Return checksum of data.
00182         
00183         The checksum is (currently) MD5.
00184         
00185         '''
00186 
00187         checksummer = self.new_checksummer()
00188         checksummer.update(data)
00189         return checksummer.hexdigest()
00190 
00191     def new_checksummer(self):
00192         '''Return a new checksum algorithm.'''
00193         return hashlib.md5()
00194 
00195     def acceptable_version(self, version):
00196         '''Are we compatible with on-disk format?'''
00197         return self.format_version == version
00198 
00199     def client_dir(self, client_id):
00200         '''Return name of sub-directory for a given client.'''
00201         return str(client_id)
00202 
00203     def list_clients(self):
00204         '''Return list of names of clients using this repository.'''
00205 
00206         self.check_format_version()
00207         listed = set(self.clientlist.list_clients())
00208         added = set(self.added_clients)
00209         removed = set(self.removed_clients)
00210         clients = listed.union(added).difference(removed)
00211         return list(clients)
00212 
00213     def require_root_lock(self):
00214         '''Ensure we have the lock on the repository's root node.'''
00215         if not self.got_root_lock:
00216             raise LockFail('have not got lock on root node')
00217 
00218     def require_shared_lock(self):
00219         '''Ensure we have the lock on the shared B-trees except clientlist.'''
00220         if not self.got_shared_lock:
00221             raise LockFail('have not got lock on shared B-trees')
00222 
00223     def require_client_lock(self):
00224         '''Ensure we have the lock on the currently open client.'''
00225         if not self.got_client_lock:
00226             raise LockFail('have not got lock on client')
00227 
00228     def require_open_client(self):
00229         '''Ensure we have opened the client (r/w or r/o).'''
00230         if self.current_client is None:
00231             raise obnamlib.Error('client is not open')
00232 
00233     def require_started_generation(self):
00234         '''Ensure we have started a new generation.'''
00235         if self.new_generation is None:
00236             raise obnamlib.Error('new generation has not started')
00237 
00238     def require_no_root_lock(self):
00239         '''Ensure we haven't locked root yet.'''
00240         if self.got_root_lock:
00241             raise obnamlib.Error('We have already locked root, oops')
00242 
00243     def require_no_shared_lock(self):
00244         '''Ensure we haven't locked shared B-trees yet.'''
00245         if self.got_shared_lock:
00246             raise obnamlib.Error('We have already locked shared B-trees, oops')
00247 
00248     def require_no_client_lock(self):
00249         '''Ensure we haven't locked the per-client B-tree yet.'''
00250         if self.got_client_lock:
00251             raise obnamlib.Error('We have already locked the client, oops')
00252 
00253     def lock_root(self):
00254         '''Lock root node.
00255         
00256         Raise obnamlib.LockFail if locking fails. Lock will be released
00257         by commit_root() or unlock_root().
00258         
00259         '''
00260 
00261         tracing.trace('locking root')
00262         self.require_no_root_lock()
00263         self.require_no_client_lock()
00264         self.require_no_shared_lock()
00265 
00266         self.lockmgr.lock(['.'])
00267         self.check_format_version()
00268         self.got_root_lock = True
00269         self.added_clients = []
00270         self.removed_clients = []
00271         self._write_format_version(self.format_version)
00272         self.clientlist.start_changes()
00273 
00274     def unlock_root(self):
00275         '''Unlock root node without committing changes made.'''
00276         tracing.trace('unlocking root')
00277         self.require_root_lock()
00278         self.added_clients = []
00279         self.removed_clients = []
00280         self.lockmgr.unlock(['.'])
00281         self.got_root_lock = False
00282         self._open_client_list()
00283         
00284     def commit_root(self):
00285         '''Commit changes to root node, and unlock it.'''
00286         tracing.trace('committing root')
00287         self.require_root_lock()
00288         for client_name in self.added_clients:
00289             self.clientlist.add_client(client_name)
00290             self.hooks.call('repository-add-client', 
00291                             self.clientlist, client_name)
00292         self.added_clients = []
00293         for client_name in self.removed_clients:
00294             client_id = self.clientlist.get_client_id(client_name)
00295             client_dir = self.client_dir(client_id)
00296             if client_id is not None and self.fs.exists(client_dir):
00297                 self.fs.rmtree(client_dir)
00298             self.clientlist.remove_client(client_name)
00299         self.clientlist.commit()
00300         self.unlock_root()
00301         
00302     def get_format_version(self):
00303         '''Return (major, minor) of the on-disk format version.
00304         
00305         If on-disk repository does not have a version yet, return None.
00306         
00307         '''
00308         
00309         if self.fs.exists('metadata/format'):
00310             data = self.fs.cat('metadata/format', runfilters=False)
00311             lines = data.splitlines()
00312             line = lines[0]
00313             try:
00314                 version = int(line)
00315             except ValueError, e: # pragma: no cover
00316                 msg = ('Invalid repository format version (%s) -- '
00317                             'forgot encryption?' %
00318                        repr(line))
00319                 raise obnamlib.Error(msg)
00320             return version
00321         else:
00322             return None
00323         
00324     def _write_format_version(self, version):
00325         '''Write the desired format version to the repository.'''
00326         tracing.trace('write format version')
00327         if not self.fs.exists('metadata'):
00328             self.fs.mkdir('metadata')
00329         self.fs.overwrite_file('metadata/format', '%s\n' % version,
00330                                runfilters=False)
00331 
00332     def check_format_version(self):
00333         '''Verify that on-disk format version is compatbile.
00334         
00335         If not, raise BadFormat.
00336         
00337         '''
00338         
00339         on_disk = self.get_format_version()
00340         if on_disk is not None and not self.acceptable_version(on_disk):
00341             raise BadFormat('On-disk repository format %s is incompatible '
00342                             'with program format %s; you need to use a '
00343                             'different version of Obnam' %
00344                                 (on_disk, self.format_version))
00345         
00346     def add_client(self, client_name):
00347         '''Add a new client to the repository.'''
00348         tracing.trace('client_name=%s', client_name)
00349         self.require_root_lock()
00350         if client_name in self.list_clients():
00351             raise obnamlib.Error('client %s already exists in repository' % 
00352                                  client_name)
00353         self.added_clients.append(client_name)
00354         
00355     def remove_client(self, client_name):
00356         '''Remove a client from the repository.
00357         
00358         This removes all data related to the client, including all
00359         actual file data unless other clients also use it.
00360         
00361         '''
00362         
00363         tracing.trace('client_name=%s', client_name)
00364         self.require_root_lock()
00365         if client_name not in self.list_clients():
00366             raise obnamlib.Error('client %s does not exist' % client_name)
00367         self.removed_clients.append(client_name)
00368 
00369     @property
00370     def shared_dirs(self):
00371         return [self.chunklist.dirname, self.chunksums.dirname,
00372                 self.chunk_idpath.dirname]
00373         
00374     def lock_shared(self):
00375         '''Lock a client for exclusive write access.
00376         
00377         Raise obnamlib.LockFail if locking fails. Lock will be released
00378         by commit_client() or unlock_client().
00379 
00380         '''
00381 
00382         tracing.trace('locking shared')
00383         self.require_no_shared_lock()
00384         self.check_format_version()
00385         self.lockmgr.lock(self.shared_dirs)
00386         self.got_shared_lock = True
00387         tracing.trace('starting changes in chunksums and chunklist')
00388         self.chunksums.start_changes()
00389         self.chunklist.start_changes()
00390         
00391         # Initialize the chunks directory for encryption, etc, if it just
00392         # got created.
00393         dirname = self.chunk_idpath.dirname
00394         filenames = self.fs.listdir(dirname)
00395         if filenames == [] or filenames == ['lock']:
00396             self.hooks.call('repository-toplevel-init', self, dirname)
00397 
00398 
00399     def commit_shared(self):
00400         '''Commit changes to shared B-trees.'''
00401         
00402         tracing.trace('committing shared')
00403         self.require_shared_lock()
00404         self.chunklist.commit()
00405         self.chunksums.commit()
00406         self.unlock_shared()
00407 
00408     def unlock_shared(self):
00409         '''Unlock currently locked shared B-trees.'''
00410         tracing.trace('unlocking shared')
00411         self.require_shared_lock()
00412         self.lockmgr.unlock(self.shared_dirs)
00413         self.got_shared_lock = False
00414         self._open_shared()
00415         
00416     def lock_client(self, client_name):
00417         '''Lock a client for exclusive write access.
00418         
00419         Raise obnamlib.LockFail if locking fails. Lock will be released
00420         by commit_client() or unlock_client().
00421 
00422         '''
00423 
00424         tracing.trace('client_name=%s', client_name)
00425         self.require_no_client_lock()
00426         self.require_no_shared_lock()
00427         
00428         self.check_format_version()
00429         client_id = self.clientlist.get_client_id(client_name)
00430         if client_id is None:
00431             raise LockFail('client %s does not exist' % client_name)
00432 
00433         client_dir = self.client_dir(client_id)
00434         if not self.fs.exists(client_dir):
00435             self.fs.mkdir(client_dir)
00436             self.hooks.call('repository-toplevel-init', self, client_dir)
00437 
00438         self.lockmgr.lock([client_dir])
00439         self.got_client_lock = True
00440         self.current_client = client_name
00441         self.current_client_id = client_id
00442         self.added_generations = []
00443         self.removed_generations = []
00444         self.client = obnamlib.ClientMetadataTree(self.fs, client_dir, 
00445                                                   self.node_size,
00446                                                   self.upload_queue_size, 
00447                                                   self.lru_size, self)
00448         self.client.init_forest()
00449 
00450     def unlock_client(self):
00451         '''Unlock currently locked client, without committing changes.'''
00452         tracing.trace('unlocking client')
00453         self.require_client_lock()
00454         self.new_generation = None
00455         self._really_remove_generations(self.added_generations)
00456         self.lockmgr.unlock([self.client.dirname])
00457         self.client = None # FIXME: This should remove uncommitted data.
00458         self.added_generations = []
00459         self.removed_generations = []
00460         self.got_client_lock = False
00461         self.current_client = None
00462         self.current_client_id = None
00463 
00464     def commit_client(self, checkpoint=False):
00465         '''Commit changes to and unlock currently locked client.'''
00466         tracing.trace('committing client (checkpoint=%s)', checkpoint)
00467         self.require_client_lock()
00468         self.require_shared_lock()
00469         commit_client = self.new_generation or self.removed_generations
00470         if self.new_generation:
00471             self.client.set_current_generation_is_checkpoint(checkpoint)
00472         self.added_generations = []
00473         self._really_remove_generations(self.removed_generations)
00474         if commit_client:
00475             self.client.commit()
00476         self.unlock_client()
00477         
00478     def open_client(self, client_name):
00479         '''Open a client for read-only operation.'''
00480         tracing.trace('open r/o client_name=%s' % client_name)
00481         self.check_format_version()
00482         client_id = self.clientlist.get_client_id(client_name)
00483         if client_id is None:
00484             raise obnamlib.Error('%s is not an existing client' % client_name)
00485         self.current_client = client_name
00486         self.current_client_id = client_id
00487         client_dir = self.client_dir(client_id)
00488         self.client = obnamlib.ClientMetadataTree(self.fs, client_dir, 
00489                                                   self.node_size, 
00490                                                   self.upload_queue_size, 
00491                                                   self.lru_size, self)
00492         self.client.init_forest()
00493         
00494     def list_generations(self):
00495         '''List existing generations for currently open client.'''
00496         self.require_open_client()
00497         return self.client.list_generations()
00498         
00499     def get_is_checkpoint(self, genid):
00500         '''Is a generation a checkpoint one?'''
00501         self.require_open_client()
00502         return self.client.get_is_checkpoint(genid)
00503         
00504     def start_generation(self):
00505         '''Start a new generation.
00506         
00507         The new generation is a copy-on-write clone of the previous
00508         one (or empty, if first generation).
00509         
00510         '''
00511         tracing.trace('start new generation')
00512         self.require_client_lock()
00513         if self.new_generation is not None:
00514             raise obnamlib.Error('Cannot start two new generations')
00515         self.client.start_generation()
00516         self.new_generation = \
00517             self.client.get_generation_id(self.client.tree)
00518         self.added_generations.append(self.new_generation)
00519         return self.new_generation
00520 
    def _really_remove_generations(self, remove_genids):
        '''Really remove a list of generations.

        This is not part of the public API.

        This does not make any safety checks.

        Chunks that are listed only in the generations being removed
        are removed as well. Does nothing if remove_genids is empty;
        otherwise both the client lock and the shared lock are
        required.

        '''

        def find_chunkids_in_gens(genids):
            # Union of the chunk ids listed in each of the generations.
            chunkids = set()
            for genid in genids:
                x = self.client.list_chunks_in_generation(genid)
                chunkids = chunkids.union(set(x))
            return chunkids

        def find_gens_to_keep():
            # Every existing generation that is not being removed.
            return [genid
                    for genid in self.list_generations()
                    if genid not in remove_genids]

        def remove_chunks(chunk_ids):
            for chunk_id in chunk_ids:
                try:
                    checksum = self.chunklist.get_checksum(chunk_id)
                except KeyError:
                    # No checksum, therefore it can't be shared, therefore
                    # we can remove it.
                    self.remove_chunk(chunk_id)
                else:
                    # Drop this client's use of the chunk; delete the
                    # chunk itself only if the checksum tree then says
                    # it is no longer used.
                    self.chunksums.remove(checksum, chunk_id, 
                                          self.current_client_id)
                    if not self.chunksums.chunk_is_used(checksum, chunk_id):
                        self.remove_chunk(chunk_id)

        def remove_gens(genids):
            # Without a started generation, changes to the client tree
            # have to be started explicitly before removing anything.
            if self.new_generation is None:
                self.client.start_changes(create_tree=False)
            for genid in genids:
                self.client.remove_generation(genid)

        if not remove_genids:
            return

        self.require_client_lock()
        self.require_shared_lock()

        # Candidates are the chunks in the removed generations; those
        # also listed in a generation that stays must be kept.
        maybe_remove = find_chunkids_in_gens(remove_genids)
        keep_genids = find_gens_to_keep()
        keep = find_chunkids_in_gens(keep_genids)
        remove = maybe_remove.difference(keep)
        remove_chunks(remove)
        remove_gens(remove_genids)
00574 
00575     def remove_generation(self, gen):
00576         '''Remove a committed generation.'''
00577         self.require_client_lock()
00578         if gen == self.new_generation:
00579             raise obnamlib.Error('cannot remove started generation')
00580         self.removed_generations.append(gen)
00581 
00582     def get_generation_times(self, gen):
00583         '''Return start and end times of a generation.
00584         
00585         An unfinished generation has no end time, so None is returned.
00586         
00587         '''
00588 
00589         self.require_open_client()
00590         return self.client.get_generation_times(gen)
00591 
00592     def listdir(self, gen, dirname):
00593         '''Return list of basenames in a directory within generation.'''
00594         self.require_open_client()
00595         return self.client.listdir(gen, dirname)
00596         
00597     def get_metadata(self, gen, filename):
00598         '''Return metadata for a file in a generation.'''
00599 
00600         self.require_open_client()
00601         try:
00602             encoded = self.client.get_metadata(gen, filename)
00603         except KeyError:
00604             raise obnamlib.Error('%s does not exist' % filename)
00605         return obnamlib.decode_metadata(encoded)
00606 
00607     def create(self, filename, metadata):
00608         '''Create a new (empty) file in the new generation.'''
00609         self.require_started_generation()
00610         encoded = obnamlib.encode_metadata(metadata)
00611         self.client.create(filename, encoded)
00612 
00613     def remove(self, filename):
00614         '''Remove file or directory or directory tree from generation.'''
00615         self.require_started_generation()
00616         self.client.remove(filename)
00617 
00618     def _chunk_filename(self, chunkid):
00619         return self.chunk_idpath.convert(chunkid)
00620 
00621     def put_chunk_only(self, data):
00622         '''Put chunk of data into repository.
00623         
00624         If the same data is already in the repository, it will be put there
00625         a second time. It is the caller's responsibility to check
00626         that the data is not already in the repository.
00627         
00628         Return the unique identifier of the new chunk.
00629         
00630         '''
00631         
00632         def random_chunkid():
00633             return random.randint(0, obnamlib.MAX_ID)
00634         
00635         self.require_started_generation()
00636 
00637         if self.prev_chunkid is None:
00638             self.prev_chunkid = random_chunkid()
00639 
00640         while True:
00641             chunkid = (self.prev_chunkid + 1) % obnamlib.MAX_ID
00642             filename = self._chunk_filename(chunkid)
00643             try:
00644                 self.fs.write_file(filename, data)
00645             except OSError, e: # pragma: no cover
00646                 if e.errno == errno.EEXIST:
00647                     self.prev_chunkid = random_chunkid()
00648                     continue
00649                 raise
00650             else:
00651                 tracing.trace('chunkid=%s', chunkid)
00652                 break
00653 
00654         self.prev_chunkid = chunkid
00655         return chunkid
00656 
00657     def put_chunk_in_shared_trees(self, chunkid, checksum):
00658         '''Put the chunk into the shared trees.
00659         
00660         The chunk is assumed to already exist in the repository, so we
00661         just need to add it to the shared trees that map chunkids to
00662         checksums and checksums to chunkids.
00663         
00664         '''
00665 
00666         tracing.trace('chunkid=%s', chunkid)
00667         tracing.trace('checksum=%s', repr(checksum))
00668 
00669         self.require_started_generation()
00670         self.require_shared_lock()
00671 
00672         self.chunklist.add(chunkid, checksum)
00673         self.chunksums.add(checksum, chunkid, self.current_client_id)
00674         
00675     def get_chunk(self, chunkid):
00676         '''Return data of chunk with given id.'''
00677         self.require_open_client()
00678         return self.fs.cat(self._chunk_filename(chunkid))
00679         
00680     def chunk_exists(self, chunkid):
00681         '''Does a chunk exist in the repository?'''
00682         self.require_open_client()
00683         return self.fs.exists(self._chunk_filename(chunkid))
00684         
00685     def find_chunks(self, checksum):
00686         '''Return identifiers of chunks with given checksum.
00687         
00688         Because of hash collisions, the list may be longer than one.
00689         
00690         '''
00691 
00692         self.require_open_client()
00693         return self.chunksums.find(checksum)
00694 
00695     def list_chunks(self):
00696         '''Return list of ids of all chunks in repository.'''
00697         result = []
00698         pat = re.compile(r'^.*/.*/[0-9a-fA-F]+$')
00699         if self.fs.exists('chunks'):
00700             for pathname, st in self.fs.scan_tree('chunks'):
00701                 if stat.S_ISREG(st.st_mode) and pat.match(pathname):
00702                     basename = os.path.basename(pathname)
00703                     result.append(int(basename, 16))
00704         return result
00705 
00706     def remove_chunk(self, chunk_id):
00707         '''Remove a chunk from the repository.
00708         
00709         Note that this does _not_ remove the chunk from the chunk
00710         checksum forest. The caller is not supposed to call us until
00711         the chunk is not there anymore.
00712         
00713         However, it does remove the chunk from the chunk list forest.
00714         
00715         '''
00716 
00717         tracing.trace('chunk_id=%s', chunk_id)
00718         self.require_open_client()
00719         self.require_shared_lock()
00720         self.chunklist.remove(chunk_id)
00721         filename = self._chunk_filename(chunk_id)
00722         try:
00723             self.fs.remove(filename)
00724         except OSError:
00725             pass
00726 
00727     def get_file_chunks(self, gen, filename):
00728         '''Return list of ids of chunks belonging to a file.'''
00729         self.require_open_client()
00730         return self.client.get_file_chunks(gen, filename)
00731 
00732     def set_file_chunks(self, filename, chunkids):
00733         '''Set ids of chunks belonging to a file.
00734         
00735         File must be in the started generation.
00736         
00737         '''
00738         
00739         self.require_started_generation()
00740         self.client.set_file_chunks(filename, chunkids)
00741 
00742     def append_file_chunks(self, filename, chunkids):
00743         '''Append to list of ids of chunks belonging to a file.
00744         
00745         File must be in the started generation.
00746         
00747         '''
00748         
00749         self.require_started_generation()
00750         self.client.append_file_chunks(filename, chunkids)
00751 
00752     def set_file_data(self, filename, contents): # pragma: no cover
00753         '''Store contents of file in B-tree instead of chunks dir.'''
00754         self.require_started_generation()
00755         self.client.set_file_data(filename, contents)
00756 
00757     def get_file_data(self, gen, filename): # pragma: no cover
00758         '''Returned contents of file stored in B-tree instead of chunks dir.'''
00759         self.require_open_client()
00760         return self.client.get_file_data(gen, filename)
00761         
00762     def genspec(self, spec):
00763         '''Interpret a generation specification.'''
00764 
00765         self.require_open_client()
00766         gens = self.list_generations()
00767         if not gens:
00768             raise obnamlib.Error('No generations')
00769         if spec == 'latest':
00770             return gens[-1]
00771         else:
00772             try:
00773                 intspec = int(spec)
00774             except ValueError:
00775                 raise obnamlib.Error('Generation %s is not an integer' % spec)
00776             if intspec in gens:
00777                 return intspec
00778             else:
00779                 raise obnamlib.Error('Generation %s not found' % spec)
00780 
00781     def walk(self, gen, arg, depth_first=False):
00782         '''Iterate over each pathname specified by argument.
00783         
00784         This is a generator. Each return value is a tuple consisting
00785         of a pathname and its corresponding metadata. Directories are
00786         recursed into.
00787         
00788         '''
00789         
00790         arg = os.path.normpath(arg)
00791         metadata = self.get_metadata(gen, arg)
00792         if metadata.isdir():
00793             if not depth_first:
00794                 yield arg, metadata
00795             kids = self.listdir(gen, arg)
00796             kidpaths = [os.path.join(arg, kid) for kid in kids]
00797             for kidpath in kidpaths:
00798                 for x in self.walk(gen, kidpath, depth_first=depth_first):
00799                     yield x
00800             if depth_first:
00801                 yield arg, metadata
00802         else:
00803             yield arg, metadata
00804