Back to index

obnam  1.1
fsck_plugin.py
Go to the documentation of this file.
00001 # Copyright (C) 2010  Lars Wirzenius
00002 #
00003 # This program is free software: you can redistribute it and/or modify
00004 # it under the terms of the GNU General Public License as published by
00005 # the Free Software Foundation, either version 3 of the License, or
00006 # (at your option) any later version.
00007 #
00008 # This program is distributed in the hope that it will be useful,
00009 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 # GNU General Public License for more details.
00012 #
00013 # You should have received a copy of the GNU General Public License
00014 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00015 
00016 
00017 import larch.fsck
00018 import logging
00019 import os
00020 import sys
00021 import ttystatus
00022 
00023 import obnamlib
00024 
00025 
class WorkItem(larch.fsck.WorkItem):

    '''A work item for fsck.

    Whoever creates a WorkItem shall set the ``repo`` to the repository
    being used.

    In this file that is done by ``FsckPlugin.add_item``, which also
    attaches ``warning`` and ``error`` (reporting callbacks),
    ``settings`` (the application settings) and ``chunkids_seen`` (a set
    shared by all work items) before the item is queued.

    '''
00034 
00035 
class CheckChunk(WorkItem):

    '''Verify one chunk and feed its data into a file checksummer.

    ``chunkid`` identifies the chunk to check; ``checksummer`` is the
    running whole-file checksum object that receives the chunk data.

    '''

    def __init__(self, chunkid, checksummer):
        self.name = 'chunk %s' % chunkid
        self.chunkid = chunkid
        self.checksummer = checksummer

    def do(self):
        '''Check the chunk and record its id as seen.'''
        chunkid = self.chunkid
        logging.debug('Checking chunk %s' % chunkid)
        repo = self.repo
        if repo.chunk_exists(chunkid):
            self._check_contents(repo, chunkid)
        else:
            self.error('chunk %s does not exist' % chunkid)
        # The id is recorded whether or not the chunk exists.
        self.chunkids_seen.add(chunkid)

    def _check_contents(self, repo, chunkid):
        '''Compare an existing chunk against chunklist and chunksums.'''
        data = repo.get_chunk(chunkid)
        checksum = repo.checksum(data)

        try:
            expected = repo.chunklist.get_checksum(chunkid)
        except KeyError:
            self.error('chunk %s not in chunklist' % chunkid)
        else:
            if checksum != expected:
                self.error('chunk %s has wrong checksum' % chunkid)

        # The chunksums lookup uses the checksum computed from the data
        # just read, not the one stored in chunklist.
        if chunkid not in repo.chunksums.find(checksum):
            self.error('chunk %s not in chunksums' % chunkid)

        self.checksummer.update(data)
00063 
00064 
class CheckFileChecksum(WorkItem):

    '''Compare a file's accumulated checksum with the recorded one.

    ``correct`` is the expected digest and ``checksummer`` is the object
    whose ``digest()`` is compared against it. ``chunkids`` is stored
    but not used by ``do``.

    '''

    def __init__(self, filename, correct, chunkids, checksummer):
        self.filename = filename
        self.correct = correct
        self.chunkids = chunkids
        self.checksummer = checksummer
        self.name = '%s checksum' % filename

    def do(self):
        '''Report an error if the digests differ.'''
        logging.debug('Checking whole-file checksum for %s' % self.filename)
        mismatch = self.correct != self.checksummer.digest()
        if mismatch:
            self.error('%s whole-file checksum mismatch' % self.name)
00078 
00079 
class CheckFile(WorkItem):

    '''Check one non-directory entry of a generation.

    For entries whose metadata says ``isfile()``, this produces one
    ``CheckChunk`` per chunk followed by a ``CheckFileChecksum``; other
    entries produce no further work.

    '''

    def __init__(self, client_name, genid, filename):
        self.name = '%s:%s:%s' % (client_name, genid, filename)
        self.client_name = client_name
        self.genid = genid
        self.filename = filename

    def do(self):
        '''Yield the follow-up work items for this file.'''
        logging.debug('Checking client=%s genid=%s filename=%s' %
                      (self.client_name, self.genid, self.filename))
        repo = self.repo
        if repo.current_client != self.client_name:
            repo.open_client(self.client_name)

        metadata = repo.get_metadata(self.genid, self.filename)
        if not metadata.isfile():
            return

        chunkids = repo.get_file_chunks(self.genid, self.filename)
        # All chunk checks share one checksummer, which the final item
        # then compares with the digest stored in the metadata.
        checksummer = repo.new_checksummer()
        for chunkid in chunkids:
            yield CheckChunk(chunkid, checksummer)
        yield CheckFileChecksum(
            self.name, metadata.md5, chunkids, checksummer)
00101 
00102 
class CheckDirectory(WorkItem):

    '''Check a directory of a generation and queue its entries.

    Each entry becomes a ``CheckDirectory`` when its metadata says
    ``isdir()``, otherwise a ``CheckFile``.

    '''

    def __init__(self, client_name, genid, dirname):
        self.name = '%s:%s:%s' % (client_name, genid, dirname)
        self.client_name = client_name
        self.genid = genid
        self.dirname = dirname

    def do(self):
        '''Yield one work item per directory entry.'''
        logging.debug('Checking client=%s genid=%s dirname=%s' %
                      (self.client_name, self.genid, self.dirname))
        repo = self.repo
        if repo.current_client != self.client_name:
            repo.open_client(self.client_name)

        # The result is deliberately unused; the lookup itself is the
        # check (presumably it raises if the metadata is missing).
        repo.get_metadata(self.genid, self.dirname)

        for basename in repo.listdir(self.genid, self.dirname):
            pathname = os.path.join(self.dirname, basename)
            if repo.get_metadata(self.genid, pathname).isdir():
                item_class = CheckDirectory
            else:
                item_class = CheckFile
            yield item_class(self.client_name, self.genid, pathname)
00124 
00125 
class CheckGeneration(WorkItem):

    '''Check the per-generation bookkeeping of one generation.

    Reports an error for each of start time, end time, file count and
    total data that is ``None``, then hands the generation's root
    directory on for checking.

    '''

    def __init__(self, client_name, genid):
        self.client_name = client_name
        self.genid = genid
        self.name = '%s:%s' % (client_name, genid)

    def do(self):
        '''Check the generation and return the follow-up work items.

        Returns a list holding a single ``CheckDirectory`` for ``/``.

        '''
        logging.debug('Checking client=%s genid=%s' %
                      (self.client_name, self.genid))

        # Make sure self.repo.client refers to this item's client, the
        # same way the other client-specific work items do, instead of
        # relying on the order in which items happen to be processed.
        if self.repo.current_client != self.client_name:
            self.repo.open_client(self.client_name)
        client = self.repo.client

        started, ended = client.get_generation_times(self.genid)
        if started is None:
            self.error('%s:%s: no generation start time' %
                       (self.client_name, self.genid))
        if ended is None:
            self.error('%s:%s: no generation end time' %
                       (self.client_name, self.genid))

        file_count = client.get_generation_file_count(self.genid)
        if file_count is None:
            self.error('%s:%s: no file count' % (self.client_name, self.genid))

        total_data = client.get_generation_data(self.genid)
        if total_data is None:
            self.error('%s:%s: no total data' % (self.client_name, self.genid))

        return [CheckDirectory(self.client_name, self.genid, '/')]
00154 
00155 
class CheckGenerationIdsAreDifferent(WorkItem):

    '''Check that a client's generation ids are all distinct.

    ``genids`` may be any iterable; it is copied into a list when the
    item is created.

    '''

    def __init__(self, client_name, genids):
        self.client_name = client_name
        self.genids = list(genids)

    def do(self):
        '''Report an error for every repeated generation id.'''
        logging.debug('Checking genid uniqueness for client=%s' %
                      self.client_name)
        done = set()
        for genid in self.genids:
            if genid in done:
                # The message has two placeholders, so it needs both
                # the client name and the generation id; passing only
                # the id raised TypeError instead of reporting.
                self.error('%s: duplicate generation id %s' %
                           (self.client_name, genid))
            else:
                done.add(genid)
00172 
00173 
class CheckClientExists(WorkItem):

    '''Check that a listed client has a client id.'''

    def __init__(self, client_name):
        self.name = 'does client %s exist?' % client_name
        self.client_name = client_name

    def do(self):
        '''Report an error if the client list has no id for the client.'''
        logging.debug('Checking client=%s exists' % self.client_name)
        if self.repo.clientlist.get_client_id(self.client_name) is None:
            self.error('Client %s is in client list, but has no id' %
                       self.client_name)
00186 
00187 
class CheckClient(WorkItem):

    '''Queue the checks for every generation of one client.'''

    def __init__(self, client_name):
        self.name = 'client %s' % client_name
        self.client_name = client_name

    def do(self):
        '''Yield a uniqueness check, then one item per generation.'''
        client_name = self.client_name
        logging.debug('Checking client=%s' % client_name)
        repo = self.repo
        if repo.current_client != client_name:
            repo.open_client(client_name)

        yield CheckGenerationIdsAreDifferent(
            client_name, repo.list_generations())
        # The generation list is fetched again here rather than reused.
        for genid in repo.list_generations():
            yield CheckGeneration(client_name, genid)
00202 
00203 
class CheckClientlist(WorkItem):

    '''Queue the checks for every client in the client list.

    Work is produced in three passes over the client names: a
    ``CheckBTree`` for each client directory, then a
    ``CheckClientExists`` for each client, then a ``CheckClient`` for
    each client.

    '''

    name = 'client list'

    def do(self):
        '''Yield the per-client work items in pass order.'''
        logging.debug('Checking clientlist')
        clientlist = self.repo.clientlist
        client_names = clientlist.list_clients()

        for client_name in client_names:
            client_id = clientlist.get_client_id(client_name)
            yield CheckBTree(str(self.repo.client_dir(client_id)))

        for item_class in (CheckClientExists, CheckClient):
            for client_name in client_names:
                yield item_class(client_name)
00219 
00220 
class CheckForExtraChunks(WorkItem):

    '''Report chunks in the repository that no check has recorded.

    Relies on ``chunkids_seen`` having been filled in by the
    ``CheckChunk`` items that ran before this one.

    '''

    def __init__(self):
        self.name = 'extra chunks'

    def do(self):
        '''Report an error for each chunk id not in ``chunkids_seen``.'''
        logging.debug('Checking for extra chunks')
        seen = self.chunkids_seen
        for chunkid in self.repo.list_chunks():
            if chunkid in seen:
                continue
            self.error('chunk %s not used by anyone' % chunkid)
00231 
00232 
class CheckBTree(WorkItem):

    '''Run larch's own fsck on one B-tree directory of the repository.'''

    def __init__(self, dirname):
        self.name = 'B-tree %s' % dirname
        self.dirname = dirname

    def do(self):
        '''Check the B-tree, or skip it if its directory is missing.'''
        dirname = self.dirname
        filesystem = self.repo.fs
        if filesystem.exists(dirname):
            logging.debug('Checking B-tree %s' % dirname)
            # The forest is opened with writes disallowed; whether
            # problems get fixed is governed by the fsck-fix setting.
            forest = larch.open_forest(
                allow_writes=False, dirname=dirname, vfs=self.repo.fs)
            checker = larch.fsck.Fsck(
                forest, self.warning, self.error, self.settings['fsck-fix'])
            checker.find_work()
            # larch's work items are run right here instead of being
            # added to the plugin's queue.
            for item in checker.work:
                item.do()
        else:
            logging.debug('B-tree %s does not exist, skipping' % dirname)
00251 
00252 
class CheckRepository(WorkItem):

    '''Top-level work item: queue the repository-wide checks.'''

    def __init__(self):
        self.name = 'repository'

    def do(self):
        '''Yield the shared B-tree checks, then the client list check.'''
        logging.debug('Checking repository')
        for tree_name in ('clientlist', 'chunklist', 'chunksums'):
            yield CheckBTree(tree_name)
        yield CheckClientlist()
00264 
00265 
class FsckPlugin(obnamlib.ObnamPlugin):

    '''Plugin that adds the ``fsck`` subcommand and ``fsck-fix`` setting.

    The subcommand runs a queue of work items, starting from
    ``CheckRepository``; each item may produce further items. Errors
    reported by the items are counted in ``self.errors``.

    '''

    def enable(self):
        '''Register the subcommand and its setting with the application.'''
        self.app.add_subcommand('fsck', self.fsck)
        self.app.settings.boolean(['fsck-fix'],
                                  'should fsck try to fix problems?')

    def configure_ttystatus(self):
        '''Reset the progress display and its counters.'''
        self.app.ts.clear()
        self.app.ts['item'] = None
        self.app.ts['items'] = 0
        self.app.ts.format(
            'Checking %Counter(item)/%Integer(items): %String(item)')

    def fsck(self, args):
        '''Verify internal consistency of backup repository.'''
        self.app.settings.require('repository')
        logging.debug('fsck on %s' % self.app.settings['repository'])
        self.repo = self.app.open_repository()

        # Each lock is released in a finally clause, in reverse order of
        # acquisition, so a failing check does not leave the repository
        # locked.
        self.repo.lock_root()
        try:
            client_names = self.repo.list_clients()
            client_dirs = [self.repo.client_dir(
                               self.repo.clientlist.get_client_id(name))
                           for name in client_names]
            self.repo.lockmgr.lock(client_dirs)
            try:
                self.repo.lock_shared()
                try:
                    self._run_checks()
                finally:
                    self.repo.unlock_shared()
            finally:
                self.repo.lockmgr.unlock(client_dirs)
        finally:
            self.repo.unlock_root()

        self.repo.fs.close()
        self.app.ts.finish()

        if self.errors:
            sys.exit(1)

    def _run_checks(self):
        '''Run all work items, then the final items, until none remain.'''
        self.errors = 0
        self.chunkids_seen = set()
        self.work_items = []

        # The display must be reset before the first item is added:
        # add_item increases the 'items' counter, and resetting it
        # afterwards would drop that first item from the total.
        self.configure_ttystatus()
        self.add_item(CheckRepository())
        final_items = [CheckForExtraChunks()]

        # work_items is used as a stack. Follow-up items are pushed in
        # reverse so they are popped in the order do() produced them,
        # and before anything that was queued earlier.
        while self.work_items:
            work = self.work_items.pop()
            logging.debug('doing: %s' % str(work))
            self.app.ts['item'] = work
            for more in reversed(list(work.do() or [])):
                self.add_item(more, append=True)
            if not self.work_items:
                # Everything else is done; queue the final items once.
                for final in final_items:
                    self.add_item(final)
                final_items = []

    def add_item(self, work, append=False):
        '''Attach the shared context to ``work`` and queue it.

        With ``append`` true the item goes to the end of the list and is
        therefore processed next; otherwise it goes to the front and is
        processed after everything already queued.

        '''
        work.warning = self.warning
        work.error = self.error
        work.repo = self.repo
        work.settings = self.app.settings
        work.chunkids_seen = self.chunkids_seen
        if append:
            self.work_items.append(work)
        else:
            self.work_items.insert(0, work)
        self.app.ts.increase('items', 1)
        self.app.dump_memory_profile('after adding %s' % repr(work))

    def error(self, msg):
        '''Show an error message and count it.'''
        self.app.ts.error(msg)
        self.errors += 1

    def warning(self, msg):
        '''Show a warning message; warnings are not counted.'''
        self.app.ts.notify(msg)
00338         self.app.ts.error(msg)
00339         self.errors += 1
00340 
00341     def warning(self, msg):
00342         self.app.ts.notify(msg)
00343