Back to index

obnam  1.1
forget_policy.py
Go to the documentation of this file.
00001 # Copyright (C) 2010  Lars Wirzenius
00002 #
00003 # This program is free software: you can redistribute it and/or modify
00004 # it under the terms of the GNU General Public License as published by
00005 # the Free Software Foundation, either version 3 of the License, or
00006 # (at your option) any later version.
00007 #
00008 # This program is distributed in the hope that it will be useful,
00009 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 # GNU General Public License for more details.
00012 #
00013 # You should have received a copy of the GNU General Public License
00014 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00015 
00016 
00017 import re
00018 
00019 import obnamlib
00020 
00021 
class ForgetPolicy(object):

    '''Parse and interpret a policy for what to forget and what to keep.

    A policy is a comma-separated list of rules such as "7d,4w". Each
    rule is a decimal count immediately followed by a one-letter period
    code (see the periods attribute).

    See documentation for the --keep option for details.

    '''

    # Maps the one-letter period code used in a rule to the period name
    # used as a key in parsed rulesets.
    periods = {
        'h': 'hourly',
        'd': 'daily',
        'w': 'weekly',
        'm': 'monthly',
        'y': 'yearly',
    }

    # A single rule: a decimal count followed by one period code.
    rule_pat = re.compile(r'(?P<count>\d+)(?P<period>(h|d|w|m|y))')

    def parse(self, optarg):
        '''Parse the argument of --keep.

        Return a dictionary indexed by 'hourly', 'daily', 'weekly',
        'monthly', 'yearly', and giving the number of generations
        to keep for each time period. Periods that are not mentioned
        in optarg get a count of 0.

        Raise obnamlib.Error if any rule is malformed (including an
        empty or unparseable rule after a comma), if rules are not
        separated by commas, or if a period is given more than once.

        '''

        # None marks "period not seen yet", which is distinct from an
        # explicit count of 0; this is what lets duplicates be detected.
        counts = dict.fromkeys(self.periods.values())

        remaining = optarg
        while True:
            m = self.rule_pat.match(remaining)
            if not m:
                # Applies to every rule, not just the first one: text
                # after a comma that is not a valid rule must not be
                # silently dropped from the policy.
                raise obnamlib.Error('Forget policy syntax error: %s' %
                                     optarg)

            period = self.periods[m.group('period')]
            if counts[period] is not None:
                raise obnamlib.Error('Forget policy may not '
                                     'duplicate period (%s): %s' %
                                     (period, optarg))
            counts[period] = int(m.group('count'))

            remaining = remaining[m.end():]
            if not remaining:
                break
            if not remaining.startswith(','):
                raise obnamlib.Error('Forget policy must have rules '
                                     'separated by commas: %s' % optarg)
            remaining = remaining[1:]

        # Build a fresh dict rather than updating the one being iterated.
        return dict((period, 0 if count is None else count)
                    for period, count in counts.items())

    def last_in_each_period(self, period, genlist):
        '''Return ids of the last generation within each period.

        period is one of the period names used as keys in a parsed
        ruleset ('hourly', 'daily', ...). genlist is a sequence of
        (generation id, datetime) pairs.

        Consecutive generations whose timestamps format to the same
        string for the given period are treated as one group, and only
        the id of the last generation of each group is returned, in
        genlist order. Only adjacent entries are compared, so genlist
        is expected to be sorted by time.

        '''

        # strftime formats; two timestamps belong to the same period
        # exactly when they format to the same string.
        formats = {
            'hourly': '%Y-%m-%d %H',
            'daily': '%Y-%m-%d',
            'weekly': '%Y-%W',
            'monthly': '%Y-%m',
            'yearly': '%Y',
        }

        last_ids = []
        last_key = None
        for genid, dt in genlist:
            key = dt.strftime(formats[period])
            if last_ids and key == last_key:
                # Same period as the previous generation: the newer one
                # replaces it as the group's representative.
                last_ids[-1] = genid
            else:
                last_ids.append(genid)
                last_key = key
        return last_ids

    def match(self, rules, genlist):
        '''Match a parsed ruleset against a list of generations and times.

        The ruleset should be of the form returned by the parse method.

        genlist should be a list of generation identifiers and timestamps.
        Identifiers can be anything hashable, timestamps should be an
        instance of datetime.datetime, with no time zone (it is ignored).

        genlist should be in ascending order by time: oldest one first.

        Return value is all those pairs from genlist that should be
        kept (i.e., which match the rules), in their original order.

        '''

        # genlist is walked once per period and once more for the result,
        # so take a private copy in case a one-shot iterable was passed.
        genlist = list(genlist)

        keep_ids = set()
        for period in rules:
            count = rules[period]
            if not count:
                # Nothing is kept for this period; skip the grouping work.
                continue
            genids = self.last_in_each_period(period, genlist)
            # Keep the newest `count` period representatives.
            keep_ids.update(genids[-count:])

        return [(genid, dt) for genid, dt in genlist
                if genid in keep_ids]
00120