Back to index

moin  1.9.0~rc2
__init__.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - LogFile package
00004 
00005     This module supports buffered log reads, iterating forward and backward line-by-line, etc.
00006 
00007     @copyright: 2005-2007 MoinMoin:ThomasWaldmann
00008     @license: GNU GPL, see COPYING for details.
00009 """
00010 
00011 from MoinMoin import log
00012 logging = log.getLogger(__name__)
00013 
00014 import os, codecs, errno
00015 from MoinMoin import config, wikiutil
00016 
class LogError(Exception):
    """ Root of the log exception hierarchy - catch this to handle any log specific error """
00019 
class LogMissing(LogError):
    """ Signals that the log file does not exist """
00022 
00023 
class LineBuffer:
    """
    Reads a run of complete lines from a file and records where each one starts

        self.len      number of lines in self.lines
        self.lines    list of lines (unicode)
        self.offsets  list of file offsets for each line. additionally the position
                      after the last read line is stored into self.offsets[-1]
    """
    def __init__(self, file, offset, size, forward=True):
        """
        Fill the buffer with the lines following (forward) or preceding
        (backward) the given file offset.

        A backward read that does not contain a single complete line is
        repeated with a doubled size until it does or until the begin of the
        file is reached. An empty backward buffer is therefore only produced
        when there is nothing before offset.

        TODO: when this gets refactored, don't use "file" (is a builtin)

        @param file: open file object
        @param offset: position in file to start from
        @param size: approximate number of bytes to read
        @param forward: read from offset on or from offset-size to offset
        @type forward: boolean
        """
        self.loglevel = logging.NOTSET
        if forward:
            begin = offset
            logging.log(self.loglevel, "LineBuffer.init: forward seek %d read %d" % (begin, size))
            file.seek(begin)
            lines = file.readlines(size)
        else:
            # a window of 0 bytes could never grow by doubling
            size = max(size, 1)
            while True:
                if offset < 2 * size:
                    # close to the begin of the file: take everything up to offset
                    begin = 0
                    size = offset
                else:
                    begin = offset - size
                logging.log(self.loglevel, "LineBuffer.init: backward seek %d read %d" % (begin, size))
                file.seek(begin)
                lines = file.read(size).splitlines(True)
                if begin == 0:
                    # window starts at the begin of the file, the first line is complete
                    break
                if lines:
                    # remove potentially incomplete first line
                    begin += len(lines[0])
                    lines = lines[1:]
                if lines:
                    break
                # not even one complete line in the window (a line longer than
                # size, or nothing could be read): retry with a bigger window
                # instead of handing out an empty buffer that does not move
                size *= 2

        # now calculate the file offsets of all read lines; this uses the
        # length of the raw lines, so it has to happen before decoding
        offsets = []
        position = begin
        for line in lines:
            offsets.append(position)
            position += len(line)
        offsets.append(position) # file offset after the last read line

        self.offsets = offsets
        self.len = len(lines)
        # Decode lines after offset in file is calculated
        self.lines = [unicode(line, config.charset) for line in lines]
00081 
00082 
00083 class LogFile:
00084     """
00085     .filter: function that gets the values from .parser.
00086              must return True to keep it or False to remove it
00087     Overwrite .parser() and .add() to customize this class to special log files
00088     """
00089 
00090     def __init__(self, filename, buffer_size=4096):
00091         """
00092         @param filename: name of the log file
00093         @param buffer_size: approx. size of one buffer in bytes
00094         """
00095         self.loglevel = logging.NOTSET
00096         self.__filename = filename
00097         self.__buffer = None # currently used buffer, points to one of the following:
00098         self.__buffer1 = None
00099         self.__buffer2 = None
00100         self.buffer_size = buffer_size
00101         self.__lineno = 0
00102         self.filter = None
00103 
00104     def __iter__(self):
00105         return self
00106 
00107     def reverse(self):
00108         """ yield log entries in reverse direction starting from last one
00109 
00110         @rtype: iterator
00111         """
00112         self.to_end()
00113         while 1:
00114             try:
00115                 logging.log(self.loglevel, "LogFile.reverse %s" % self.__filename)
00116                 result = self.previous()
00117             except StopIteration:
00118                 return
00119             yield result
00120 
00121     def sanityCheck(self):
00122         """ Check for log file write access.
00123 
00124         @rtype: string (error message) or None
00125         """
00126         if not os.access(self.__filename, os.W_OK):
00127             return "The log '%s' is not writable!" % (self.__filename, )
00128         return None
00129 
00130     def __getattr__(self, name):
00131         """
00132         generate some attributes when needed
00133         """
00134         if name == "_LogFile__rel_index": # Python black magic: this is the real name of the __rel_index attribute
00135             # starting iteration from begin
00136             self.__buffer1 = LineBuffer(self._input, 0, self.buffer_size)
00137             self.__buffer2 = LineBuffer(self._input,
00138                                         self.__buffer1.offsets[-1],
00139                                         self.buffer_size)
00140             self.__buffer = self.__buffer1
00141             self.__rel_index = 0
00142             return 0
00143         elif name == "_input":
00144             try:
00145                 # Open the file (NOT using codecs.open, it breaks our offset calculation. We decode it later.).
00146                 # Use binary mode in order to retain \r - otherwise the offset calculation would fail.
00147                 self._input = file(self.__filename, "rb", )
00148             except IOError, err:
00149                 if err.errno == errno.ENOENT: # "file not found"
00150                     # XXX workaround if edit-log does not exist: just create it empty
00151                     # if this workaround raises another error, we don't catch
00152                     # it, so the admin will see it.
00153                     f = file(self.__filename, "ab")
00154                     f.write('')
00155                     f.close()
00156                     self._input = file(self.__filename, "rb", )
00157                 else:
00158                     logging.error("logfile: %r IOERROR errno %d (%s)" % (self.__filename, err.errno, os.strerror(err.errno)))
00159                     raise
00160             return self._input
00161         elif name == "_output":
00162             self._output = codecs.open(self.__filename, 'a', config.charset)
00163             return self._output
00164         else:
00165             raise AttributeError(name)
00166 
00167     def size(self):
00168         """ Return log size in bytes
00169 
00170         Return 0 if the file does not exist. Raises other OSError.
00171 
00172         @return: size of log file in bytes
00173         @rtype: Int
00174         """
00175         try:
00176             return os.path.getsize(self.__filename)
00177         except OSError, err:
00178             if err.errno == errno.ENOENT:
00179                 return 0
00180             raise
00181 
00182     def lines(self):
00183         """ Return number of lines in the log file
00184 
00185         Return 0 if the file does not exist. Raises other OSError.
00186 
00187         Expensive for big log files - O(n)
00188 
00189         @return: size of log file in lines
00190         @rtype: Int
00191         """
00192         try:
00193             f = file(self.__filename, 'r')
00194             try:
00195                 count = 0
00196                 for line in f:
00197                     count += 1
00198                 return count
00199             finally:
00200                 f.close()
00201         except (OSError, IOError), err:
00202             if err.errno == errno.ENOENT:
00203                 return 0
00204             raise
00205 
00206     def date(self):
00207         # ToDo check if we need this method
00208         """ Return timestamp of log file in usecs """
00209         try:
00210             mtime = os.path.getmtime(self.__filename)
00211         except OSError, err:
00212             if err.errno == errno.ENOENT:
00213                 # This can happen on fresh wiki when building the index
00214                 # Usually the first request will create an event log
00215                 raise LogMissing(str(err))
00216             raise
00217         return wikiutil.timestamp2version(mtime)
00218 
    def peek(self, lines):
        """ Move position in file forward or backwards by "lines" count

        The position is kept as an index (.__rel_index) into the currently
        used one of the two line buffers. The index is moved first, then the
        buffers get switched or reloaded until the index fits into the current
        buffer again.

        It adjusts .__lineno if set (and resets it to 0 when the begin of the
        file is hit).
        This function is not aware of filters!

        @param lines: number of lines, may be negative to move backward
        @rtype: boolean
        @return: True if the move was cut short: either it tried to go before
                 the begin of the file (position is then the first line) or it
                 reached or passed the end of the file (position is then after
                 the last line). False if the position is on a line.
        """
        logging.log(self.loglevel, "LogFile.peek %s" % self.__filename)
        self.__rel_index += lines
        # moved before the first line of the current buffer: walk backwards
        while self.__rel_index < 0:
            if self.__buffer is self.__buffer2:
                if self.__buffer.offsets[0] == 0:
                    # already at the beginning of the file
                    self.__rel_index = 0
                    self.__lineno = 0
                    return True
                else:
                    # change to buffer 1
                    self.__buffer = self.__buffer1
                    self.__rel_index += self.__buffer.len
            else: # self.__buffer is self.__buffer1
                if self.__buffer.offsets[0] == 0:
                    # already at the beginning of the file
                    self.__rel_index = 0
                    self.__lineno = 0
                    return True
                else:
                    # load previous lines
                    # (old buffer 1 becomes buffer 2, new buffer 1 ends where it starts)
                    self.__buffer2 = self.__buffer1
                    self.__buffer1 = LineBuffer(self._input,
                                                self.__buffer.offsets[0],
                                                self.buffer_size,
                                                forward=False)
                    self.__buffer = self.__buffer1
                    self.__rel_index += self.__buffer.len

        # moved behind the last line of the current buffer: walk forward
        while self.__rel_index >= self.__buffer.len:
            if self.__buffer is self.__buffer1:
                # change to buffer 2
                self.__rel_index -= self.__buffer.len
                self.__buffer = self.__buffer2
            else: # self.__buffer is self.__buffer2
                # try to load next buffer
                tmpbuff = LineBuffer(self._input,
                                     self.__buffer.offsets[-1],
                                     self.buffer_size)
                if tmpbuff.len == 0:
                    # end of file
                    if self.__lineno is not None:
                        # count only the lines really moved: the requested
                        # count minus the overshoot beyond the last line
                        self.__lineno += (lines -
                                         (self.__rel_index - self.__buffer.len))
                    self.__rel_index = self.__buffer.len # point to after last read line
                    return True
                # shift buffers
                self.__rel_index -= self.__buffer.len
                self.__buffer1 = self.__buffer2
                self.__buffer2 = tmpbuff
                self.__buffer = self.__buffer2

        if self.__lineno is not None:
            self.__lineno += lines
        return False
00285 
00286     def __next(self):
00287         """get next line already parsed"""
00288         if self.peek(0):
00289             raise StopIteration
00290         result = self.parser(self.__buffer.lines[self.__rel_index])
00291         self.peek(1)
00292         return result
00293 
00294     def next(self):
00295         """get next line that passes through the filter
00296         @return: next entry
00297         raises StopIteration at file end
00298         """
00299         result = None
00300         while result is None:
00301             while result is None:
00302                 logging.log(self.loglevel, "LogFile.next %s" % self.__filename)
00303                 result = self.__next()
00304             if self.filter and not self.filter(result):
00305                 result = None
00306         return result
00307 
00308     def __previous(self):
00309         """get previous line already parsed"""
00310         if self.peek(-1):
00311             raise StopIteration
00312         return self.parser(self.__buffer.lines[self.__rel_index])
00313 
00314     def previous(self):
00315         """get previous line that passes through the filter
00316         @return: previous entry
00317         raises StopIteration at file begin
00318         """
00319         result = None
00320         while result is None:
00321             while result is None:
00322                 logging.log(self.loglevel, "LogFile.previous %s" % self.__filename)
00323                 result = self.__previous()
00324             if self.filter and not self.filter(result):
00325                 result = None
00326         return result
00327 
00328     def to_begin(self):
00329         """moves file position to the begin"""
00330         logging.log(self.loglevel, "LogFile.to_begin %s" % self.__filename)
00331         if self.__buffer1 is None or self.__buffer1.offsets[0] != 0:
00332             self.__buffer1 = LineBuffer(self._input,
00333                                         0,
00334                                         self.buffer_size)
00335             self.__buffer2 = LineBuffer(self._input,
00336                                         self.__buffer1.offsets[-1],
00337                                         self.buffer_size)
00338         self.__buffer = self.__buffer1
00339         self.__rel_index = 0
00340         self.__lineno = 0
00341 
00342     def to_end(self):
00343         """moves file position to the end"""
00344         logging.log(self.loglevel, "LogFile.to_end %s" % self.__filename)
00345         self._input.seek(0, 2) # to end of file
00346         size = self._input.tell()
00347         if self.__buffer2 is None or size > self.__buffer2.offsets[-1]:
00348             self.__buffer2 = LineBuffer(self._input,
00349                                         size,
00350                                         self.buffer_size,
00351                                         forward=False)
00352 
00353             self.__buffer1 = LineBuffer(self._input,
00354                                         self.__buffer2.offsets[0],
00355                                         self.buffer_size,
00356                                         forward=False)
00357         self.__buffer = self.__buffer2
00358         self.__rel_index = self.__buffer2.len
00359         self.__lineno = None
00360 
00361     def position(self):
00362         """ Return the current file position
00363 
00364         This can be converted into a String using back-ticks and then be rebuild.
00365         For this plain file implementation position is an Integer.
00366         """
00367         return self.__buffer.offsets[self.__rel_index]
00368 
00369     def seek(self, position, line_no=None):
00370         """ moves file position to an value formerly gotten from .position().
00371         To enable line counting line_no must be provided.
00372         .seek is much more efficient for moving long distances than .peek.
00373         raises ValueError if position is invalid
00374         """
00375         logging.log(self.loglevel, "LogFile.seek %s pos %d" % (self.__filename, position))
00376         if self.__buffer1:
00377             logging.log(self.loglevel, "b1 %r %r" % (self.__buffer1.offsets[0], self.__buffer1.offsets[-1]))
00378         if self.__buffer2:
00379             logging.log(self.loglevel, "b2 %r %r" % (self.__buffer2.offsets[0], self.__buffer2.offsets[-1]))
00380         if self.__buffer1 and self.__buffer1.offsets[0] <= position < self.__buffer1.offsets[-1]:
00381             # position is in .__buffer1
00382             self.__rel_index = self.__buffer1.offsets.index(position)
00383             self.__buffer = self.__buffer1
00384         elif self.__buffer2 and self.__buffer2.offsets[0] <= position < self.__buffer2.offsets[-1]:
00385             # position is in .__buffer2
00386             self.__rel_index = self.__buffer2.offsets.index(position)
00387             self.__buffer = self.__buffer2
00388         elif self.__buffer1 and self.__buffer1.offsets[-1] == position:
00389             # we already have one buffer directly before where we want to go
00390             self.__buffer2 = LineBuffer(self._input,
00391                                         position,
00392                                         self.buffer_size)
00393             self.__buffer = self.__buffer2
00394             self.__rel_index = 0
00395         elif self.__buffer2 and self.__buffer2.offsets[-1] == position:
00396             # we already have one buffer directly before where we want to go
00397             self.__buffer1 = self.__buffer2
00398             self.__buffer2 = LineBuffer(self._input,
00399                                         position,
00400                                         self.buffer_size)
00401             self.__buffer = self.__buffer2
00402             self.__rel_index = 0
00403         else:
00404             # load buffers around position
00405             self.__buffer1 = LineBuffer(self._input,
00406                                         position,
00407                                         self.buffer_size,
00408                                         forward=False)
00409             self.__buffer2 = LineBuffer(self._input,
00410                                         position,
00411                                         self.buffer_size)
00412             self.__buffer = self.__buffer2
00413             self.__rel_index = 0
00414             # XXX test for valid position
00415         self.__lineno = line_no
00416 
00417     def line_no(self):
00418         """@return: the current line number or None if line number is unknown"""
00419         return self.__lineno
00420 
00421     def calculate_line_no(self):
00422         """ Calculate the current line number from buffer offsets
00423 
00424         If line number is unknown it is calculated by parsing the whole file.
00425         This may be expensive.
00426         """
00427         self._input.seek(0, 0)
00428         lines = self._input.read(self.__buffer.offsets[self.__rel_index])
00429         self.__lineno = len(lines.splitlines())
00430         return self.__lineno
00431 
00432     def parser(self, line):
00433         """
00434         @param line: line as read from file
00435         @return: parsed line or None on error
00436         Converts the line from file to program representation
00437         This implementation uses TAB separated strings.
00438         This method should be overwritten by the sub classes.
00439         """
00440         return line.split("\t")
00441 
00442     def add(self, *data):
00443         """
00444         add line to log file
00445         This implementation save the values as TAB separated strings.
00446         This method should be overwritten by the sub classes.
00447         """
00448         line = "\t".join(data)
00449         self._add(line)
00450 
00451     def _add(self, line):
00452         """
00453         @param line: flat line
00454         @type line: String
00455         write on entry in the log file
00456         """
00457         if line is not None:
00458             if line[-1] != '\n':
00459                 line += '\n'
00460             self._output.write(line)
00461             self._output.close() # does this maybe help against the sporadic fedora wikis 160 \0 bytes in the edit-log?
00462             del self._output # re-open the output file automagically