Back to index

python3.2  3.2.2
fileinput.py
Go to the documentation of this file.
00001 """Helper class to quickly write a loop over all standard input files.
00002 
00003 Typical use is:
00004 
00005     import fileinput
00006     for line in fileinput.input():
00007         process(line)
00008 
00009 This iterates over the lines of all files listed in sys.argv[1:],
00010 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
00011 is also replaced by sys.stdin.  To specify an alternative list of
00012 filenames, pass it as the argument to input().  A single file name is
00013 also allowed.
00014 
00015 Functions filename(), lineno() return the filename and cumulative line
00016 number of the line that has just been read; filelineno() returns its
00017 line number in the current file; isfirstline() returns true iff the
00018 line just read is the first line of its file; isstdin() returns true
00019 iff the line was read from sys.stdin.  Function nextfile() closes the
00020 current file so that the next iteration will read the first line from
00021 the next file (if any); lines not read from the file will not count
00022 towards the cumulative line count; the filename is not changed until
00023 after the first line of the next file has been read.  Function close()
00024 closes the sequence.
00025 
00026 Before any lines have been read, filename() returns None and both line
00027 numbers are zero; nextfile() has no effect.  After all lines have been
00028 read, filename() and the line number functions return the values
00029 pertaining to the last line read; nextfile() has no effect.
00030 
00031 All files are opened in text mode by default; you can override this by
00032 setting the mode parameter to input() or FileInput.__init__().
00033 If an I/O error occurs during opening or reading a file, the IOError
00034 exception is raised.
00035 
00036 If sys.stdin is used more than once, the second and further use will
00037 return no lines, except perhaps for interactive use, or if it has been
00038 explicitly reset (e.g. using sys.stdin.seek(0)).
00039 
00040 Empty files are opened and immediately closed; the only time their
00041 presence in the list of filenames is noticeable at all is when the
00042 last file opened is empty.
00043 
00044 It is possible that the last line of a file doesn't end in a newline
00045 character; otherwise lines are returned including the trailing
00046 newline.
00047 
00048 Class FileInput is the implementation; its methods filename(),
00049 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
00050 correspond to the functions in the module.  In addition it has a
00051 readline() method which returns the next input line, and a
00052 __getitem__() method which implements the sequence behavior.  The
00053 sequence must be accessed in strictly sequential order; sequence
00054 access and readline() cannot be mixed.
00055 
00056 Optional in-place filtering: if the keyword argument inplace=1 is
00057 passed to input() or to the FileInput constructor, the file is moved
00058 to a backup file and standard output is directed to the input file.
00059 This makes it possible to write a filter that rewrites its input file
00060 in place.  If the keyword argument backup=".<some extension>" is also
00061 given, it specifies the extension for the backup file, and the backup
00062 file remains around; by default, the extension is ".bak" and it is
00063 deleted when the output file is closed.  In-place filtering is
00064 disabled when standard input is read.  XXX The current implementation
00065 does not work for MS-DOS 8+3 filesystems.
00066 
00067 Performance: this module is unfortunately one of the slower ways of
00068 processing large numbers of input lines.  Nevertheless, a significant
00069 speed-up has been obtained by using readlines(bufsize) instead of
00070 readline().  A new keyword argument, bufsize=N, is present on the
00071 input() function and the FileInput() class to override the default
00072 buffer size.
00073 
00074 XXX Possible additions:
00075 
00076 - optional getopt argument processing
00077 - isatty()
00078 - read(), read(size), even readlines()
00079 
00080 """
00081 
00082 import sys, os
00083 
# Names exported by ``from fileinput import *``.  Every public module-level
# function, the FileInput class and the two ready-made open hooks are listed;
# fileno(), hook_compressed() and hook_encoded() were previously missing.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance created by input() and consulted by the module-level
# helper functions; None while no input() sequence is active.
_state = None

# Size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024
00090 
def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Create the module-wide FileInput instance and return it.

    All arguments are forwarded unchanged to the FileInput constructor.
    The new instance becomes the global state used by the module-level
    helper functions (filename(), lineno(), ...) and is also returned so
    that the caller can iterate over it.

    Raises RuntimeError("input() already active") if a previous instance
    exists and still has a file open.
    """
    global _state
    current = _state
    already_running = bool(current) and bool(current._file)
    if already_running:
        raise RuntimeError("input() already active")
    reader = FileInput(files, inplace, backup, bufsize, mode, openhook)
    _state = reader
    return reader
00106 
def close():
    """Close the active sequence, if any, and forget the global state."""
    global _state
    # Detach the global first so the module is reset before close() is called.
    active, _state = _state, None
    if active:
        active.close()
00114 
def nextfile():
    """
    Close the file currently being read, so that the following iteration
    starts with the first line of the next file (if there is one). Lines
    left unread in the closed file are not added to the cumulative line
    count, and the reported filename only changes once the first line of
    the next file has been read.
    This function does nothing before the first line has been read (so
    it cannot skip the first file) and does nothing after the last line
    of the last file has been read.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.nextfile()
    raise RuntimeError("no active input()")
00128 
def filename():
    """
    Return the name of the file the last line was read from, or None
    if no line has been read yet.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.filename()
    raise RuntimeError("no active input()")
00137 
def lineno():
    """
    Return the cumulative line number (across all files) of the line
    that has just been read. This is 0 before the first line has been
    read; after the last line of the last file it stays at that line's
    number.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.lineno()
    raise RuntimeError("no active input()")
00147 
def filelineno():
    """
    Return the line number within the current file of the line that has
    just been read. This is 0 before the first line has been read; after
    the last line of the last file it stays at that line's number within
    its file.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.filelineno()
    raise RuntimeError("no active input()")
00157 
def fileno():
    """
    Return the file descriptor number of the file currently being read,
    or -1 when no file is open.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.fileno()
    raise RuntimeError("no active input()")
00166 
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.isfirstline()
    raise RuntimeError("no active input()")
00175 
def isstdin():
    """
    Return true if the last line was read from sys.stdin, otherwise
    return false.

    Raises RuntimeError if input() is not active.
    """
    active = _state
    if active:
        return active.isstdin()
    raise RuntimeError("no active input()")
00184 
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the sequence of input files; nothing is opened yet.

        files    -- a single file name, an iterable of file names, or None
                    to use sys.argv[1:]; an empty list means ('-',), i.e.
                    standard input.
        inplace  -- if true, each file is renamed to a backup and
                    sys.stdout is redirected to a fresh file with the
                    original name while that file is being read.
        backup   -- extension for the in-place backup file; when empty,
                    ".bak" is used and the backup is removed afterwards.
        bufsize  -- size hint passed to readlines(); 0 selects
                    DEFAULT_BUFSIZE.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- optional callable(filename, mode) returning an open
                    file object; cannot be combined with inplace.

        Raises ValueError for an unsupported mode, for openhook combined
        with inplace, and for a non-callable openhook.
        """
        # Initialise all bookkeeping state before touching the arguments so
        # that close() -- and therefore __del__ -- works on an instance whose
        # construction failed part-way (e.g. a non-iterable *files*).
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        self._mode = mode
        self._openhook = None

        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not callable(openhook):
            raise ValueError("FileInput openhook must be callable")
        self._files = files
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            # Drop the pending names even if closing the current file failed,
            # so a later readline() cannot silently continue the sequence.
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve directly from the buffer without the extra
        # readline() call.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __getitem__(self, i):
        """Sequence access; *i* must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file so the next read starts the next file.

        Restores sys.stdout and closes the in-place output file if in-place
        filtering was active, closes the input file unless it is sys.stdin,
        removes the backup file when no backup extension was requested, and
        discards any buffered but unread lines.  The filename and the line
        counters are left untouched.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        # Each cleanup step runs in a ``finally`` so that a failure in one
        # close() cannot leak the resources handled by the later steps.
        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: the backup may already be gone.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all files are exhausted.

        Files are opened lazily; empty files are opened and immediately
        closed.  Opening or reading a file may raise IOError.
        """
        # Iterate instead of recursing: a long run of empty files must not
        # be able to exhaust the interpreter's recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next_file()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; move on to the next one.
                self.nextfile()

    def _open_next_file(self):
        """Pop the next name off the pending list and open it as self._file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            # In-place filtering is never applied to standard input.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._open_inplace()
        elif self._openhook:
            # This may raise IOError
            self._file = self._openhook(self._filename, self._mode)
        else:
            # This may raise IOError
            self._file = open(self._filename, self._mode)

    def _open_inplace(self):
        """Move the current file to its backup name and redirect sys.stdout."""
        self._backupfilename = self._filename + (self._backup or ".bak")
        # Remove a stale backup, if any, so the rename below can succeed.
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, self._mode)
        try:
            perm = os.fstat(self._file.fileno()).st_mode
        except OSError:
            self._output = open(self._filename, "w")
        else:
            # Recreate the output file with the permission bits of the
            # original file.
            flags = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
            if hasattr(os, 'O_BINARY'):
                flags |= os.O_BINARY

            fd = os.open(self._filename, flags, perm)
            self._output = os.fdopen(fd, "w")
            try:
                if hasattr(os, 'chmod'):
                    os.chmod(self._filename, perm)
            except OSError:
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file last opened, or None before any read."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines returned so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines returned from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if none is available."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first line of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is sys.stdin."""
        return self._isstdin
00385 
00386 
def hook_compressed(filename, mode):
    """Open hook that picks an opener from the file name's extension.

    Names ending in '.gz' are opened with gzip.open(), names ending in
    '.bz2' with bz2.BZ2File(); any other name is opened with the builtin
    open().  The compression modules are imported only when needed.
    """
    _, suffix = os.path.splitext(filename)
    if suffix == '.gz':
        import gzip
        return gzip.open(filename, mode)
    if suffix == '.bz2':
        import bz2
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)
00397 
00398 
def hook_encoded(encoding):
    """Return an open hook that opens files via codecs.open() with *encoding*.

    The returned callable takes (filename, mode), as expected for the
    openhook argument of FileInput.
    """
    import codecs

    def openhook(filename, mode):
        # The encoding is captured from the enclosing call.
        return codecs.open(filename, mode, encoding=encoding)

    return openhook
00404 
00405 
def _test():
    """Command-line self test: print every input line with its counters.

    Options: -i enables in-place filtering, -b EXT sets the backup
    extension.  The remaining arguments are the files to read.
    """
    import getopt
    use_inplace = False
    backup_ext = False
    options, paths = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            use_inplace = True
        if flag == '-b':
            backup_ext = value
    for line in input(paths, inplace=use_inplace, backup=backup_ext):
        # Strip one trailing newline and then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        total = lineno()
        name = filename()
        local = filelineno()
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (total, name, local, marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()