Back to index

python3.2  3.2.2
pipes.py
Go to the documentation of this file.
00001 """Conversion pipeline templates.
00002 
00003 The problem:
00004 ------------
00005 
00006 Suppose you have some data that you want to convert to another format,
00007 such as from GIF image format to PPM image format.  Maybe the
00008 conversion involves several steps (e.g. piping it through compress or
00009 uuencode).  Some of the conversion steps may require that their input
00010 is a disk file, others may be able to read standard input; similar for
00011 their output.  The input to the entire conversion may also be read
00012 from a disk file or from an open file, and similar for its output.
00013 
00014 The module lets you construct a pipeline template by sticking one or
00015 more conversion steps together.  It will take care of creating and
00016 removing temporary files if they are necessary to hold intermediate
00017 data.  You can then use the template to do conversions from many
00018 different sources to many different destinations.  The temporary
00019 file names used are different each time the template is used.
00020 
00021 The templates are objects so you can create templates for many
00022 different conversion steps and store them in a dictionary, for
00023 instance.
00024 
00025 
00026 Directions:
00027 -----------
00028 
00029 To create a template:
00030     t = Template()
00031 
00032 To add a conversion step to a template:
00033    t.append(command, kind)
00034 where kind is a string of two characters: the first is '-' if the
00035 command reads its standard input or 'f' if it requires a file; the
00036 second likewise for the output. The command must be valid /bin/sh
00037 syntax.  If input or output files are required, they are passed as
00038 $IN and $OUT; otherwise, it must be possible to use the command in
00039 a pipeline.
00040 
00041 To add a conversion step at the beginning:
00042    t.prepend(command, kind)
00043 
00044 To convert a file to another file using a template:
00045   sts = t.copy(infile, outfile)
00046 If infile or outfile are the empty string, standard input is read or
00047 standard output is written, respectively.  The return value is the
00048 exit status of the conversion pipeline.
00049 
00050 To open a file for reading or writing through a conversion pipeline:
00051    fp = t.open(file, mode)
00052 where mode is 'r' to read the file, or 'w' to write it -- just like
00053 for the built-in function open() or for os.popen().
00054 
00055 To create a new template object initialized to a given one:
00056    t2 = t.clone()
00057 
00058 For an example, see the function test() at the end of the file.
00059 """                                     # '
00060 
00061 
00062 import re
00063 import os
00064 import tempfile
00065 import string
00066 
00067 __all__ = ["Template"]
00068 
# Conversion step kinds.
#
# Each kind is a two-character code: the first character describes how the
# step takes its input, the second how it produces its output.  'f' means
# a real file, '-' means a standard stream, and '.' marks the missing end
# of a pure source or sink.

# Steps that can sit anywhere in a pipeline
FILEIN_FILEOUT = 'ff'   # reads a real file, writes a real file
STDIN_FILEOUT = '-f'    # reads stdin, writes a real file
FILEIN_STDOUT = 'f-'    # reads a real file, writes stdout
STDIN_STDOUT = '--'     # ordinary filter: stdin to stdout

# Steps that are only valid at one end of a pipeline
SOURCE = '.-'           # must be first; only writes stdout
SINK = '-.'             # must be last; only reads stdin

# Every kind accepted by Template.append() / Template.prepend()
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
00080 
00081 
class Template:
    """Class representing a pipeline template.

    A template is an ordered list of ``(cmd, kind)`` steps kept in
    ``self.steps``.  ``cmd`` is a /bin/sh command string and ``kind`` is
    one of the two-character codes listed in ``stepkinds``.  The steps
    are turned into a shell script only when the template is used, via
    copy(), open(), open_r() or open_w().
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template don't leak into
        # the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_cmd_and_kind(self, where, cmd, kind):
        """Validate the argument types shared by append() and prepend().

        *where* is the public method name used in the error message.
        Raises TypeError if cmd is not a string and ValueError if kind
        is not one of stepkinds.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    def _check_placeholders(self, where, cmd, kind):
        """Require $IN / $OUT in cmd when kind says a real file is used.

        *where* is the public method name used in the error message.
        Raises ValueError if a needed placeholder is missing.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SOURCE, if the template already ends with
        a SINK step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        self._check_cmd_and_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SINK, if the template already begins with
        a SOURCE step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        self._check_cmd_and_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading.  Otherwise
        the pipeline is run with *file* as its input and the returned
        pipe reads the pipeline's output.  Raises ValueError if the
        template ends with a SINK step.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing.  Otherwise
        the returned pipe feeds the pipeline's input and the pipeline
        is run with *file* as its output.  Raises ValueError if the
        template begins with a SOURCE step.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to outfile.

        The generated shell script is passed to os.system() and that
        call's return value is returned.
        """
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell script text.

        Delegates to the module-level makepipeline().  When debugging is
        on, the script is printed and 'set -x; ' is put in front of it.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
00179 
00180 
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh script text that runs *steps* as one pipeline.

    infile and outfile name the files at the two ends; an empty string
    means that end is left connected to standard input / output.  steps
    is a sequence of (cmd, kind) pairs.

    Neighbouring stages are joined with a pipe unless either side needs
    a real file, in which case a temporary file is created here (with
    tempfile.mkstemp) and the script is wrapped with a trap and a final
    'rm -f' that remove it.
    """
    # NOTE(review): when temporary files are used, the last command of
    # the script is the 'rm -f' cleanup -- confirm that callers reading
    # the script's exit status expect that.

    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # An empty template still needs something to run.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # A first stage that needs $IN but has no input file gets a 'cat'
    # in front of it, so that stdin ends up in a temporary file.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    # Likewise for a last stage that needs $OUT but has no output file.
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Connect neighbours through a temporary file whenever the left one
    # must write a file or the right one must read a file.
    temps = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            handle, path = tempfile.mkstemp()
            os.close(handle)
            temps.append(path)
            left[-1] = right[0] = path

    # Give every command its $IN/$OUT assignments or its redirections.
    for stage in stages:
        src, cmd, kind, dst = stage
        if kind[1] == 'f':
            cmd = ''.join(('OUT=', quote(dst), '; ', cmd))
        if kind[0] == 'f':
            cmd = ''.join(('IN=', quote(src), '; ', cmd))
        if kind[0] == '-' and src:
            cmd = ''.join((cmd, ' <', quote(src)))
        if kind[1] == '-' and dst:
            cmd = ''.join((cmd, ' >', quote(dst)))
        stage[1] = cmd

    # Glue the commands together: a stage with an input file starts a
    # new command line, a stage without one is piped from its
    # predecessor.
    pieces = [stages[0][1]]
    for src, cmd, kind, _ in stages[1:]:
        if src != '':
            pieces.append('\n' + cmd)
            continue
        if 'f' in kind:
            # Braces keep the OUT=/IN= assignment and the command
            # together as a single pipeline element.
            cmd = ''.join(('{ ', cmd, '; }'))
        pieces.append(' |\n' + cmd)
    script = ''.join(pieces)

    # Remove the temporary files both on the listed signals and after
    # the pipeline has run.
    if temps:
        cleanup = ' '.join(['rm -f'] + [quote(path) for path in temps])
        trap = 'trap ' + quote(cleanup + '; exit') + ' 1 2 3 13 14 15'
        script = '\n'.join((trap, script, cleanup))

    return script
00248 
00249 
00250 # Reliably quote a string as a single argument for /bin/sh
00251 
00252 # Safe unquoted
00253 _safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')
00254 
00255 def quote(file):
00256     """Return a shell-escaped version of the file string."""
00257     for c in file:
00258         if c not in _safechars:
00259             break
00260     else:
00261         if not file:
00262             return "''"
00263         return file
00264     # use single quotes, and put single quotes into double quotes
00265     # the string $'b is then quoted as '$'"'"'b'
00266     return "'" + file.replace("'", "'\"'\"'") + "'"