Back to index

enigmail  1.4.3
Preprocessor.py
Go to the documentation of this file.
00001 """
00002 This is a very primitive line based preprocessor, for times when using
00003 a C preprocessor isn't an option.
00004 """
00005 
00006 # ***** BEGIN LICENSE BLOCK *****
00007 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
00008 #
00009 # The contents of this file are subject to the Mozilla Public License Version
00010 # 1.1 (the "License"); you may not use this file except in compliance with
00011 # the License. You may obtain a copy of the License at
00012 # http://www.mozilla.org/MPL/
00013 #
00014 # Software distributed under the License is distributed on an "AS IS" basis,
00015 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00016 # for the specific language governing rights and limitations under the
00017 # License.
00018 #
00019 # The Original Code is Mozilla build system.
00020 #
00021 # The Initial Developer of the Original Code is
00022 # Mozilla Foundation.
00023 # Portions created by the Initial Developer are Copyright (C) 2007
00024 # the Initial Developer. All Rights Reserved.
00025 #
00026 # Contributor(s):
00027 #  Axel Hecht <axel@pike.org>
00028 #
00029 # Alternatively, the contents of this file may be used under the terms of
00030 # either the GNU General Public License Version 2 or later (the "GPL"), or
00031 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00032 # in which case the provisions of the GPL or the LGPL are applicable instead
00033 # of those above. If you wish to allow use of your version of this file only
00034 # under the terms of either the GPL or the LGPL, and not to allow others to
00035 # use your version of this file under the terms of the MPL, indicate your
00036 # decision by deleting the provisions above and replace them with the notice
00037 # and other provisions required by the GPL or the LGPL. If you do not delete
00038 # the provisions above, a recipient may use your version of this file under
00039 # the terms of any one of the MPL, the GPL or the LGPL.
00040 #
00041 # ***** END LICENSE BLOCK *****
00042 
00043 import sys
00044 import os
00045 import os.path
00046 import re
00047 from optparse import OptionParser
00048 
# hack around win32 mangling our line endings
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65443
# On Windows, switch stdout to binary mode so the C runtime does not
# translate the '\n' characters we write, and make os.linesep agree.
if sys.platform == "win32":
  import msvcrt
  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
  os.linesep = '\n'
00055 
00056 import Expression
00057 
00058 __all__ = ['Preprocessor', 'preprocess']
00059 
00060 
00061 class Preprocessor:
00062   """
00063   Class for preprocessing text files.
00064   """
00065   class Error(RuntimeError):
00066     def __init__(self, cpp, MSG, context):
00067       self.file = cpp.context['FILE']
00068       self.line = cpp.context['LINE']
00069       self.key = MSG
00070       RuntimeError.__init__(self, (self.file, self.line, self.key, context))
00071   def __init__(self):
00072     self.context = Expression.Context()
00073     for k,v in {'FILE': '',
00074                 'LINE': 0,
00075                 'DIRECTORY': os.path.abspath('.')}.iteritems():
00076       self.context[k] = v
00077     self.disableLevel = 0
00078     # ifStates can be
00079     #  0: hadTrue
00080     #  1: wantsTrue
00081     #  2: #else found
00082     self.ifStates = []
00083     self.checkLineNumbers = False
00084     self.writtenLines = 0
00085     self.filters = []
00086     self.cmds = {}
00087     for cmd, level in {'define': 0,
00088                        'undef': 0,
00089                        'if': sys.maxint,
00090                        'ifdef': sys.maxint,
00091                        'ifndef': sys.maxint,
00092                        'else': 1,
00093                        'elif': 1,
00094                        'elifdef': 1,
00095                        'elifndef': 1,
00096                        'endif': sys.maxint,
00097                        'expand': 0,
00098                        'literal': 0,
00099                        'filter': 0,
00100                        'unfilter': 0,
00101                        'include': 0,
00102                        'includesubst': 0,
00103                        'error': 0}.iteritems():
00104       self.cmds[cmd] = (level, getattr(self, 'do_' + cmd))
00105     self.out = sys.stdout
00106     self.setMarker('#')
00107     self.LE = '\n'
00108     self.varsubst = re.compile('@(?P<VAR>\w+)@', re.U)
00109   
00110   def setLineEndings(self, aLE):
00111     """
00112     Set the line endings to be used for output.
00113     """
00114     self.LE = {'cr': '\x0D', 'lf': '\x0A', 'crlf': '\x0D\x0A'}[aLE]
00115   
00116   def setMarker(self, aMarker):
00117     """
00118     Set the marker to be used for processing directives.
00119     Used for handling CSS files, with pp.setMarker('%'), for example.
00120     """
00121     self.marker = aMarker
00122     self.instruction = re.compile('%s(?P<cmd>[a-z]+)(?:\s(?P<args>.*))?$'%aMarker, re.U)
00123     self.comment = re.compile(aMarker, re.U)
00124   
00125   def clone(self):
00126     """
00127     Create a clone of the current processor, including line ending
00128     settings, marker, variable definitions, output stream.
00129     """
00130     rv = Preprocessor()
00131     rv.context.update(self.context)
00132     rv.setMarker(self.marker)
00133     rv.LE = self.LE
00134     rv.out = self.out
00135     return rv
00136   
00137   def write(self, aLine):
00138     """
00139     Internal method for handling output.
00140     """
00141     if self.checkLineNumbers:
00142       self.writtenLines += 1
00143       ln = self.context['LINE']
00144       if self.writtenLines != ln:
00145         self.out.write('//@line %(line)d "%(file)s"%(le)s'%{'line': ln,
00146                                                             'file': self.context['FILE'],
00147                                                             'le': self.LE})
00148         self.writtenLines = ln
00149     for f in self.filters:
00150       aLine = f[1](aLine)
00151     # ensure our line ending. Only need to handle \n, as we're reading
00152     # with universal line ending support, at least for files.
00153     aLine = re.sub('\n', self.LE, aLine)
00154     self.out.write(aLine)
00155   
00156   def handleCommandLine(self, args, defaultToStdin = False):
00157     """
00158     Parse a commandline into this parser.
00159     Uses OptionParser internally, no args mean sys.argv[1:].
00160     """
00161     p = self.getCommandLineParser()
00162     (options, args) = p.parse_args(args=args)
00163     includes = options.I
00164     if defaultToStdin and len(args) == 0:
00165       args = [sys.stdin]
00166     includes.extend(args)
00167     for f in includes:
00168       self.do_include(f)
00169     pass
00170 
00171   def getCommandLineParser(self, unescapeDefines = False):
00172     escapedValue = re.compile('".*"$')
00173     numberValue = re.compile('\d+$')
00174     def handleE(option, opt, value, parser):
00175       for k,v in os.environ.iteritems():
00176         self.context[k] = v
00177     def handleD(option, opt, value, parser):
00178       vals = value.split('=', 1)
00179       if len(vals) == 1:
00180         vals.append(1)
00181       elif unescapeDefines and escapedValue.match(vals[1]):
00182         # strip escaped string values
00183         vals[1] = vals[1][1:-1]
00184       elif numberValue.match(vals[1]):
00185         vals[1] = int(vals[1])
00186       self.context[vals[0]] = vals[1]
00187     def handleU(option, opt, value, parser):
00188       del self.context[value]
00189     def handleF(option, opt, value, parser):
00190       self.do_filter(value)
00191     def handleLE(option, opt, value, parser):
00192       self.setLineEndings(value)
00193     def handleMarker(option, opt, value, parser):
00194       self.setMarker(value)
00195     p = OptionParser()
00196     p.add_option('-I', action='append', type="string", default = [],
00197                  metavar="FILENAME", help='Include file')
00198     p.add_option('-E', action='callback', callback=handleE,
00199                  help='Import the environment into the defined variables')
00200     p.add_option('-D', action='callback', callback=handleD, type="string",
00201                  metavar="VAR[=VAL]", help='Define a variable')
00202     p.add_option('-U', action='callback', callback=handleU, type="string",
00203                  metavar="VAR", help='Undefine a variable')
00204     p.add_option('-F', action='callback', callback=handleF, type="string",
00205                  metavar="FILTER", help='Enable the specified filter')
00206     p.add_option('--line-endings', action='callback', callback=handleLE,
00207                  type="string", metavar="[cr|lr|crlf]",
00208                  help='Use the specified line endings [Default: OS dependent]')
00209     p.add_option('--marker', action='callback', callback=handleMarker,
00210                  type="string",
00211                  help='Use the specified marker instead of #')
00212     return p
00213 
00214   def handleLine(self, aLine):
00215     """
00216     Handle a single line of input (internal).
00217     """
00218     m = self.instruction.match(aLine)
00219     if m:
00220       args = None
00221       cmd = m.group('cmd')
00222       try:
00223         args = m.group('args')
00224       except IndexError:
00225         pass
00226       if cmd not in self.cmds:
00227         raise Preprocessor.Error(self, 'INVALID_CMD', aLine)
00228       level, cmd = self.cmds[cmd]
00229       if (level >= self.disableLevel):
00230         cmd(args)
00231     elif self.disableLevel == 0 and not self.comment.match(aLine):
00232       self.write(aLine)
00233     pass
00234 
00235   # Instruction handlers
00236   # These are named do_'instruction name' and take one argument
00237   
00238   # Variables
00239   def do_define(self, args):
00240     m = re.match('(?P<name>\w+)(?:\s(?P<value>.*))?', args, re.U)
00241     if not m:
00242       raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
00243     val = 1
00244     if m.group('value'):
00245       val = m.group('value')
00246       try:
00247         val = int(val)
00248       except:
00249         pass
00250     self.context[m.group('name')] = val
00251   def do_undef(self, args):
00252     m = re.match('(?P<name>\w+)$', args, re.U)
00253     if not m:
00254       raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
00255     if args in self.context:
00256       del self.context[args]
00257   # Logic
00258   def ensure_not_else(self):
00259     if len(self.ifStates) == 0 or self.ifStates[-1] == 2:
00260       sys.stderr.write('WARNING: bad nesting of #else\n')
00261   def do_if(self, args, replace=False):
00262     if self.disableLevel and not replace:
00263       self.disableLevel += 1
00264       return
00265     val = None
00266     try:
00267       e = Expression.Expression(args)
00268       val = e.evaluate(self.context)
00269     except Exception:
00270       # XXX do real error reporting
00271       raise Preprocessor.Error(self, 'SYNTAX_ERR', args)
00272     if type(val) == str:
00273       # we're looking for a number value, strings are false
00274       val = False
00275     if not val:
00276       self.disableLevel = 1
00277     if replace:
00278       if val:
00279         self.disableLevel = 0
00280       self.ifStates[-1] = self.disableLevel
00281     else:
00282       self.ifStates.append(self.disableLevel)
00283     pass
00284   def do_ifdef(self, args, replace=False):
00285     if self.disableLevel and not replace:
00286       self.disableLevel += 1
00287       return
00288     if re.match('\W', args, re.U):
00289       raise Preprocessor.Error(self, 'INVALID_VAR', args)
00290     if args not in self.context:
00291       self.disableLevel = 1
00292     if replace:
00293       if args in self.context:
00294         self.disableLevel = 0
00295       self.ifStates[-1] = self.disableLevel
00296     else:
00297       self.ifStates.append(self.disableLevel)
00298     pass
00299   def do_ifndef(self, args, replace=False):
00300     if self.disableLevel and not replace:
00301       self.disableLevel += 1
00302       return
00303     if re.match('\W', args, re.U):
00304       raise Preprocessor.Error(self, 'INVALID_VAR', args)
00305     if args in self.context:
00306       self.disableLevel = 1
00307     if replace:
00308       if args not in self.context:
00309         self.disableLevel = 0
00310       self.ifStates[-1] = self.disableLevel
00311     else:
00312       self.ifStates.append(self.disableLevel)
00313     pass
00314   def do_else(self, args, ifState = 2):
00315     self.ensure_not_else()
00316     hadTrue = self.ifStates[-1] == 0
00317     self.ifStates[-1] = ifState # in-else
00318     if hadTrue:
00319       self.disableLevel = 1
00320       return
00321     self.disableLevel = 0
00322   def do_elif(self, args):
00323     if self.disableLevel == 1:
00324       if self.ifStates[-1] == 1:
00325         self.do_if(args, replace=True)
00326     else:
00327       self.do_else(None, self.ifStates[-1])
00328   def do_elifdef(self, args):
00329     if self.disableLevel == 1:
00330       if self.ifStates[-1] == 1:
00331         self.do_ifdef(args, replace=True)
00332     else:
00333       self.do_else(None, self.ifStates[-1])
00334   def do_elifndef(self, args):
00335     if self.disableLevel == 1:
00336       if self.ifStates[-1] == 1:
00337         self.do_ifndef(args, replace=True)
00338     else:
00339       self.do_else(None, self.ifStates[-1])
00340   def do_endif(self, args):
00341     if self.disableLevel > 0:
00342       self.disableLevel -= 1
00343     if self.disableLevel == 0:
00344       self.ifStates.pop()
00345   # output processing
00346   def do_expand(self, args):
00347     lst = re.split('__(\w+)__', args, re.U)
00348     do_replace = False
00349     def vsubst(v):
00350       if v in self.context:
00351         return str(self.context[v])
00352       return ''
00353     for i in range(1, len(lst), 2):
00354       lst[i] = vsubst(lst[i])
00355     lst.append('\n') # add back the newline
00356     self.write(reduce(lambda x, y: x+y, lst, ''))
00357   def do_literal(self, args):
00358     self.write(args + self.LE)
00359   def do_filter(self, args):
00360     filters = [f for f in args.split(' ') if hasattr(self, 'filter_' + f)]
00361     if len(filters) == 0:
00362       return
00363     current = dict(self.filters)
00364     for f in filters:
00365       current[f] = getattr(self, 'filter_' + f)
00366     filterNames = current.keys()
00367     filterNames.sort()
00368     self.filters = [(fn, current[fn]) for fn in filterNames]
00369     return
00370   def do_unfilter(self, args):
00371     filters = args.split(' ')
00372     current = dict(self.filters)
00373     for f in filters:
00374       if f in current:
00375         del current[f]
00376     filterNames = current.keys()
00377     filterNames.sort()
00378     self.filters = [(fn, current[fn]) for fn in filterNames]
00379     return
00380   # Filters
00381   #
00382   # emptyLines
00383   #   Strips blank lines from the output.
00384   def filter_emptyLines(self, aLine):
00385     if aLine == '\n':
00386       return ''
00387     return aLine
00388   # slashslash
00389   #   Strips everything after //
00390   def filter_slashslash(self, aLine):
00391     [aLine, rest] = aLine.split('//', 1)
00392     if rest:
00393       aLine += '\n'
00394     return aLine
00395   # spaces
00396   #   Collapses sequences of spaces into a single space
00397   def filter_spaces(self, aLine):
00398     return re.sub(' +', ' ', aLine).strip(' ')
00399   # substitution
00400   #   helper to be used by both substitution and attemptSubstitution
00401   def filter_substitution(self, aLine, fatal=True):
00402     def repl(matchobj):
00403       varname = matchobj.group('VAR')
00404       if varname in self.context:
00405         return str(self.context[varname])
00406       if fatal:
00407         raise Preprocessor.Error(self, 'UNDEFINED_VAR', varname)
00408       return ''
00409     return self.varsubst.sub(repl, aLine)
00410   def filter_attemptSubstitution(self, aLine):
00411     return self.filter_substitution(aLine, fatal=False)
00412   # File ops
00413   def do_include(self, args):
00414     """
00415     Preprocess a given file.
00416     args can either be a file name, or a file-like object.
00417     Files should be opened, and will be closed after processing.
00418     """
00419     isName = type(args) == str or type(args) == unicode
00420     oldWrittenLines = self.writtenLines
00421     oldCheckLineNumbers = self.checkLineNumbers
00422     self.checkLineNumbers = False
00423     if isName:
00424       try:
00425         args = str(args)
00426         if not os.path.isabs(args):
00427           args = os.path.join(self.context['DIRECTORY'], args)
00428         args = open(args, 'rU')
00429       except:
00430         raise Preprocessor.Error(self, 'FILE_NOT_FOUND', str(args))
00431     self.checkLineNumbers = bool(re.search('\.(js|java)(?:\.in)?$', args.name))
00432     oldFile = self.context['FILE']
00433     oldLine = self.context['LINE']
00434     oldDir = self.context['DIRECTORY']
00435     if args.isatty():
00436       # we're stdin, use '-' and '' for file and dir
00437       self.context['FILE'] = '-'
00438       self.context['DIRECTORY'] = ''
00439     else:
00440       abspath = os.path.abspath(args.name)
00441       self.context['FILE'] = abspath
00442       self.context['DIRECTORY'] = os.path.dirname(abspath)
00443     self.context['LINE'] = 0
00444     self.writtenLines = 0
00445     for l in args:
00446       self.context['LINE'] += 1
00447       self.handleLine(l)
00448     args.close()
00449     self.context['FILE'] = oldFile
00450     self.checkLineNumbers = oldCheckLineNumbers
00451     self.writtenLines = oldWrittenLines
00452     self.context['LINE'] = oldLine
00453     self.context['DIRECTORY'] = oldDir
00454   def do_includesubst(self, args):
00455     args = self.filter_substitution(args)
00456     self.do_include(args)
00457   def do_error(self, args):
00458     raise Preprocessor.Error(self, 'Error: ', str(args))
00459 
def main():
  """
  Command line entry point: preprocess the files named in sys.argv,
  or stdin if none are given.
  """
  Preprocessor().handleCommandLine(None, True)
00464 
def preprocess(includes=None, defines=None,
               output = sys.stdout,
               line_endings='\n', marker='#'):
  """
  Preprocess the given inputs in one call.

  includes      file names or open file-like objects, processed in
                order; defaults to [sys.stdin]
  defines       mapping of variables to define before processing
  output        stream the result is written to
  line_endings  'cr', 'lf' or 'crlf', or the literal '\\r', '\\n' or
                '\\r\\n'
  marker        directive marker

  Raises KeyError for an unknown line_endings value.
  """
  if includes is None:
    includes = [sys.stdin]
  pp = Preprocessor()
  if defines:
    pp.context.update(defines)
  # setLineEndings only knows the names; the default used to be the
  # literal '\n', which it rejected with KeyError.
  literalEndings = {'\r': 'cr', '\n': 'lf', '\r\n': 'crlf'}
  pp.setLineEndings(literalEndings.get(line_endings, line_endings))
  pp.setMarker(marker)
  pp.out = output
  for f in includes:
    pp.do_include(f)
00475 
# Run the command line driver only when executed as a script.
if __name__ == "__main__":
  main()