Back to index

enigmail  1.4.3
yacc.py
Go to the documentation of this file.
00001 # -----------------------------------------------------------------------------
00002 # ply: yacc.py
00003 #
00004 # Copyright (C) 2001-2009,
00005 # David M. Beazley (Dabeaz LLC)
00006 # All rights reserved.
00007 #
00008 # Redistribution and use in source and binary forms, with or without
00009 # modification, are permitted provided that the following conditions are
00010 # met:
00011 # 
00012 # * Redistributions of source code must retain the above copyright notice,
00013 #   this list of conditions and the following disclaimer.  
00014 # * Redistributions in binary form must reproduce the above copyright notice, 
00015 #   this list of conditions and the following disclaimer in the documentation
00016 #   and/or other materials provided with the distribution.  
00017 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
00018 #   endorse or promote products derived from this software without
00019 #  specific prior written permission. 
00020 #
00021 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00025 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00027 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00028 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00029 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00030 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00031 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 # -----------------------------------------------------------------------------
00033 #
00034 # This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
00036 # Python documentation strings.  The inspiration for this technique was borrowed
00037 # from John Aycock's Spark parsing system.  PLY might be viewed as cross between
00038 # Spark and the GNU bison utility.
00039 #
00040 # The current implementation is only somewhat object-oriented. The
00041 # LR parser itself is defined in terms of an object (which allows multiple
00042 # parsers to co-exist).  However, most of the variables used during table
00043 # construction are defined in terms of global variables.  Users shouldn't
00044 # notice unless they are trying to define multiple parsers at the same
00045 # time using threads (in which case they should have their head examined).
00046 #
00047 # This implementation supports both SLR and LALR(1) parsing.  LALR(1)
00048 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
00049 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
00050 # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
00051 # by the more efficient DeRemer and Pennello algorithm.
00052 #
00053 # :::::::: WARNING :::::::
00054 #
00055 # Construction of LR parsing tables is fairly complicated and expensive.
00056 # To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."   Modify the code at your
00059 # own risk!
00060 # ----------------------------------------------------------------------------
00061 
__version__    = "3.3"       # PLY release version
__tabversion__ = "3.2"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates
                               # a 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files
00086 
00087 import re, types, sys, os.path
00088 
00089 # Compatibility function for python 2.6/3.0
def func_code(f):
    """Return the code object of function *f*.

    Compatibility shim: Python 2 stores it in ``func_code``, Python 3
    in ``__code__``.  The version check happens at call time.
    """
    if sys.version_info[0] < 3:
        return f.func_code
    return f.__code__
00096 
00097 # Compatibility
# Compatibility: Python 2 exposes sys.maxint, Python 3 only sys.maxsize.
MAXINT = getattr(sys, "maxint", sys.maxsize)
00102 
00103 # Python 2.x/3.0 compatibility.
def load_ply_lex():
    """Import and return the lex module.

    Handles the 2.x/3.x package layout difference: on Python 3 lex lives
    inside the ``ply`` package, on Python 2 it is imported bare.
    """
    if sys.version_info[0] >= 3:
        import ply.lex as lex
    else:
        import lex
    return lex
00110 
00111 # This object is a stand-in for a logging object created by the 
00112 # logging module.   PLY will use this by default to create things
00113 # such as the parser.out file.  If a user wants more detailed
00114 # information, they can create their own logging object and pass
00115 # it into PLY.
00116 
class PlyLogger(object):
    """Minimal stand-in for a ``logging`` logger, writing to a file object.

    PLY uses this by default to produce output such as the parser.out
    file.  Users wanting richer behavior can pass a real logging object
    instead.  ``info``/``critical`` are aliases of ``debug``; keyword
    arguments are accepted for interface compatibility but ignored.
    """
    def __init__(self, f):
        self.f = f

    def _emit(self, prefix, msg, args):
        # All levels funnel through here: %-format and append a newline.
        self.f.write(prefix + (msg % args) + "\n")

    def debug(self, msg, *args, **kwargs):
        self._emit("", msg, args)

    def warning(self, msg, *args, **kwargs):
        self._emit("WARNING: ", msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit("ERROR: ", msg, args)

    info     = debug
    critical = debug
00131 
00132 # Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Black-hole logger used when no output should be generated.

    Every attribute access and every call simply yields the instance
    itself, so arbitrary chains like ``log.debug(...)`` are no-ops.
    """
    def __getattribute__(self, attr):
        return self

    def __call__(self, *positional, **named):
        return self
00138         
00139 # Exception raised for yacc-related errors
00140 class YaccError(Exception):   pass
00141 
00142 # Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Render *r* for debug traces as ``<type @ 0xADDR> (repr)``.

    Multi-line reprs are re-escaped onto a single line, and anything
    longer than the module-level ``resultlimit`` is truncated with " ...".
    """
    text = repr(r)
    if "\n" in text:
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), text)
00150 
00151 
00152 # Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Short display form of a parser stack entry.

    Small reprs (under 16 chars) are shown verbatim; anything larger is
    replaced with a compact ``<type @ 0xADDR>`` placeholder.  Multi-line
    reprs are re-escaped onto one line first.
    """
    text = repr(r)
    if "\n" in text:
        text = repr(text)
    if len(text) >= 16:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))
    return text
00160 
00161 #-----------------------------------------------------------------------------
00162 #                        ===  LR Parsing Engine ===
00163 #
00164 # The following classes are used for the LR parser itself.  These are not
00165 # used during table construction and are independent of the actual LR
00166 # table generation algorithm
00167 #-----------------------------------------------------------------------------
00168 
00169 # This class is used to hold non-terminal grammar symbols during parsing.
00170 # It normally has the following attributes set:
00171 #        .type       = Grammar symbol type
00172 #        .value      = Symbol value
00173 #        .lineno     = Starting line number
00174 #        .endlineno  = Ending line number (optional, set automatically)
00175 #        .lexpos     = Starting lex position
00176 #        .endlexpos  = Ending lex position (optional, set automatically)
00177 
class YaccSymbol:
    """Holder for non-terminal grammar symbols during parsing.

    Attributes such as ``type``, ``value``, ``lineno``/``endlineno`` and
    ``lexpos``/``endlexpos`` are assigned externally by the parser engine;
    the string form is simply the symbol's grammar type.
    """
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
00181 
00182 # This class is a wrapper around the objects actually passed to each
00183 # grammar rule.   Index lookup and assignment actually assign the
00184 # .value attribute of the underlying YaccSymbol object.
00185 # The lineno() method returns the line number of a given
00186 # item (or 0 if not defined).   The linespan() method returns
00187 # a tuple of (startline,endline) representing the range of lines
00188 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
00189 # representing the range of positional information for a symbol.
00190 
class YaccProduction:
    """Wrapper around the symbols passed to each grammar rule.

    Index lookup and assignment operate on the ``.value`` attribute of the
    underlying YaccSymbol objects.  Non-negative indices address the
    production slice; negative indices reach back into the parser's symbol
    stack.  ``lineno()``/``linespan()`` and ``lexpos()``/``lexspan()``
    expose positional information (0 when a symbol carries none).
    """
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        source = self.slice if n >= 0 else self.stack
        return source[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 slice protocol; returns the values of a sub-range.
        return [entry.value for entry in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        entry = self.slice[n]
        first = getattr(entry, "lineno", 0)
        last = getattr(entry, "endlineno", first)
        return first, last

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        entry = self.slice[n]
        first = getattr(entry, "lexpos", 0)
        last = getattr(entry, "endlexpos", first)
        return first, last

    def error(self):
        # Grammar rules call this to force error recovery.
        raise SyntaxError
00231 
00232 
00233 # -----------------------------------------------------------------------------
00234 #                               == LRParser ==
00235 #
00236 # The LR Parsing engine.
00237 # -----------------------------------------------------------------------------
00238 
00239 class LRParser:
00240     def __init__(self,lrtab,errorf):
00241         self.productions = lrtab.lr_productions
00242         self.action      = lrtab.lr_action
00243         self.goto        = lrtab.lr_goto
00244         self.errorfunc   = errorf
00245 
00246     def errok(self):
00247         self.errorok     = 1
00248 
00249     def restart(self):
00250         del self.statestack[:]
00251         del self.symstack[:]
00252         sym = YaccSymbol()
00253         sym.type = '$end'
00254         self.symstack.append(sym)
00255         self.statestack.append(0)
00256 
00257     def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
00258         if debug or yaccdevel:
00259             if isinstance(debug,int):
00260                 debug = PlyLogger(sys.stderr)
00261             return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
00262         elif tracking:
00263             return self.parseopt(input,lexer,debug,tracking,tokenfunc)
00264         else:
00265             return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
00266         
00267 
00268     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00269     # parsedebug().
00270     #
00271     # This is the debugging enabled version of parse().  All changes made to the
00272     # parsing engine should be made here.   For the non-debugging version,
00273     # copy this code to a method parseopt() and delete all of the sections
00274     # enclosed in:
00275     #
00276     #      #--! DEBUG
00277     #      statements
00278     #      #--! DEBUG
00279     #
00280     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00281 
    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        """Debug-instrumented LR parsing loop.

        This is the reference implementation of the parsing engine: the
        optimized parseopt()/parseopt_notrack() methods are copies of this
        code with the '--! DEBUG' (and '--! TRACKING') sections removed.

        input     -- optional source string fed to the lexer
        lexer     -- lexer object; if omitted, the lex module's lexer is used
        debug     -- logger object that receives the trace output
        tracking  -- when true, line/position info is propagated to nonterminals
        tokenfunc -- optional replacement for lexer.token
        Returns the value attached to the start symbol on acceptance, or
        None when error recovery fails.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery 

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
           # Tokenize function
           get_token = lexer.token
        else:
           get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # Lexer exhausted: synthesize the end-of-input marker.
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack  : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table: t > 0 shift, t < 0 reduce, t == 0 accept
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t
                    
                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
                        
                    # --! DEBUG

                    if plen:
                        # Non-empty production: slice off the handle plus the
                        # slot that will receive the new nonterminal.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                           t1 = targ[1]
                           sym.lineno = t1.lineno
                           sym.lexpos = t1.lexpos
                           t1 = targ[-1]
                           sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                           sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated 
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ
                        
                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    
                    else:

                        # Empty production: only positional info from the
                        # lexer itself is available.
                        # --! TRACKING
                        if tracking:
                           sym.lineno = lexer.lineno
                           sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated 
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is the parse result.
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done   : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t == None:

                # --! DEBUG
                debug.error('Error  : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        # Expose errok/token/restart as module-level globals
                        # so the user's p_error() can call them.
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): this message lacks the trailing
                                # "\n" that the sibling messages include --
                                # confirm whether that is intended.
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in an 'error' symbol and push
                    # the original token back for a later retry.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
00587 
00588     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00589     # parseopt().
00590     #
00591     # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
00592     # Edit the debug version above, then copy any modifications to the method
00593     # below while removing #--! DEBUG sections.
00594     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00595 
00596 
00597     def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
00598         lookahead = None                 # Current lookahead symbol
00599         lookaheadstack = [ ]             # Stack of lookahead symbols
00600         actions = self.action            # Local reference to action table (to avoid lookup on self.)
00601         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
00602         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
00603         pslice  = YaccProduction(None)   # Production object passed to grammar rules
00604         errorcount = 0                   # Used during error recovery 
00605 
00606         # If no lexer was given, we will try to use the lex module
00607         if not lexer:
00608             lex = load_ply_lex()
00609             lexer = lex.lexer
00610         
00611         # Set up the lexer and parser objects on pslice
00612         pslice.lexer = lexer
00613         pslice.parser = self
00614 
00615         # If input was supplied, pass to lexer
00616         if input is not None:
00617             lexer.input(input)
00618 
00619         if tokenfunc is None:
00620            # Tokenize function
00621            get_token = lexer.token
00622         else:
00623            get_token = tokenfunc
00624 
00625         # Set up the state and symbol stacks
00626 
00627         statestack = [ ]                # Stack of parsing states
00628         self.statestack = statestack
00629         symstack   = [ ]                # Stack of grammar symbols
00630         self.symstack = symstack
00631 
00632         pslice.stack = symstack         # Put in the production
00633         errtoken   = None               # Err token
00634 
00635         # The start state is assumed to be (0,$end)
00636 
00637         statestack.append(0)
00638         sym = YaccSymbol()
00639         sym.type = '$end'
00640         symstack.append(sym)
00641         state = 0
00642         while 1:
00643             # Get the next symbol on the input.  If a lookahead symbol
00644             # is already set, we just use that. Otherwise, we'll pull
00645             # the next token off of the lookaheadstack or from the lexer
00646 
00647             if not lookahead:
00648                 if not lookaheadstack:
00649                     lookahead = get_token()     # Get the next token
00650                 else:
00651                     lookahead = lookaheadstack.pop()
00652                 if not lookahead:
00653                     lookahead = YaccSymbol()
00654                     lookahead.type = '$end'
00655 
00656             # Check the action table
00657             ltype = lookahead.type
00658             t = actions[state].get(ltype)
00659 
00660             if t is not None:
00661                 if t > 0:
00662                     # shift a symbol on the stack
00663                     statestack.append(t)
00664                     state = t
00665 
00666                     symstack.append(lookahead)
00667                     lookahead = None
00668 
00669                     # Decrease error count on successful shift
00670                     if errorcount: errorcount -=1
00671                     continue
00672 
00673                 if t < 0:
00674                     # reduce a symbol on the stack, emit a production
00675                     p = prod[-t]
00676                     pname = p.name
00677                     plen  = p.len
00678 
00679                     # Get production function
00680                     sym = YaccSymbol()
00681                     sym.type = pname       # Production name
00682                     sym.value = None
00683 
00684                     if plen:
00685                         targ = symstack[-plen-1:]
00686                         targ[0] = sym
00687 
00688                         # --! TRACKING
00689                         if tracking:
00690                            t1 = targ[1]
00691                            sym.lineno = t1.lineno
00692                            sym.lexpos = t1.lexpos
00693                            t1 = targ[-1]
00694                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
00695                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
00696 
00697                         # --! TRACKING
00698 
00699                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00700                         # The code enclosed in this section is duplicated 
00701                         # below as a performance optimization.  Make sure
00702                         # changes get made in both locations.
00703 
00704                         pslice.slice = targ
00705                         
00706                         try:
00707                             # Call the grammar rule with our special slice object
00708                             del symstack[-plen:]
00709                             del statestack[-plen:]
00710                             p.callable(pslice)
00711                             symstack.append(sym)
00712                             state = goto[statestack[-1]][pname]
00713                             statestack.append(state)
00714                         except SyntaxError:
00715                             # If an error was set. Enter error recovery state
00716                             lookaheadstack.append(lookahead)
00717                             symstack.pop()
00718                             statestack.pop()
00719                             state = statestack[-1]
00720                             sym.type = 'error'
00721                             lookahead = sym
00722                             errorcount = error_count
00723                             self.errorok = 0
00724                         continue
00725                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00726     
00727                     else:
00728 
00729                         # --! TRACKING
00730                         if tracking:
00731                            sym.lineno = lexer.lineno
00732                            sym.lexpos = lexer.lexpos
00733                         # --! TRACKING
00734 
00735                         targ = [ sym ]
00736 
00737                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00738                         # The code enclosed in this section is duplicated 
00739                         # above as a performance optimization.  Make sure
00740                         # changes get made in both locations.
00741 
00742                         pslice.slice = targ
00743 
00744                         try:
00745                             # Call the grammar rule with our special slice object
00746                             p.callable(pslice)
00747                             symstack.append(sym)
00748                             state = goto[statestack[-1]][pname]
00749                             statestack.append(state)
00750                         except SyntaxError:
00751                             # If an error was set. Enter error recovery state
00752                             lookaheadstack.append(lookahead)
00753                             symstack.pop()
00754                             statestack.pop()
00755                             state = statestack[-1]
00756                             sym.type = 'error'
00757                             lookahead = sym
00758                             errorcount = error_count
00759                             self.errorok = 0
00760                         continue
00761                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00762 
00763                 if t == 0:
00764                     n = symstack[-1]
00765                     return getattr(n,"value",None)
00766 
00767             if t == None:
00768 
00769                 # We have some kind of parsing error here.  To handle
00770                 # this, we are going to push the current token onto
00771                 # the tokenstack and replace it with an 'error' token.
00772                 # If there are any synchronization rules, they may
00773                 # catch it.
00774                 #
00775                 # In addition to pushing the error token, we call call
00776                 # the user defined p_error() function if this is the
00777                 # first syntax error.  This function is only called if
00778                 # errorcount == 0.
00779                 if errorcount == 0 or self.errorok:
00780                     errorcount = error_count
00781                     self.errorok = 0
00782                     errtoken = lookahead
00783                     if errtoken.type == '$end':
00784                         errtoken = None               # End of file!
00785                     if self.errorfunc:
00786                         global errok,token,restart
00787                         errok = self.errok        # Set some special functions available in error recovery
00788                         token = get_token
00789                         restart = self.restart
00790                         if errtoken and not hasattr(errtoken,'lexer'):
00791                             errtoken.lexer = lexer
00792                         tok = self.errorfunc(errtoken)
00793                         del errok, token, restart   # Delete special functions
00794 
00795                         if self.errorok:
00796                             # User must have done some kind of panic
00797                             # mode recovery on their own.  The
00798                             # returned token is the next lookahead
00799                             lookahead = tok
00800                             errtoken = None
00801                             continue
00802                     else:
00803                         if errtoken:
00804                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
00805                             else: lineno = 0
00806                             if lineno:
00807                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
00808                             else:
00809                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
00810                         else:
00811                             sys.stderr.write("yacc: Parse error in input. EOF\n")
00812                             return
00813 
00814                 else:
00815                     errorcount = error_count
00816 
00817                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
00818                 # entire parse has been rolled back and we're completely hosed.   The token is
00819                 # discarded and we just keep going.
00820 
00821                 if len(statestack) <= 1 and lookahead.type != '$end':
00822                     lookahead = None
00823                     errtoken = None
00824                     state = 0
00825                     # Nuke the pushback stack
00826                     del lookaheadstack[:]
00827                     continue
00828 
00829                 # case 2: the statestack has a couple of entries on it, but we're
00830                 # at the end of the file. nuke the top entry and generate an error token
00831 
00832                 # Start nuking entries on the stack
00833                 if lookahead.type == '$end':
00834                     # Whoa. We're really hosed here. Bail out
00835                     return
00836 
00837                 if lookahead.type != 'error':
00838                     sym = symstack[-1]
00839                     if sym.type == 'error':
00840                         # Hmmm. Error is on top of stack, we'll just nuke input
00841                         # symbol and continue
00842                         lookahead = None
00843                         continue
00844                     t = YaccSymbol()
00845                     t.type = 'error'
00846                     if hasattr(lookahead,"lineno"):
00847                         t.lineno = lookahead.lineno
00848                     t.value = lookahead
00849                     lookaheadstack.append(lookahead)
00850                     lookahead = t
00851                 else:
00852                     symstack.pop()
00853                     statestack.pop()
00854                     state = statestack[-1]       # Potential bug fix
00855 
00856                 continue
00857 
00858             # Call an error function here
00859             raise RuntimeError("yacc: internal parser error!!!\n")
00860 
00861     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00862     # parseopt_notrack().
00863     #
00864     # Optimized version of parseopt() with line number tracking removed. 
00865     # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
00866     # code in the #--! TRACKING sections
00867     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00868 
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized LR parsing engine with all position-tracking code removed.

        Per the banner above, this is generated from parseopt() by deleting
        the --! TRACKING sections and must not be edited directly.

        input     -- optional string handed to lexer.input()
        lexer     -- tokenizer; if omitted, the ply.lex module's lexer is used
        tokenfunc -- alternate token source; overrides lexer.token
        debug, tracking -- accepted for interface compatibility with
                    parse()/parseopt() but not used in this variant.

        Returns the value attribute of the start symbol on accept, or None
        when the parse fails unrecoverably.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
           # Tokenize function
           get_token = lexer.token
        else:
           get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # End of input: synthesize the $end marker symbol
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    # (-t is the index of the production being reduced)
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Non-empty production: slice the matched symbols off
                        # the stack; targ[0] becomes the result symbol
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated 
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: nothing to pop off the stacks
                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated 
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the parse finished; return the start symbol's value
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token inside an 'error' symbol and
                    # push the original token back for possible re-reading
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already 'error': pop the stacks and keep
                    # searching for a state that can shift the error token
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
01115 
01116 # -----------------------------------------------------------------------------
01117 #                          === Grammar Representation ===
01118 #
01119 # The following functions, classes, and variables are used to represent and
01120 # manipulate the rules that make up a grammar. 
01121 # -----------------------------------------------------------------------------
01122 
01123 import re
01124 
# Regex used to validate grammar rule (production) names.  Note that it also
# accepts leading digits and hyphens — grammar symbol names are not required
# to be legal Python identifiers.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
01127 
01128 # -----------------------------------------------------------------------------
01129 # class Production:
01130 #
01131 # This class stores the raw information about a single production or grammar rule.
01132 # A grammar rule refers to a specification such as this:
01133 #
01134 #       expr : expr PLUS term 
01135 #
01136 # Here are the basic attributes defined on all productions
01137 #
01138 #       name     - Name of the production.  For example 'expr'
01139 #       prod     - A list of symbols on the right side ['expr','PLUS','term']
01140 #       prec     - Production precedence level
01141 #       number   - Production number.
01142 #       func     - Function that executes on reduce
01143 #       file     - File where production function is defined
01144 #       lineno   - Line number where production function is defined
01145 #
01146 # The following attributes are defined or optional.
01147 #
01148 #       len       - Length of the production (number of symbols on right hand side)
01149 #       usyms     - Set of unique symbols found in the production
01150 # -----------------------------------------------------------------------------
01151 
class Production(object):
    """Raw information about a single production (grammar rule).

    A production corresponds to a specification such as:

        expr : expr PLUS term

    Attributes:
        name     -- left-hand-side name, e.g. 'expr'
        prod     -- tuple of right-hand-side symbols, e.g. ('expr','PLUS','term')
        prec     -- precedence as an (assoc, level) tuple
        number   -- production number
        func     -- name of the function executed on reduce
        callable -- the resolved function itself (set by bind())
        file     -- file where the rule function is defined
        line     -- line number of the rule function
        len      -- number of symbols on the right-hand side
        usyms    -- unique symbols appearing in the production
    """

    # Times this production has been reduced (maintained externally).
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None          # Resolved later by bind()
        self.file = file
        self.line = line
        self.prec = precedence

        # Length of the right-hand side.
        self.len = len(self.prod)

        # Unique production symbols, kept in first-appearance order.
        self.usyms = []
        for symbol in self.prod:
            if symbol not in self.usyms:
                self.usyms.append(symbol)

        # All LR items derived from this production (filled in later).
        self.lr_items = []
        self.lr_next = None

        # Cached printable form, e.g. "expr -> expr PLUS term".
        if self.prod:
            rhs = " ".join(self.prod)
        else:
            rhs = "<empty>"
        self.str = "%s -> %s" % (self.name, rhs)

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth value: a production is always truthy, even if empty.
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    def lr_item(self, n):
        """Return the LR item with the dot at position *n*, or None when n
        is past the end of the production."""
        if n > len(self.prod):
            return None
        item = LRItem(self, n)

        # Precompute the productions immediately following the dot.
        # Hack: relies on the global Prodnames table. Remove later.
        try:
            item.lr_after = Prodnames[item.prod[n + 1]]
        except (IndexError, KeyError):
            item.lr_after = []
        try:
            item.lr_before = item.prod[n - 1]
        except IndexError:
            item.lr_before = None

        return item

    def bind(self, pdict):
        # Resolve the stored function name into an actual callable.
        if self.func:
            self.callable = pdict[self.func]
01220 
01221 # This class serves as a minimal standin for Production objects when
01222 # reading table data from files.   It only contains information
01223 # actually used by the LR parsing engine, plus some additional
01224 # debugging information.
class MiniProduction(object):
    """Minimal stand-in for Production objects when table data is read back
    from files.  Holds only what the LR engine actually uses, plus a little
    debugging information."""

    def __init__(self, str, name, len, func, file, line):
        # NOTE: 'str' and 'len' shadow builtins; the parameter names are
        # kept for compatibility with how the table reader calls this.
        self.name = name
        self.len = len
        self.func = func
        self.callable = None          # Resolved later by bind()
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    def bind(self, pdict):
        # Resolve the stored function name into an actual callable.
        if self.func:
            self.callable = pdict[self.func]
01243 
01244 
01245 # -----------------------------------------------------------------------------
01246 # class LRItem
01247 #
01248 # This class represents a specific stage of parsing a production rule.  For
01249 # example: 
01250 #
01251 #       expr : expr . PLUS term 
01252 #
01253 # In the above, the "." represents the current location of the parse.  Here
01254 # basic attributes:
01255 #
01256 #       name       - Name of the production.  For example 'expr'
01257 #       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
01258 #       number     - Production number.
01259 #
#       lr_next    - Next LR item. For example, if we are at 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
01262 #       lr_index   - LR item index (location of the ".") in the prod list.
01263 #       lookaheads - LALR lookahead symbols for this item
01264 #       len        - Length of the production (number of symbols on right hand side)
01265 #       lr_after    - List of all productions that immediately follow
01266 #       lr_before   - Grammar symbol immediately before
01267 # -----------------------------------------------------------------------------
01268 
class LRItem(object):
    """A production with a parse-position marker, e.g.:

        expr : expr . PLUS term

    The "." shows how much of the rule has been recognized so far.

    Attributes:
        name       -- production name, e.g. 'expr'
        prod       -- RHS symbols with "." inserted, e.g. ('expr','.','PLUS','term')
        number     -- production number
        lr_index   -- position of the "." in prod
        lookaheads -- LALR lookahead symbols for this item
        len        -- length of prod (dot included)
        usyms      -- unique symbols of the underlying production
    """

    def __init__(self, p, n):
        self.name = p.name
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.usyms = p.usyms
        # Build the dotted right-hand side as an immutable tuple.
        dotted = list(p.prod)
        dotted.insert(n, ".")
        self.prod = tuple(dotted)
        self.len = len(self.prod)

    def __str__(self):
        if self.prod:
            return "%s -> %s" % (self.name, " ".join(self.prod))
        return "%s -> <empty>" % self.name

    def __repr__(self):
        return "LRItem(" + str(self) + ")"
01290 
01291 # -----------------------------------------------------------------------------
01292 # rightmost_terminal()
01293 #
01294 # Return the rightmost terminal from a list of symbols.  Used in add_production()
01295 # -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the rightmost member of *symbols* that is in *terminals*,
    or None when no terminal appears.  Used in add_production() to pick a
    rule's default precedence."""
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
01303 
01304 # -----------------------------------------------------------------------------
01305 #                           === GRAMMAR CLASS ===
01306 #
01307 # The following class represents the contents of the specified grammar along
01308 # with various computed properties such as first sets, follow sets, LR items, etc.
01309 # This data is used for critical parts of the table generation process later.
01310 # -----------------------------------------------------------------------------
01311 
# Raised for problems in the grammar specification itself (illegal rule
# names, bad precedence declarations, malformed productions, ...).
class GrammarError(YaccError): pass
01313 
01314 class Grammar(object):
01315     def __init__(self,terminals):
01316         self.Productions  = [None]  # A list of all of the productions.  The first
01317                                     # entry is always reserved for the purpose of
01318                                     # building an augmented grammar
01319 
01320         self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
01321                                     # productions of that nonterminal.
01322 
01323         self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
01324                                     # productions.
01325 
01326         self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
01327                                     # list of the rules where they are used.
01328 
01329         for term in terminals:
01330             self.Terminals[term] = []
01331 
01332         self.Terminals['error'] = []
01333 
01334         self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
01335                                     # of rule numbers where they are used.
01336 
01337         self.First        = { }     # A dictionary of precomputed FIRST(x) symbols
01338 
01339         self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
01340 
01341         self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
01342                                     # form ('right',level) or ('nonassoc', level) or ('left',level)
01343 
01344         self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammer.
01345                                     # This is only used to provide error checking and to generate
01346                                     # a warning about unused precedence rules.
01347 
01348         self.Start = None           # Starting symbol for the grammar
01349 
01350 
    def __len__(self):
        # Number of productions, including the reserved slot 0.
        return len(self.Productions)
01353 
    def __getitem__(self,index):
        # Return production number *index* (slot 0 is reserved for the
        # augmented grammar and holds None until it is built).
        return self.Productions[index]
01356 
01357     # -----------------------------------------------------------------------------
01358     # set_precedence()
01359     #
01360     # Sets the precedence for a given terminal. assoc is the associativity such as
01361     # 'left','right', or 'nonassoc'.  level is a numeric level.
01362     #
01363     # -----------------------------------------------------------------------------
01364 
01365     def set_precedence(self,term,assoc,level):
01366         assert self.Productions == [None],"Must call set_precedence() before add_production()"
01367         if term in self.Precedence:
01368             raise GrammarError("Precedence already specified for terminal '%s'" % term)
01369         if assoc not in ['left','right','nonassoc']:
01370             raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
01371         self.Precedence[term] = (assoc,level)
01372  
01373     # -----------------------------------------------------------------------------
01374     # add_production()
01375     #
01376     # Given an action function, this function assembles a production rule and
01377     # computes its precedence level.
01378     #
01379     # The production rule is supplied as a list of symbols.   For example,
01380     # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
01381     # symbols ['expr','PLUS','term'].
01382     #
01383     # Precedence is determined by the precedence of the right-most non-terminal
01384     # or the precedence of a terminal specified by %prec.
01385     #
01386     # A variety of error checks are performed to make sure production symbols
01387     # are valid and that %prec is used correctly.
01388     # -----------------------------------------------------------------------------
01389 
01390     def add_production(self,prodname,syms,func=None,file='',line=0):
01391 
01392         if prodname in self.Terminals:
01393             raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
01394         if prodname == 'error':
01395             raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
01396         if not _is_identifier.match(prodname):
01397             raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
01398 
01399         # Look for literal tokens 
01400         for n,s in enumerate(syms):
01401             if s[0] in "'\"":
01402                  try:
01403                      c = eval(s)
01404                      if (len(c) > 1):
01405                           raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
01406                      if not c in self.Terminals:
01407                           self.Terminals[c] = []
01408                      syms[n] = c
01409                      continue
01410                  except SyntaxError:
01411                      pass
01412             if not _is_identifier.match(s) and s != '%prec':
01413                 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
01414         
01415         # Determine the precedence level
01416         if '%prec' in syms:
01417             if syms[-1] == '%prec':
01418                 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
01419             if syms[-2] != '%prec':
01420                 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
01421             precname = syms[-1]
01422             prodprec = self.Precedence.get(precname,None)
01423             if not prodprec:
01424                 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
01425             else:
01426                 self.UsedPrecedence[precname] = 1
01427             del syms[-2:]     # Drop %prec from the rule
01428         else:
01429             # If no %prec, precedence is determined by the rightmost terminal symbol
01430             precname = rightmost_terminal(syms,self.Terminals)
01431             prodprec = self.Precedence.get(precname,('right',0)) 
01432             
01433         # See if the rule is already in the rulemap
01434         map = "%s -> %s" % (prodname,syms)
01435         if map in self.Prodmap:
01436             m = self.Prodmap[map]
01437             raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
01438                                "Previous definition at %s:%d" % (m.file, m.line))
01439 
01440         # From this point on, everything is valid.  Create a new Production instance
01441         pnumber  = len(self.Productions)
01442         if not prodname in self.Nonterminals:
01443             self.Nonterminals[prodname] = [ ]
01444 
01445         # Add the production number to Terminals and Nonterminals
01446         for t in syms:
01447             if t in self.Terminals:
01448                 self.Terminals[t].append(pnumber)
01449             else:
01450                 if not t in self.Nonterminals:
01451                     self.Nonterminals[t] = [ ]
01452                 self.Nonterminals[t].append(pnumber)
01453 
01454         # Create a production and add it to the list of productions
01455         p = Production(pnumber,prodname,syms,prodprec,func,file,line)
01456         self.Productions.append(p)
01457         self.Prodmap[map] = p
01458 
01459         # Add to the global productions list
01460         try:
01461             self.Prodnames[prodname].append(p)
01462         except KeyError:
01463             self.Prodnames[prodname] = [ p ]
01464         return 0
01465 
01466     # -----------------------------------------------------------------------------
01467     # set_start()
01468     #
01469     # Sets the starting symbol and creates the augmented grammar.  Production 
01470     # rule 0 is S' -> start where start is the start symbol.
01471     # -----------------------------------------------------------------------------
01472 
01473     def set_start(self,start=None):
01474         if not start:
01475             start = self.Productions[1].name
01476         if start not in self.Nonterminals:
01477             raise GrammarError("start symbol %s undefined" % start)
01478         self.Productions[0] = Production(0,"S'",[start])
01479         self.Nonterminals[start].append(0)
01480         self.Start = start
01481 
01482     # -----------------------------------------------------------------------------
01483     # find_unreachable()
01484     #
01485     # Find all of the nonterminal symbols that can't be reached from the starting
01486     # symbol.  Returns a list of nonterminals that can't be reached.
01487     # -----------------------------------------------------------------------------
01488 
01489     def find_unreachable(self):
01490         
01491         # Mark all symbols that are reachable from a symbol s
01492         def mark_reachable_from(s):
01493             if reachable[s]:
01494                 # We've already reached symbol s.
01495                 return
01496             reachable[s] = 1
01497             for p in self.Prodnames.get(s,[]):
01498                 for r in p.prod:
01499                     mark_reachable_from(r)
01500 
01501         reachable   = { }
01502         for s in list(self.Terminals) + list(self.Nonterminals):
01503             reachable[s] = 0
01504 
01505         mark_reachable_from( self.Productions[0].prod[0] )
01506 
01507         return [s for s in list(self.Nonterminals)
01508                         if not reachable[s]]
01509     
01510     # -----------------------------------------------------------------------------
01511     # infinite_cycles()
01512     #
01513     # This function looks at the various parsing rules and tries to detect
01514     # infinite recursion cycles (grammar rules where there is no possible way
01515     # to derive a string of only terminals).
01516     # -----------------------------------------------------------------------------
01517 
    def infinite_cycles(self):
        """Detect grammar rules that can never terminate.

        Returns a list of symbols that cannot derive any string consisting
        purely of terminals (infinite recursion).  A 'terminates' flag is
        propagated to a fixed point: a nonterminal terminates iff at least
        one of its productions terminates, and a production terminates iff
        every right-hand-side symbol terminates.
        """
        terminates = {}

        # Terminals trivially terminate:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        # (an empty rhs also lands here, so epsilon rules terminate)
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
01574 
01575 
01576     # -----------------------------------------------------------------------------
01577     # undefined_symbols()
01578     #
    # Find all symbols that were used in the grammar but not defined as tokens
    # or grammar rules.  Returns a list of tuples (sym, prod) where sym is the
    # symbol and prod is the production where the symbol was used.
01582     # -----------------------------------------------------------------------------
01583     def undefined_symbols(self):
01584         result = []
01585         for p in self.Productions:
01586             if not p: continue
01587 
01588             for s in p.prod:
01589                 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
01590                     result.append((s,p))
01591         return result
01592 
01593     # -----------------------------------------------------------------------------
01594     # unused_terminals()
01595     #
01596     # Find all terminals that were defined, but not used by the grammar.  Returns
01597     # a list of all symbols.
01598     # -----------------------------------------------------------------------------
01599     def unused_terminals(self):
01600         unused_tok = []
01601         for s,v in self.Terminals.items():
01602             if s != 'error' and not v:
01603                 unused_tok.append(s)
01604 
01605         return unused_tok
01606 
01607     # ------------------------------------------------------------------------------
01608     # unused_rules()
01609     #
01610     # Find all grammar rules that were defined,  but not used (maybe not reachable)
01611     # Returns a list of productions.
01612     # ------------------------------------------------------------------------------
01613 
01614     def unused_rules(self):
01615         unused_prod = []
01616         for s,v in self.Nonterminals.items():
01617             if not v:
01618                 p = self.Prodnames[s][0]
01619                 unused_prod.append(p)
01620         return unused_prod
01621 
01622     # -----------------------------------------------------------------------------
01623     # unused_precedence()
01624     #
01625     # Returns a list of tuples (term,precedence) corresponding to precedence
01626     # rules that were never used by the grammar.  term is the name of the terminal
01627     # on which precedence was applied and precedence is a string such as 'left' or
01628     # 'right' corresponding to the type of precedence. 
01629     # -----------------------------------------------------------------------------
01630 
01631     def unused_precedence(self):
01632         unused = []
01633         for termname in self.Precedence:
01634             if not (termname in self.Terminals or termname in self.UsedPrecedence):
01635                 unused.append((termname,self.Precedence[termname][0]))
01636                 
01637         return unused
01638 
01639     # -------------------------------------------------------------------------
01640     # _first()
01641     #
01642     # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
01643     #
01644     # During execution of compute_first1, the result may be incomplete.
01645     # Afterward (e.g., when called from compute_follow()), it will be complete.
01646     # -------------------------------------------------------------------------
01647     def _first(self,beta):
01648 
01649         # We are computing First(x1,x2,x3,...,xn)
01650         result = [ ]
01651         for x in beta:
01652             x_produces_empty = 0
01653 
01654             # Add all the non-<empty> symbols of First[x] to the result.
01655             for f in self.First[x]:
01656                 if f == '<empty>':
01657                     x_produces_empty = 1
01658                 else:
01659                     if f not in result: result.append(f)
01660 
01661             if x_produces_empty:
01662                 # We have to consider the next x in beta,
01663                 # i.e. stay in the loop.
01664                 pass
01665             else:
01666                 # We don't have to consider any further symbols in beta.
01667                 break
01668         else:
01669             # There was no 'break' from the loop,
01670             # so x_produces_empty was true for all x in beta,
01671             # so beta produces empty as well.
01672             result.append('<empty>')
01673 
01674         return result
01675 
01676     # -------------------------------------------------------------------------
01677     # compute_first()
01678     #
01679     # Compute the value of FIRST1(X) for all symbols
01680     # -------------------------------------------------------------------------
01681     def compute_first(self):
01682         if self.First:
01683             return self.First
01684 
01685         # Terminals:
01686         for t in self.Terminals:
01687             self.First[t] = [t]
01688 
01689         self.First['$end'] = ['$end']
01690 
01691         # Nonterminals:
01692 
01693         # Initialize to the empty set:
01694         for n in self.Nonterminals:
01695             self.First[n] = []
01696 
01697         # Then propagate symbols until no change:
01698         while 1:
01699             some_change = 0
01700             for n in self.Nonterminals:
01701                 for p in self.Prodnames[n]:
01702                     for f in self._first(p.prod):
01703                         if f not in self.First[n]:
01704                             self.First[n].append( f )
01705                             some_change = 1
01706             if not some_change:
01707                 break
01708         
01709         return self.First
01710 
01711     # ---------------------------------------------------------------------
01712     # compute_follow()
01713     #
01714     # Computes all of the follow sets for every non-terminal symbol.  The
01715     # follow set is the set of all symbols that might follow a given
01716     # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
01717     # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        """Compute FOLLOW sets for every nonterminal symbol.

        FOLLOW(B) is the set of terminals that can appear immediately after
        B in some sentential form (Dragon book, 2nd Ed. p. 189).  *start*
        names the start symbol and defaults to the left-hand side of
        production 1.  The result is cached in self.Follow.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize every nonterminal's follow set to empty
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        # The start symbol can be followed by end-of-input
        self.Follow[start] = [ '$end' ]

        # Propagate to a fixed point.  For each occurrence of nonterminal B
        # in a production A -> alpha B beta: add FIRST(beta) minus <empty>
        # to FOLLOW(B); if beta can derive empty (or B is rightmost), also
        # add FOLLOW(A) to FOLLOW(B).
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Scan the right-hand side for nonterminals
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of FOLLOW(A) to FOLLOW(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
01760 
01761 
01762     # -----------------------------------------------------------------------------
01763     # build_lritems()
01764     #
01765     # This function walks the list of productions and builds a complete set of the
01766     # LR items.  The LR items are stored in two ways:  First, they are uniquely
01767     # numbered and placed in the list _lritems.  Second, a linked list of LR items
01768     # is built for each production.  For example:
01769     #
01770     #   E -> E PLUS E
01771     #
01772     # Creates the list
01773     #
01774     #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
01775     # -----------------------------------------------------------------------------
01776 
    def build_lritems(self):
        """Build the complete set of LR items for every production.

        For each production p, creates LRItem objects with the dot at
        positions 0..len(p), links them through .lr_next (starting from p
        itself, ending in None), and stores the list on p.lr_items.  Each
        item caches lr_after (the productions of the symbol just after the
        dot, [] at the end) and lr_before (the symbol just before the dot,
        or None when the dot is leftmost).
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    lri = None          # past the end: terminates the chain
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    # the dot (IndexError: dot at end; KeyError: a terminal)
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
01803 
01804 # -----------------------------------------------------------------------------
01805 #                            == Class LRTable ==
01806 #
01807 # This basic class represents a basic table of LR parsing information.  
01808 # Methods for generating the tables are not defined here.  They are defined
01809 # in the derived class LRGeneratedTable.
01810 # -----------------------------------------------------------------------------
01811 
class VersionError(YaccError):
    """Raised when a stored parser table file was generated by an
    incompatible version of ply and must be regenerated."""
    pass
01813 
class LRTable(object):
    """Basic container for LR parsing information: the action table, goto
    table, production list, and table-construction method.  Methods for
    generating tables live in the derived class LRGeneratedTable; this base
    class only loads previously generated tables."""

    def __init__(self):
        self.lr_action = None          # Action table
        self.lr_goto = None            # Goto table
        self.lr_productions = None     # List of MiniProduction objects
        self.lr_method = None          # Table construction method (e.g. 'LALR')

    def read_table(self,module):
        """Load parsing tables from *module* (a module object or an
        importable module name).  Returns the table signature.  Raises
        VersionError if the tables were written by an incompatible ply."""
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                # Python 3 exec() cannot rebind a local, so import into a
                # scratch namespace and pull the module out of it.
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load parsing tables from a pickle file written by yacc().
        Returns the table signature.  Raises VersionError on a table
        version mismatch.

        SECURITY NOTE: unpickling can execute arbitrary code; only load
        table files produced by your own parser generator.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")
        try:
            # Ensure the file handle is closed even if a load below raises
            # (the previous version leaked the handle on error).
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature      = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto   = pickle.load(in_f)
            productions    = pickle.load(in_f)
        finally:
            in_f.close()

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
01873     
01874 # -----------------------------------------------------------------------------
01875 #                           === LR Generator ===
01876 #
01877 # The following classes and functions are used to generate LR parsing tables on 
01878 # a grammar.
01879 # -----------------------------------------------------------------------------
01880 
01881 # -----------------------------------------------------------------------------
01882 # digraph()
01883 # traverse()
01884 #
01885 # The following two functions are used to compute set valued functions
01886 # of the form:
01887 #
01888 #     F(x) = F'(x) U U{F(y) | x R y}
01889 #
01890 # This is used to compute the values of Read() sets as well as FOLLOW sets
01891 # in LALR(1) generation.
01892 #
01893 # Inputs:  X    - An input set
01894 #          R    - A relation
01895 #          FP   - Set-valued function
01896 # ------------------------------------------------------------------------------
01897 
def digraph(X,R,FP):
    """Compute the set-valued function F(x) = FP(x) U union{ F(y) | x R y }
    over the input set X, using the DeRemer/Penello digraph algorithm.
    R is the relation and FP the base set-valued function.  Returns the
    dictionary F mapping each x to its computed set."""
    N = dict.fromkeys(X, 0)     # traversal marks, 0 = unvisited
    stack = []
    F = { }
    for x in X:
        if N[x] == 0:
            traverse(x,N,stack,F,X,R,FP)
    return F
01907 
def traverse(x,N,stack,F,X,R,FP):
    """Depth-first worker for digraph(): propagates F values along the
    relation R, giving every member of a strongly connected component the
    same final F value (Tarjan-style SCC detection via stack depths)."""
    stack.append(x)
    depth = len(stack)
    N[x] = depth
    F[x] = FP(x)             # F(x) <- F'(x)

    for y in R(x):           # visit each y related to x
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]:
                F[x].append(a)

    if N[x] == depth:
        # x is the root of a strongly connected component: unwind the stack
        # down to x, stamping every member with x's final F value.
        while True:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            if stack.pop() == x:
                break
01929 
class LALRError(YaccError):
    """Raised for errors encountered during LR table generation
    (e.g. an unsupported table-construction method)."""
    pass
01931 
01932 # -----------------------------------------------------------------------------
01933 #                             == LRGeneratedTable ==
01934 #
01935 # This class implements the LR table generation algorithm.  There are no
01936 # public methods except for write()
01937 # -----------------------------------------------------------------------------
01938 
01939 class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Generate LR parsing tables for *grammar*.

        method is 'SLR' or 'LALR' (default).  Diagnostics are written to
        *log*; a NullLogger is substituted when none is given.  Raises
        LALRError for an unsupported method.  Table construction runs to
        completion inside the constructor.
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Reference to the grammar's Production list
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Maps id(itemset) -> state number

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0         # Number of shift/reduce conflicts
        self.rr_conflict   = 0         # Number of reduce/reduce conflicts
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
01974 
01975     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
01976 
    def lr0_closure(self,I):
        """Compute the LR(0) closure of the item set I.

        For every item with the dot before a nonterminal B, the items
        B -> . gamma are added until no more can be added.  The per-item
        'lr0_added' stamp (compared against self._add_count) guarantees
        each production is inserted at most once per closure call.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            # NOTE: J deliberately grows while being iterated; items
            # appended here are examined later in this pass or the next.
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J
01994 
01995     # Compute the LR(0) goto function goto(I,X) where I is a set
01996     # of LR(0) items and X is a grammar symbol.   This function is written
01997     # in a way that guarantees uniqueness of the generated goto sets
01998     # (i.e. the same goto set will never be returned as two different Python
01999     # objects).  With uniqueness, we can later do fast set comparisons using
02000     # id(obj) instead of element-wise comparison.
02001 
    def lr0_goto(self,I,x):
        """Compute the LR(0) goto function goto(I,x).

        Written so the same goto set is always returned as the same Python
        object, which lets later code compare sets by id() instead of
        element-wise.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  self.lr_goto_cache[x] is the root of a small trie
        # keyed by the ids of the successor items that shift over x; the
        # '$end' slot at the final node holds the computed goto set.
        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # Empty goto: cache the empty list itself so the miss is
                # not recomputed.
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
02034 
02035     # Compute the LR(0) sets of item function
02036     def lr0_items(self):
02037 
02038         C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
02039         i = 0
02040         for I in C:
02041             self.lr0_cidhash[id(I)] = i
02042             i += 1
02043 
02044         # Loop over the items in C and each grammar symbols
02045         i = 0
02046         while i < len(C):
02047             I = C[i]
02048             i += 1
02049 
02050             # Collect all of the symbols that could possibly be in the goto(I,X) sets
02051             asyms = { }
02052             for ii in I:
02053                 for s in ii.usyms:
02054                     asyms[s] = None
02055 
02056             for x in asyms:
02057                 g = self.lr0_goto(I,x)
02058                 if not g:  continue
02059                 if id(g) in self.lr0_cidhash: continue
02060                 self.lr0_cidhash[id(g)] = len(C)
02061                 C.append(g)
02062 
02063         return C
02064 
02065     # -----------------------------------------------------------------------------
02066     #                       ==== LALR(1) Parsing ====
02067     #
02068     # LALR(1) parsing is almost exactly the same as SLR except that instead of
02069     # relying upon Follow() sets when performing reductions, a more selective
02070     # lookahead set that incorporates the state of the LR(0) machine is utilized.
02071     # Thus, we mainly just have to focus on calculating the lookahead sets.
02072     #
    # The method used here is due to DeRemer and Penello (1982).
    #
    # DeRemer, F. L., and T. J. Penello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
02078     #
02079     # Further details can also be found in:
02080     #
02081     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
02082     #      McGraw-Hill Book Company, (1985).
02083     #
02084     # -----------------------------------------------------------------------------
02085 
02086     # -----------------------------------------------------------------------------
02087     # compute_nullable_nonterminals()
02088     #
02089     # Creates a dictionary containing all of the non-terminals that might produce
02090     # an empty production.
02091     # -----------------------------------------------------------------------------
02092 
02093     def compute_nullable_nonterminals(self):
02094         nullable = {}
02095         num_nullable = 0
02096         while 1:
02097            for p in self.grammar.Productions[1:]:
02098                if p.len == 0:
02099                     nullable[p.name] = 1
02100                     continue
02101                for t in p.prod:
02102                     if not t in nullable: break
02103                else:
02104                     nullable[p.name] = 1
02105            if len(nullable) == num_nullable: break
02106            num_nullable = len(nullable)
02107         return nullable
02108 
02109     # -----------------------------------------------------------------------------
02110     # find_nonterminal_trans(C)
02111     #
    # Given a set of LR(0) items, this function finds all of the non-terminal
02113     # transitions.    These are transitions in which a dot appears immediately before
02114     # a non-terminal.   Returns a list of tuples of the form (state,N) where state
02115     # is the state number and N is the nonterminal symbol.
02116     #
02117     # The input C is the set of LR(0) items.
02118     # -----------------------------------------------------------------------------
02119 
02120     def find_nonterminal_transitions(self,C):
02121          trans = []
02122          for state in range(len(C)):
02123              for p in C[state]:
02124                  if p.lr_index < p.len - 1:
02125                       t = (state,p.prod[p.lr_index+1])
02126                       if t[1] in self.grammar.Nonterminals:
02127                             if t not in trans: trans.append(t)
02128              state = state + 1
02129          return trans
02130 
02131     # -----------------------------------------------------------------------------
02132     # dr_relation()
02133     #
02134     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
02135     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
02136     #
02137     # Returns a list of terminals.
02138     # -----------------------------------------------------------------------------
02139 
02140     def dr_relation(self,C,trans,nullable):
02141         dr_set = { }
02142         state,N = trans
02143         terms = []
02144 
02145         g = self.lr0_goto(C[state],N)
02146         for p in g:
02147            if p.lr_index < p.len - 1:
02148                a = p.prod[p.lr_index+1]
02149                if a in self.grammar.Terminals:
02150                    if a not in terms: terms.append(a)
02151 
02152         # This extra bit is to handle the start state
02153         if state == 0 and N == self.grammar.Productions[0].prod[0]:
02154            terms.append('$end')
02155 
02156         return terms
02157 
02158     # -----------------------------------------------------------------------------
02159     # reads_relation()
02160     #
02161     # Computes the READS() relation (p,A) READS (t,C).
02162     # -----------------------------------------------------------------------------
02163 
02164     def reads_relation(self,C, trans, empty):
02165         # Look for empty transitions
02166         rel = []
02167         state, N = trans
02168 
02169         g = self.lr0_goto(C[state],N)
02170         j = self.lr0_cidhash.get(id(g),-1)
02171         for p in g:
02172             if p.lr_index < p.len - 1:
02173                  a = p.prod[p.lr_index + 1]
02174                  if a in empty:
02175                       rel.append((j,a))
02176 
02177         return rel
02178 
02179     # -----------------------------------------------------------------------------
02180     # compute_lookback_includes()
02181     #
02182     # Determines the lookback and includes relations
02183     #
02184     # LOOKBACK:
02185     #
02186     # This relation is determined by running the LR(0) state machine forward.
02187     # For example, starting with a production "N : . A B C", we run it forward
02188     # to obtain "N : A B C ."   We then build a relationship between this final
02189     # state and the starting state.   These relationships are stored in a dictionary
02190     # lookdict.
02191     #
02192     # INCLUDES:
02193     #
02194     # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
02195     #
02196     # This relation is used to determine non-terminal transitions that occur
02197     # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
02198     # if the following holds:
02199     #
02200     #       B -> LAT, where T -> epsilon and p' -L-> p
02201     #
02202     # L is essentially a prefix (which may be empty), T is a suffix that must be
02203     # able to derive an empty string.  State p' must lead to state p with the string L.
02204     #
02205     # -----------------------------------------------------------------------------
02206 
    def compute_lookback_includes(self,C,trans,nullable):
        """Determine the LOOKBACK and INCLUDES relations.

        C        : list of LR(0) item sets (one per state)
        trans    : list of non-terminal transitions (state, N)
        nullable : dict of non-terminals that can derive empty

        Returns a pair (lookdict, includedict):
          lookdict[(state,N)]  -> list of (final_state, production) lookbacks
          includedict[(p,A)]   -> list of (p',B) such that (p,A) INCLUDES (p',B)
        """

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                     lr_index = lr_index + 1
                     t = p.prod[lr_index]

                     # Check to see if this symbol and state are a non-terminal transition
                     if (j,t) in dtrans:
                           # Yes.  Okay, there is some chance that this is an includes relation
                           # the only way to know for certain is whether the rest of the
                           # production derives empty

                           li = lr_index + 1
                           while li < p.len:
                                if p.prod[li] in self.grammar.Terminals: break      # No forget it
                                if not p.prod[li] in nullable: break
                                li = li + 1
                           else:
                                # Appears to be a relation between (j,t) and (state,N)
                                includes.append((j,t))

                     g = self.lr0_goto(C[j],t)               # Go to next set
                     j = self.lr0_cidhash.get(id(g),-1)     # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                     if r.name != p.name: continue
                     if r.len != p.len:   continue
                     i = 0
                     # This loop is comparing a production ". A B C" with "A B C ."
                     while i < r.lr_index:
                          if r.prod[i] != p.prod[i+1]: break
                          i = i + 1
                     else:
                          lookb.append((j,r))
            # Record the includes found for this transition; note the mapping is
            # inverted: each (j,t) found above INCLUDES the current (state,N)
            for i in includes:
                 if not i in includedict: includedict[i] = []
                 includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
02268 
02269     # -----------------------------------------------------------------------------
02270     # compute_read_sets()
02271     #
02272     # Given a set of LR(0) items, this function computes the read sets.
02273     #
02274     # Inputs:  C        =  Set of LR(0) items
02275     #          ntrans   = Set of nonterminal transitions
02276     #          nullable = Set of empty transitions
02277     #
02278     # Returns a set containing the read sets
02279     # -----------------------------------------------------------------------------
02280 
02281     def compute_read_sets(self,C, ntrans, nullable):
02282         FP = lambda x: self.dr_relation(C,x,nullable)
02283         R =  lambda x: self.reads_relation(C,x,nullable)
02284         F = digraph(ntrans,R,FP)
02285         return F
02286 
02287     # -----------------------------------------------------------------------------
02288     # compute_follow_sets()
02289     #
02290     # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
02291     # and an include set, this function computes the follow sets
02292     #
02293     # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
02294     #
02295     # Inputs:
02296     #            ntrans     = Set of nonterminal transitions
02297     #            readsets   = Readset (previously computed)
02298     #            inclsets   = Include sets (previously computed)
02299     #
02300     # Returns a set containing the follow sets
02301     # -----------------------------------------------------------------------------
02302 
02303     def compute_follow_sets(self,ntrans,readsets,inclsets):
02304          FP = lambda x: readsets[x]
02305          R  = lambda x: inclsets.get(x,[])
02306          F = digraph(ntrans,R,FP)
02307          return F
02308 
02309     # -----------------------------------------------------------------------------
02310     # add_lookaheads()
02311     #
02312     # Attaches the lookahead symbols to grammar rules.
02313     #
02314     # Inputs:    lookbacks         -  Set of lookback relations
02315     #            followset         -  Computed follow set
02316     #
02317     # This function directly attaches the lookaheads to productions contained
02318     # in the lookbacks set
02319     # -----------------------------------------------------------------------------
02320 
02321     def add_lookaheads(self,lookbacks,followset):
02322         for trans,lb in lookbacks.items():
02323             # Loop over productions in lookback
02324             for state,p in lb:
02325                  if not state in p.lookaheads:
02326                       p.lookaheads[state] = []
02327                  f = followset.get(trans,[])
02328                  for a in f:
02329                       if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
02330 
02331     # -----------------------------------------------------------------------------
02332     # add_lalr_lookaheads()
02333     #
02334     # This function does all of the work of adding lookahead information for use
02335     # with LALR parsing
02336     # -----------------------------------------------------------------------------
02337 
02338     def add_lalr_lookaheads(self,C):
02339         # Determine all of the nullable nonterminals
02340         nullable = self.compute_nullable_nonterminals()
02341 
02342         # Find all non-terminal transitions
02343         trans = self.find_nonterminal_transitions(C)
02344 
02345         # Compute read sets
02346         readsets = self.compute_read_sets(C,trans,nullable)
02347 
02348         # Compute lookback/includes relations
02349         lookd, included = self.compute_lookback_includes(C,trans,nullable)
02350 
02351         # Compute LALR FOLLOW sets
02352         followsets = self.compute_follow_sets(trans,readsets,included)
02353 
02354         # Add all of the lookaheads
02355         self.add_lookaheads(lookd,followsets)
02356 
02357     # -----------------------------------------------------------------------------
02358     # lr_parse_table()
02359     #
02360     # This function constructs the parse tables for SLR or LALR
02361     # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Construct the SLR or LALR parsing tables (action and goto).

        Action table encoding per (state, terminal):
            > 0  : shift and go to that state
            < 0  : reduce by rule number abs(value)
              0  : accept (start symbol S' at end of production)
            None : explicit error (result of a 'nonassoc' conflict)

        Shift/reduce conflicts are resolved via the precedence tables and
        recorded in self.sr_conflicts; reduce/reduce conflicts favor the
        rule defined first and are recorded in self.rr_conflicts.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action["$end"] = 0
                            st_actionp["$end"] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            # LALR uses the per-state lookaheads computed by
                            # add_lalr_lookaheads(); SLR falls back to FOLLOW.
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.
                                        sprec,slevel = Productions[st_actionp[a].number].prec
                                        rprec,rlevel = Precedence.get(a,('right',0))
                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp,rejectp = pp,oldp
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp,rejectp = oldp,pp
                                        self.rr_conflicts.append((st,chosenp,rejectp))
                                        log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I,a)
                            j = self.lr0_cidhash.get(id(g),-1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a,p,"shift and go to state %d" % j))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        if r != j:
                                            raise LALRError("Shift/shift conflict in state %d" % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift
                                        rprec,rlevel = Productions[st_actionp[a].number].prec
                                        sprec,slevel = Precedence.get(a,('right',0))
                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))

                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
02537 
02538 
02539     # -----------------------------------------------------------------------------
02540     # write_table()
02541     #
02542     # This function writes the LR parsing tables to a file
02543     # -----------------------------------------------------------------------------
02544 
02545     def write_table(self,modulename,outputdir='',signature=""):
02546         basemodulename = modulename.split(".")[-1]
02547         filename = os.path.join(outputdir,basemodulename) + ".py"
02548         try:
02549             f = open(filename,"w")
02550 
02551             f.write("""
02552 # %s
02553 # This file is automatically generated. Do not edit.
02554 _tabversion = %r
02555 
02556 _lr_method = %r
02557 
02558 _lr_signature = %r
02559     """ % (filename, __tabversion__, self.lr_method, signature))
02560 
02561             # Change smaller to 0 to go back to original tables
02562             smaller = 1
02563 
02564             # Factor out names to try and make smaller
02565             if smaller:
02566                 items = { }
02567 
02568                 for s,nd in self.lr_action.items():
02569                    for name,v in nd.items():
02570                       i = items.get(name)
02571                       if not i:
02572                          i = ([],[])
02573                          items[name] = i
02574                       i[0].append(s)
02575                       i[1].append(v)
02576 
02577                 f.write("\n_lr_action_items = {")
02578                 for k,v in items.items():
02579                     f.write("%r:([" % k)
02580                     for i in v[0]:
02581                         f.write("%r," % i)
02582                     f.write("],[")
02583                     for i in v[1]:
02584                         f.write("%r," % i)
02585 
02586                     f.write("]),")
02587                 f.write("}\n")
02588 
02589                 f.write("""
02590 _lr_action = { }
02591 for _k, _v in _lr_action_items.items():
02592    for _x,_y in zip(_v[0],_v[1]):
02593       if not _x in _lr_action:  _lr_action[_x] = { }
02594       _lr_action[_x][_k] = _y
02595 del _lr_action_items
02596 """)
02597 
02598             else:
02599                 f.write("\n_lr_action = { ");
02600                 for k,v in self.lr_action.items():
02601                     f.write("(%r,%r):%r," % (k[0],k[1],v))
02602                 f.write("}\n");
02603 
02604             if smaller:
02605                 # Factor out names to try and make smaller
02606                 items = { }
02607 
02608                 for s,nd in self.lr_goto.items():
02609                    for name,v in nd.items():
02610                       i = items.get(name)
02611                       if not i:
02612                          i = ([],[])
02613                          items[name] = i
02614                       i[0].append(s)
02615                       i[1].append(v)
02616 
02617                 f.write("\n_lr_goto_items = {")
02618                 for k,v in items.items():
02619                     f.write("%r:([" % k)
02620                     for i in v[0]:
02621                         f.write("%r," % i)
02622                     f.write("],[")
02623                     for i in v[1]:
02624                         f.write("%r," % i)
02625 
02626                     f.write("]),")
02627                 f.write("}\n")
02628 
02629                 f.write("""
02630 _lr_goto = { }
02631 for _k, _v in _lr_goto_items.items():
02632    for _x,_y in zip(_v[0],_v[1]):
02633        if not _x in _lr_goto: _lr_goto[_x] = { }
02634        _lr_goto[_x][_k] = _y
02635 del _lr_goto_items
02636 """)
02637             else:
02638                 f.write("\n_lr_goto = { ");
02639                 for k,v in self.lr_goto.items():
02640                     f.write("(%r,%r):%r," % (k[0],k[1],v))
02641                 f.write("}\n");
02642 
02643             # Write production table
02644             f.write("_lr_productions = [\n")
02645             for p in self.lr_productions:
02646                 if p.func:
02647                     f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
02648                 else:
02649                     f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
02650             f.write("]\n")
02651             f.close()
02652 
02653         except IOError:
02654             e = sys.exc_info()[1]
02655             sys.stderr.write("Unable to create '%s'\n" % filename)
02656             sys.stderr.write(str(e)+"\n")
02657             return
02658 
02659 
02660     # -----------------------------------------------------------------------------
02661     # pickle_table()
02662     #
02663     # This function pickles the LR parsing tables to a supplied file object
02664     # -----------------------------------------------------------------------------
02665 
02666     def pickle_table(self,filename,signature=""):
02667         try:
02668             import cPickle as pickle
02669         except ImportError:
02670             import pickle
02671         outf = open(filename,"wb")
02672         pickle.dump(__tabversion__,outf,pickle_protocol)
02673         pickle.dump(self.lr_method,outf,pickle_protocol)
02674         pickle.dump(signature,outf,pickle_protocol)
02675         pickle.dump(self.lr_action,outf,pickle_protocol)
02676         pickle.dump(self.lr_goto,outf,pickle_protocol)
02677 
02678         outp = []
02679         for p in self.lr_productions:
02680             if p.func:
02681                 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
02682             else:
02683                 outp.append((str(p),p.name,p.len,None,None,None))
02684         pickle.dump(outp,outf,pickle_protocol)
02685         outf.close()
02686 
02687 # -----------------------------------------------------------------------------
02688 #                            === INTROSPECTION ===
02689 #
02690 # The following functions and classes are used to implement the PLY
02691 # introspection features followed by the yacc() function itself.
02692 # -----------------------------------------------------------------------------
02693 
02694 # -----------------------------------------------------------------------------
02695 # get_caller_module_dict()
02696 #
02697 # This function returns a dictionary containing all of the symbols defined within
02698 # a caller further down the call stack.  This is used to get the environment
02699 # associated with the yacc() call if none was provided.
02700 # -----------------------------------------------------------------------------
02701 
def get_caller_module_dict(levels):
    """Return the symbol table of a frame `levels` calls up the stack.

    The result is a copy of that frame's globals, overlaid with its
    locals when the two differ (i.e. when the frame is not module-level).
    Used to recover the environment of the yacc() caller.
    """
    # Raise/catch a throwaway exception to obtain the current frame in a
    # way that works on old Python versions.
    try:
        raise RuntimeError
    except RuntimeError:
        frame = sys.exc_info()[2].tb_frame
    # Walk up the requested number of frames
    for _ in range(levels):
        frame = frame.f_back
    symbols = frame.f_globals.copy()
    if frame.f_globals != frame.f_locals:
        symbols.update(frame.f_locals)
    return symbols
02716 
02717 # -----------------------------------------------------------------------------
02718 # parse_grammar()
02719 #
02720 # This takes a raw grammar rule string and parses it into production data
02721 # -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    """Parse a raw grammar rule string into production data.

    doc  : the rule specification (one or more lines, '|' continues the
           previous rule)
    file : filename used in error messages
    line : line number of the string's first line in `file`

    Returns a list of (file, lineno, prodname, syms) tuples.  Raises
    SyntaxError for a misplaced '|', a missing ':'/'::=' separator, or
    any other malformed rule line.
    """
    grammar = []
    current = None                      # name of the most recent rule, for '|'
    lineno = line
    for raw in doc.splitlines():
        lineno += 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # This is a continuation of a previous rule
                if not current:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,lineno))
                name = current
                symbols = tokens[1:]
            else:
                name = tokens[0]
                current = name
                separator = tokens[1]
                symbols = tokens[2:]
                if separator not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,lineno))

            grammar.append((file,lineno,name,symbols))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a lone token) is a generic syntax error
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,lineno,raw.strip()))

    return grammar
02754 
02755 # -----------------------------------------------------------------------------
02756 # ParserReflect()
02757 #
02758 # This class represents information extracted for building a parser including
02759 # start symbol, error function, tokens, precedence list, action functions,
02760 # etc.
02761 # -----------------------------------------------------------------------------
02762 class ParserReflect(object):
02763     def __init__(self,pdict,log=None):
02764         self.pdict      = pdict
02765         self.start      = None
02766         self.error_func = None
02767         self.tokens     = None
02768         self.files      = {}
02769         self.grammar    = []
02770         self.error      = 0
02771 
02772         if log is None:
02773             self.log = PlyLogger(sys.stderr)
02774         else:
02775             self.log = log
02776 
02777     # Get all of the basic information
    def get_all(self):
        """Gather all of the basic parser information from pdict:
        start symbol, error function, token list, precedence list
        and the p_* rule functions."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()
02784         
02785     # Validate all of the information
    def validate_all(self):
        """Run every validation check and return the accumulated error
        flag (nonzero when any check reported a problem)."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error
02794 
02795     # Compute a signature over the grammar
02796     def signature(self):
02797         try:
02798             from hashlib import md5
02799         except ImportError:
02800             from md5 import md5
02801         try:
02802             sig = md5()
02803             if self.start:
02804                 sig.update(self.start.encode('latin-1'))
02805             if self.prec:
02806                 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
02807             if self.tokens:
02808                 sig.update(" ".join(self.tokens).encode('latin-1'))
02809             for f in self.pfuncs:
02810                 if f[3]:
02811                     sig.update(f[3].encode('latin-1'))
02812         except (TypeError,ValueError):
02813             pass
02814         return sig.digest()
02815 
02816     # -----------------------------------------------------------------------------
02817     # validate_file()
02818     #
02819     # This method checks to see if there are duplicated p_rulename() functions
02820     # in the parser module file.  Without this function, it is really easy for
02821     # users to make mistakes by cutting and pasting code fragments (and it's a real
02822     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
02823     # we just do a little regular expression pattern matching of def statements
02824     # to try and detect duplicates.
02825     # -----------------------------------------------------------------------------
02826 
02827     def validate_files(self):
02828         # Match def p_funcname(
02829         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
02830 
02831         for filename in self.files.keys():
02832             base,ext = os.path.splitext(filename)
02833             if ext != '.py': return 1          # No idea. Assume it's okay.
02834 
02835             try:
02836                 f = open(filename)
02837                 lines = f.readlines()
02838                 f.close()
02839             except IOError:
02840                 continue
02841 
02842             counthash = { }
02843             for linen,l in enumerate(lines):
02844                 linen += 1
02845                 m = fre.match(l)
02846                 if m:
02847                     name = m.group(1)
02848                     prev = counthash.get(name)
02849                     if not prev:
02850                         counthash[name] = linen
02851                     else:
02852                         self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
02853 
02854     # Get the start symbol
02855     def get_start(self):
02856         self.start = self.pdict.get('start')
02857 
02858     # Validate the start symbol
02859     def validate_start(self):
02860         if self.start is not None:
02861             if not isinstance(self.start,str):
02862                 self.log.error("'start' must be a string")
02863 
02864     # Look for error handler
02865     def get_error_func(self):
02866         self.error_func = self.pdict.get('p_error')
02867 
02868     # Validate the error function
02869     def validate_error_func(self):
02870         if self.error_func:
02871             if isinstance(self.error_func,types.FunctionType):
02872                 ismethod = 0
02873             elif isinstance(self.error_func, types.MethodType):
02874                 ismethod = 1
02875             else:
02876                 self.log.error("'p_error' defined, but is not a function or method")
02877                 self.error = 1
02878                 return
02879 
02880             eline = func_code(self.error_func).co_firstlineno
02881             efile = func_code(self.error_func).co_filename
02882             self.files[efile] = 1
02883 
02884             if (func_code(self.error_func).co_argcount != 1+ismethod):
02885                 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
02886                 self.error = 1
02887 
02888     # Get the tokens map
02889     def get_tokens(self):
02890         tokens = self.pdict.get("tokens",None)
02891         if not tokens:
02892             self.log.error("No token list is defined")
02893             self.error = 1
02894             return
02895 
02896         if not isinstance(tokens,(list, tuple)):
02897             self.log.error("tokens must be a list or tuple")
02898             self.error = 1
02899             return
02900         
02901         if not tokens:
02902             self.log.error("tokens is empty")
02903             self.error = 1
02904             return
02905 
02906         self.tokens = tokens
02907 
02908     # Validate the tokens
02909     def validate_tokens(self):
02910         # Validate the tokens.
02911         if 'error' in self.tokens:
02912             self.log.error("Illegal token name 'error'. Is a reserved word")
02913             self.error = 1
02914             return
02915 
02916         terminals = {}
02917         for n in self.tokens:
02918             if n in terminals:
02919                 self.log.warning("Token '%s' multiply defined", n)
02920             terminals[n] = 1
02921 
02922     # Get the precedence map (if any)
02923     def get_precedence(self):
02924         self.prec = self.pdict.get("precedence",None)
02925 
02926     # Validate and parse the precedence map
02927     def validate_precedence(self):
02928         preclist = []
02929         if self.prec:
02930             if not isinstance(self.prec,(list,tuple)):
02931                 self.log.error("precedence must be a list or tuple")
02932                 self.error = 1
02933                 return
02934             for level,p in enumerate(self.prec):
02935                 if not isinstance(p,(list,tuple)):
02936                     self.log.error("Bad precedence table")
02937                     self.error = 1
02938                     return
02939 
02940                 if len(p) < 2:
02941                     self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
02942                     self.error = 1
02943                     return
02944                 assoc = p[0]
02945                 if not isinstance(assoc,str):
02946                     self.log.error("precedence associativity must be a string")
02947                     self.error = 1
02948                     return
02949                 for term in p[1:]:
02950                     if not isinstance(term,str):
02951                         self.log.error("precedence items must be strings")
02952                         self.error = 1
02953                         return
02954                     preclist.append((term,assoc,level+1))
02955         self.preclist = preclist
02956 
02957     # Get all p_functions from the grammar
02958     def get_pfunctions(self):
02959         p_functions = []
02960         for name, item in self.pdict.items():
02961             if name[:2] != 'p_': continue
02962             if name == 'p_error': continue
02963             if isinstance(item,(types.FunctionType,types.MethodType)):
02964                 line = func_code(item).co_firstlineno
02965                 file = func_code(item).co_filename
02966                 p_functions.append((line,file,name,item.__doc__))
02967 
02968         # Sort all of the actions by line number
02969         p_functions.sort()
02970         self.pfuncs = p_functions
02971 
02972 
02973     # Validate all of the p_functions
02974     def validate_pfunctions(self):
02975         grammar = []
02976         # Check for non-empty symbols
02977         if len(self.pfuncs) == 0:
02978             self.log.error("no rules of the form p_rulename are defined")
02979             self.error = 1
02980             return 
02981         
02982         for line, file, name, doc in self.pfuncs:
02983             func = self.pdict[name]
02984             if isinstance(func, types.MethodType):
02985                 reqargs = 2
02986             else:
02987                 reqargs = 1
02988             if func_code(func).co_argcount > reqargs:
02989                 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
02990                 self.error = 1
02991             elif func_code(func).co_argcount < reqargs:
02992                 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
02993                 self.error = 1
02994             elif not func.__doc__:
02995                 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
02996             else:
02997                 try:
02998                     parsed_g = parse_grammar(doc,file,line)
02999                     for g in parsed_g:
03000                         grammar.append((name, g))
03001                 except SyntaxError:
03002                     e = sys.exc_info()[1]
03003                     self.log.error(str(e))
03004                     self.error = 1
03005 
03006                 # Looks like a valid grammar rule
03007                 # Mark the file in which defined.
03008                 self.files[file] = 1
03009 
03010         # Secondary validation step that looks for p_ definitions that are not functions
03011         # or functions that look like they might be grammar rules.
03012 
03013         for n,v in self.pdict.items():
03014             if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
03015             if n[0:2] == 't_': continue
03016             if n[0:2] == 'p_' and n != 'p_error':
03017                 self.log.warning("'%s' not defined as a function", n)
03018             if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
03019                 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
03020                 try:
03021                     doc = v.__doc__.split(" ")
03022                     if doc[1] == ':':
03023                         self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
03024                                          func_code(v).co_filename, func_code(v).co_firstlineno,n)
03025                 except Exception:
03026                     pass
03027 
03028         self.grammar = grammar
03029 
03030 # -----------------------------------------------------------------------------
03031 # yacc(module)
03032 #
03033 # Build a parser
03034 # -----------------------------------------------------------------------------
03035 
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
         debuglog=None, errorlog = None, picklefile=None):
    """Build an LR parser from grammar rules found in *module* (or, by
    default, in the caller's namespace).

    method          -- table construction algorithm ('LALR' or 'SLR')
    debug           -- if true, write a debugging log to debugfile
    module          -- module/object containing the grammar definition
    tabmodule       -- module name used for the generated parser tables
    start           -- start symbol override (defaults to pdict['start'])
    check_recursion -- if true, check for unreachable and infinitely
                       recursive grammar symbols
    optimize        -- if true, reuse existing tables without re-validating
    write_tables    -- if true, write generated tables out (disabled when
                       picklefile is given)
    debugfile       -- name of the debug log file
    outputdir       -- directory in which table files are written
    debuglog        -- logger for debug output (default: debugfile or null)
    errorlog        -- logger for errors/warnings (default: stderr)
    picklefile      -- if given, read/write tables as a pickle instead

    Returns an LRParser instance and binds the module-level ``parse``
    shortcut to its parse() method.  Raises YaccError when the grammar
    cannot be built.
    """
    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables.  If they exist and match the grammar's signature
    # (or optimize is set), reuse them and return immediately.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # BUGFIX: was 'e = sys.exc_info()', which made the warning print the
        # whole (type, value, traceback) tuple; index [1] is the exception.
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        # Best-effort: missing/unreadable tables simply trigger regeneration
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")
    
    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
        
        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1
        
    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")
    
    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)
            
    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)
        
        # Deduplicate reduce/reduce reports by (state, rule, rejected) identity
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1
        
        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser