Back to index

enigmail  1.4.3
Classes | Functions | Variables
ply.lex Namespace Reference

Classes

class  LexError
class  LexToken
class  PlyLogger
class  NullLogger
class  Lexer
class  LexerReflect

Functions

def func_code
def get_caller_module_dict
def _funcs_to_names
def _names_to_funcs
def _form_master_re
def _statetoken
def lex
def runmain
def TOKEN

Variables

string __version__ = "3.3"
string __tabversion__ = "3.2"
tuple StringTypes = (types.StringType, types.UnicodeType)
tuple _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
 Token = TOKEN

Function Documentation

def ply.lex._form_master_re (   relist,
  reflags,
  ldict,
  toknames 
) [private]

Definition at line 482 of file lex.py.

00482 
00483 def _form_master_re(relist,reflags,ldict,toknames):
00484     if not relist: return []
00485     regex = "|".join(relist)
00486     try:
00487         lexre = re.compile(regex,re.VERBOSE | reflags)
00488 
00489         # Build the index to function map for the matching engine
00490         lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
00491         lexindexnames = lexindexfunc[:]
00492 
00493         for f,i in lexre.groupindex.items():
00494             handle = ldict.get(f,None)
00495             if type(handle) in (types.FunctionType, types.MethodType):
00496                 lexindexfunc[i] = (handle,toknames[f])
00497                 lexindexnames[i] = f
00498             elif handle is not None:
00499                 lexindexnames[i] = f
00500                 if f.find("ignore_") > 0:
00501                     lexindexfunc[i] = (None,None)
00502                 else:
00503                     lexindexfunc[i] = (None, toknames[f])
00504         
00505         return [(lexre,lexindexfunc)],[regex],[lexindexnames]
00506     except Exception:
00507         m = int(len(relist)/2)
00508         if m == 0: m = 1
00509         llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
00510         rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
00511         return llist+rlist, lre+rre, lnames+rnames
00512 
00513 # -----------------------------------------------------------------------------
00514 # def _statetoken(s,names)
00515 #
00516 # Given a declaration name s of the form "t_" and a dictionary whose keys are
00517 # state names, this function returns a tuple (states,tokenname) where states
00518 # is a tuple of state names and tokenname is the name of the token.  For example,
00519 # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
00520 # -----------------------------------------------------------------------------

Here is the caller graph for this function:

def ply.lex._funcs_to_names (   funclist,
  namelist 
) [private]

Definition at line 449 of file lex.py.

00449 
00450 def _funcs_to_names(funclist,namelist):
00451     result = []
00452     for f,name in zip(funclist,namelist):
00453          if f and f[0]:
00454              result.append((name, f[1]))
00455          else:
00456              result.append(f)
00457     return result
00458 
00459 # -----------------------------------------------------------------------------
00460 # _names_to_funcs()
00461 #
00462 # Given a list of regular expression function names, this converts it back to
00463 # functions.
00464 # -----------------------------------------------------------------------------

Here is the caller graph for this function:

def ply.lex._names_to_funcs (   namelist,
  fdict 
) [private]

Definition at line 465 of file lex.py.

00465 
00466 def _names_to_funcs(namelist,fdict):
00467      result = []
00468      for n in namelist:
00469           if n and n[0]:
00470               result.append((fdict[n[0]],n[1]))
00471           else:
00472               result.append(n)
00473      return result
00474 
00475 # -----------------------------------------------------------------------------
00476 # _form_master_re()
00477 #
00478 # This function takes a list of all of the regex components and attempts to
00479 # form the master regular expression.  Given limitations in the Python re
00480 # module, it may be necessary to break the master regex into separate expressions.
00481 # -----------------------------------------------------------------------------

Here is the caller graph for this function:

def ply.lex._statetoken (   s,
  names 
) [private]

Definition at line 521 of file lex.py.

00521 
00522 def _statetoken(s,names):
00523     nonstate = 1
00524     parts = s.split("_")
00525     for i in range(1,len(parts)):
00526          if not parts[i] in names and parts[i] != 'ANY': break
00527     if i > 1:
00528        states = tuple(parts[1:i])
00529     else:
00530        states = ('INITIAL',)
00531 
00532     if 'ANY' in states:
00533        states = tuple(names)
00534 
00535     tokenname = "_".join(parts[i:])
00536     return (states,tokenname)
00537 
00538 
00539 # -----------------------------------------------------------------------------
00540 # LexerReflect()
00541 #
00542 # This class represents information needed to build a lexer as extracted from a
00543 # user's input file.
# -----------------------------------------------------------------------------
def ply.lex.func_code (   f)

Definition at line 51 of file lex.py.

00051 
00052     def func_code(f):
00053         return f.func_code
else:

Here is the caller graph for this function:

def ply.lex.get_caller_module_dict (   levels)

Definition at line 427 of file lex.py.

def get_caller_module_dict(levels):
    """Return globals merged with locals for the frame `levels` calls up
    the stack.  Used so lex() can harvest t_* rule definitions from the
    namespace of whoever invoked it."""
    try:
        raise RuntimeError
    except RuntimeError:
        # The traceback frame is this function's own frame; walk back
        # `levels` callers from here.
        frame = sys.exc_info()[2].tb_frame
        for _ in range(levels):
            frame = frame.f_back
        ldict = frame.f_globals.copy()
        # Inside a function, locals differ from globals; fold them in so
        # locally defined rules are visible too.
        if frame.f_globals != frame.f_locals:
            ldict.update(frame.f_locals)

        return ldict
00442 
00443 # -----------------------------------------------------------------------------
00444 # _funcs_to_names()
00445 #
00446 # Given a list of regular expression functions, this converts it to a list
00447 # suitable for output to a table file
00448 # -----------------------------------------------------------------------------

Here is the caller graph for this function:

def ply.lex.lex (   module = None,
  object = None,
  debug = 0,
  optimize = 0,
  lextab = "lextab",
  reflags = 0,
  nowarn = 0,
  outputdir = "",
  debuglog = None,
  errorlog = None 
)

Definition at line 865 of file lex.py.

00865 
# lex(): build and return a Lexer object.
#
# Rule definitions are harvested either from `module`/`object` (when given)
# or from the caller's own namespace via get_caller_module_dict(2).  With
# optimize and lextab set, pre-generated tables are loaded through
# Lexer.readtab() and the function returns early.  Raises SyntaxError when
# rule validation fails (non-optimized mode only).  As side effects, the
# module globals `lexer`, `token` and `input` are rebound to the new object.
# `nowarn` is accepted but not consulted anywhere in this body.
00866 def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
00867     global lexer
00868     ldict = None
00869     stateinfo  = { 'INITIAL' : 'inclusive'}
00870     lexobj = Lexer()
00871     lexobj.lexoptimize = optimize
00872     global token,input
00873 
00874     if errorlog is None:
00875         errorlog = PlyLogger(sys.stderr)
00876 
00877     if debug:
00878         if debuglog is None:
00879             debuglog = PlyLogger(sys.stderr)
00880 
00881     # Get the module dictionary used for the lexer
00882     if object: module = object
00883 
00884     if module:
00885         _items = [(k,getattr(module,k)) for k in dir(module)]
00886         ldict = dict(_items)
00887     else:
00888         ldict = get_caller_module_dict(2)
00889 
00890     # Collect parser information from the dictionary
00891     linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
00892     linfo.get_all()
00893     if not optimize:
00894         if linfo.validate_all():
00895             raise SyntaxError("Can't build lexer")
00896 
      # Optimized mode: try the pre-generated table module first; a missing
      # table (ImportError) silently falls through to a full rebuild.
00897     if optimize and lextab:
00898         try:
00899             lexobj.readtab(lextab,ldict)
00900             token = lexobj.token
00901             input = lexobj.input
00902             lexer = lexobj
00903             return lexobj
00904 
00905         except ImportError:
00906             pass
00907 
00908     # Dump some basic debugging information
00909     if debug:
00910         debuglog.info("lex: tokens   = %r", linfo.tokens)
00911         debuglog.info("lex: literals = %r", linfo.literals)
00912         debuglog.info("lex: states   = %r", linfo.stateinfo)
00913 
00914     # Build a dictionary of valid token names
00915     lexobj.lextokens = { }
00916     for n in linfo.tokens:
00917         lexobj.lextokens[n] = 1
00918 
00919     # Get literals specification
00920     if isinstance(linfo.literals,(list,tuple)):
          # Join using an empty instance of the first element's type so the
          # result stays str or unicode to match the input literals.
00921         lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
00922     else:
00923         lexobj.lexliterals = linfo.literals
00924 
00925     # Get the stateinfo dictionary
00926     stateinfo = linfo.stateinfo
00927 
00928     regexs = { }
00929     # Build the master regular expressions
00930     for state in stateinfo:
00931         regex_list = []
00932 
00933         # Add rules defined by functions first
00934         for fname, f in linfo.funcsym[state]:
              # NOTE(review): line/file are computed but never used below.
00935             line = func_code(f).co_firstlineno
00936             file = func_code(f).co_filename
00937             regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
00938             if debug:
00939                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
00940 
00941         # Now add all of the simple rules
00942         for name,r in linfo.strsym[state]:
00943             regex_list.append("(?P<%s>%s)" % (name,r))
00944             if debug:
00945                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
00946 
00947         regexs[state] = regex_list
00948 
00949     # Build the master regular expressions
00950 
00951     if debug:
00952         debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
00953 
      # _form_master_re may split one state's rules across several compiled
      # regexes, so each lexstate* entry is a (parallel) list.
00954     for state in regexs:
00955         lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
00956         lexobj.lexstatere[state] = lexre
00957         lexobj.lexstateretext[state] = re_text
00958         lexobj.lexstaterenames[state] = re_names
00959         if debug:
00960             for i in range(len(re_text)):
00961                 debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
00962 
00963     # For inclusive states, we need to add the regular expressions from the INITIAL state
00964     for state,stype in stateinfo.items():
00965         if state != "INITIAL" and stype == 'inclusive':
00966              lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
00967              lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
00968              lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
00969 
00970     lexobj.lexstateinfo = stateinfo
00971     lexobj.lexre = lexobj.lexstatere["INITIAL"]
00972     lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
00973     lexobj.lexreflags = reflags
00974 
00975     # Set up ignore variables
00976     lexobj.lexstateignore = linfo.ignore
00977     lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
00978 
00979     # Set up error functions
00980     lexobj.lexstateerrorf = linfo.errorf
00981     lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
00982     if not lexobj.lexerrorf:
00983         errorlog.warning("No t_error rule is defined")
00984 
00985     # Check state information for ignore and error rules
00986     for s,stype in stateinfo.items():
00987         if stype == 'exclusive':
00988               if not s in linfo.errorf:
00989                    errorlog.warning("No error rule is defined for exclusive state '%s'", s)
00990               if not s in linfo.ignore and lexobj.lexignore:
00991                    errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
00992         elif stype == 'inclusive':
00993               if not s in linfo.errorf:
00994                    linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
00995               if not s in linfo.ignore:
00996                    linfo.ignore[s] = linfo.ignore.get("INITIAL","")
00997 
00998     # Create global versions of the token() and input() functions
00999     token = lexobj.token
01000     input = lexobj.input
01001     lexer = lexobj
01002 
01003     # If in optimize mode, we write the lextab
01004     if lextab and optimize:
01005         lexobj.writetab(lextab,outputdir)
01006 
01007     return lexobj
01008 
01009 # -----------------------------------------------------------------------------
01010 # runmain()
01011 #
01012 # This runs the lexer as a main program
01013 # -----------------------------------------------------------------------------

Here is the call graph for this function:

def ply.lex.runmain (   lexer = None,
  data = None 
)

Definition at line 1014 of file lex.py.

def runmain(lexer=None, data=None):
    """Run a lexer as a main program: tokenize `data` (or the file named in
    sys.argv[1], or standard input) and print every token to stdout as
    "(type,value,lineno,lexpos)".  Falls back to the module-level input()
    and token() functions when no lexer object is supplied."""
    if not data:
        try:
            with open(sys.argv[1]) as src:
                data = src.read()
        except IndexError:
            # No filename on the command line -- read stdin instead.
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()

    _input = lexer.input if lexer else input
    _input(data)
    _token = lexer.token if lexer else token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno, tok.lexpos))
01040 
01041 # -----------------------------------------------------------------------------
01042 # @TOKEN(regex)
01043 #
01044 # This decorator function can be used to set the regex expression on a function
01045 # when its docstring might need to be set in an alternative way
01046 # -----------------------------------------------------------------------------

def ply.lex.TOKEN (   r)

Definition at line 1047 of file lex.py.

def TOKEN(r):
    """Decorator that sets a rule function's docstring to the regex `r`, or
    to `r`'s own docstring when `r` is callable -- for rules whose pattern
    cannot conveniently be written as a literal docstring."""
    def _attach(f):
        f.__doc__ = r.__doc__ if callable(r) else r
        return f
    return _attach
01056 
# Alternative spelling of the TOKEN decorator

Variable Documentation

string ply.lex.__tabversion__ = "3.2"

Definition at line 35 of file lex.py.

string ply.lex.__version__ = "3.3"

Definition at line 34 of file lex.py.

tuple ply.lex._is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

Definition at line 58 of file lex.py.

tuple ply.lex.StringTypes = (types.StringType, types.UnicodeType)

Definition at line 42 of file lex.py.

ply.lex.Token = TOKEN

Definition at line 1057 of file lex.py.