Back to index

python3.2  3.2.2
Public Member Functions | Public Attributes
lib2to3.pgen2.pgen.ParserGenerator Class Reference
Inheritance diagram for lib2to3.pgen2.pgen.ParserGenerator:
Inheritance graph
[legend]
Collaboration diagram for lib2to3.pgen2.pgen.ParserGenerator:
Collaboration graph
[legend]

List of all members.

Public Member Functions

def __init__
def make_grammar
def make_first
def make_label
def addfirstsets
def calcfirst
def parse
def make_dfa
def dump_nfa
def dump_dfa
def simplify_dfa
def parse_rhs
def parse_alt
def parse_item
def parse_atom
def expect
def gettoken
def raise_error

Public Attributes

 filename
 stream
 generator
 startsymbol
 first
 type
 value
 line
_PyObject_HEAD_EXTRA Py_ssize_t ob_refcnt
struct _typeobjectob_type

Detailed Description

Definition at line 10 of file pgen.py.


Constructor & Destructor Documentation

def lib2to3.pgen2.pgen.ParserGenerator.__init__ (   self,
  filename,
  stream = None 
)

Definition at line 12 of file pgen.py.

00012 
00013     def __init__(self, filename, stream=None):
00014         close_stream = None
00015         if stream is None:
00016             stream = open(filename)
00017             close_stream = stream.close
00018         self.filename = filename
00019         self.stream = stream
00020         self.generator = tokenize.generate_tokens(stream.readline)
00021         self.gettoken() # Initialize lookahead
00022         self.dfas, self.startsymbol = self.parse()
00023         if close_stream is not None:
00024             close_stream()
00025         self.first = {} # map from symbol name to set of tokens
00026         self.addfirstsets()

Here is the caller graph for this function:


Member Function Documentation

Definition at line 107 of file pgen.py.

00107 
00108     def addfirstsets(self):
00109         names = list(self.dfas.keys())
00110         names.sort()
00111         for name in names:
00112             if name not in self.first:
00113                 self.calcfirst(name)
00114             #print name, self.first[name].keys()

Here is the call graph for this function:

Definition at line 115 of file pgen.py.

00115 
00116     def calcfirst(self, name):
00117         dfa = self.dfas[name]
00118         self.first[name] = None # dummy to detect left recursion
00119         state = dfa[0]
00120         totalset = {}
00121         overlapcheck = {}
00122         for label, next in state.arcs.items():
00123             if label in self.dfas:
00124                 if label in self.first:
00125                     fset = self.first[label]
00126                     if fset is None:
00127                         raise ValueError("recursion for rule %r" % name)
00128                 else:
00129                     self.calcfirst(label)
00130                     fset = self.first[label]
00131                 totalset.update(fset)
00132                 overlapcheck[label] = fset
00133             else:
00134                 totalset[label] = 1
00135                 overlapcheck[label] = {label: 1}
00136         inverse = {}
00137         for label, itsfirst in overlapcheck.items():
00138             for symbol in itsfirst:
00139                 if symbol in inverse:
00140                     raise ValueError("rule %s is ambiguous; %s is in the"
00141                                      " first sets of %s as well as %s" %
00142                                      (name, symbol, label, inverse[symbol]))
00143                 inverse[symbol] = label
00144         self.first[name] = totalset

Here is the call graph for this function:

Here is the caller graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.dump_dfa (   self,
  name,
  dfa 
)

Definition at line 221 of file pgen.py.

00221 
00222     def dump_dfa(self, name, dfa):
00223         print("Dump of DFA for", name)
00224         for i, state in enumerate(dfa):
00225             print("  State", i, state.isfinal and "(final)" or "")
00226             for label, next in state.arcs.items():
00227                 print("    %s -> %d" % (label, dfa.index(next)))

Here is the call graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.dump_nfa (   self,
  name,
  start,
  finish 
)

Definition at line 205 of file pgen.py.

00205 
00206     def dump_nfa(self, name, start, finish):
00207         print("Dump of NFA for", name)
00208         todo = [start]
00209         for i, state in enumerate(todo):
00210             print("  State", i, state is finish and "(final)" or "")
00211             for label, next in state.arcs:
00212                 if next in todo:
00213                     j = todo.index(next)
00214                 else:
00215                     j = len(todo)
00216                     todo.append(next)
00217                 if label is None:
00218                     print("    -> %d" % j)
00219                 else:
00220                     print("    %s -> %d" % (label, j))

Here is the call graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.expect (   self,
  type,
  value = None 
)

Definition at line 313 of file pgen.py.

00313 
00314     def expect(self, type, value=None):
00315         if self.type != type or (value is not None and self.value != value):
00316             self.raise_error("expected %s/%s, got %s/%s",
00317                              type, value, self.type, self.value)
00318         value = self.value
00319         self.gettoken()
00320         return value

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 321 of file pgen.py.

00321 
00322     def gettoken(self):
00323         tup = next(self.generator)
00324         while tup[0] in (tokenize.COMMENT, tokenize.NL):
00325             tup = next(self.generator)
00326         self.type, self.value, self.begin, self.end, self.line = tup
00327         #print token.tok_name[self.type], repr(self.value)

Here is the caller graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.make_dfa (   self,
  start,
  finish 
)

Definition at line 169 of file pgen.py.

00169 
00170     def make_dfa(self, start, finish):
00171         # To turn an NFA into a DFA, we define the states of the DFA
00172         # to correspond to *sets* of states of the NFA.  Then do some
00173         # state reduction.  Let's represent sets as dicts with 1 for
00174         # values.
00175         assert isinstance(start, NFAState)
00176         assert isinstance(finish, NFAState)
00177         def closure(state):
00178             base = {}
00179             addclosure(state, base)
00180             return base
00181         def addclosure(state, base):
00182             assert isinstance(state, NFAState)
00183             if state in base:
00184                 return
00185             base[state] = 1
00186             for label, next in state.arcs:
00187                 if label is None:
00188                     addclosure(next, base)
00189         states = [DFAState(closure(start), finish)]
00190         for state in states: # NB states grows while we're iterating
00191             arcs = {}
00192             for nfastate in state.nfaset:
00193                 for label, next in nfastate.arcs:
00194                     if label is not None:
00195                         addclosure(next, arcs.setdefault(label, {}))
00196             for label, nfaset in arcs.items():
00197                 for st in states:
00198                     if st.nfaset == nfaset:
00199                         break
00200                 else:
00201                     st = DFAState(nfaset, finish)
00202                     states.append(st)
00203                 state.addarc(st, label)
00204         return states # List of DFAState instances; first one is start

Here is the call graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.make_first (   self,
  c,
  name 
)

Definition at line 52 of file pgen.py.

00052 
00053     def make_first(self, c, name):
00054         rawfirst = self.first[name]
00055         first = {}
00056         for label in rawfirst:
00057             ilabel = self.make_label(c, label)
00058             ##assert ilabel not in first # XXX failed on <> ... !=
00059             first[ilabel] = 1
00060         return first

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 27 of file pgen.py.

00027 
00028     def make_grammar(self):
00029         c = PgenGrammar()
00030         names = list(self.dfas.keys())
00031         names.sort()
00032         names.remove(self.startsymbol)
00033         names.insert(0, self.startsymbol)
00034         for name in names:
00035             i = 256 + len(c.symbol2number)
00036             c.symbol2number[name] = i
00037             c.number2symbol[i] = name
00038         for name in names:
00039             dfa = self.dfas[name]
00040             states = []
00041             for state in dfa:
00042                 arcs = []
00043                 for label, next in state.arcs.items():
00044                     arcs.append((self.make_label(c, label), dfa.index(next)))
00045                 if state.isfinal:
00046                     arcs.append((0, dfa.index(state)))
00047                 states.append(arcs)
00048             c.states.append(states)
00049             c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
00050         c.start = c.symbol2number[self.startsymbol]
00051         return c

Here is the call graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.make_label (   self,
  c,
  label 
)

Definition at line 61 of file pgen.py.

00061 
00062     def make_label(self, c, label):
00063         # XXX Maybe this should be a method on a subclass of converter?
00064         ilabel = len(c.labels)
00065         if label[0].isalpha():
00066             # Either a symbol name or a named token
00067             if label in c.symbol2number:
00068                 # A symbol name (a non-terminal)
00069                 if label in c.symbol2label:
00070                     return c.symbol2label[label]
00071                 else:
00072                     c.labels.append((c.symbol2number[label], None))
00073                     c.symbol2label[label] = ilabel
00074                     return ilabel
00075             else:
00076                 # A named token (NAME, NUMBER, STRING)
00077                 itoken = getattr(token, label, None)
00078                 assert isinstance(itoken, int), label
00079                 assert itoken in token.tok_name, label
00080                 if itoken in c.tokens:
00081                     return c.tokens[itoken]
00082                 else:
00083                     c.labels.append((itoken, None))
00084                     c.tokens[itoken] = ilabel
00085                     return ilabel
00086         else:
00087             # Either a keyword or an operator
00088             assert label[0] in ('"', "'"), label
00089             value = eval(label)
00090             if value[0].isalpha():
00091                 # A keyword
00092                 if value in c.keywords:
00093                     return c.keywords[value]
00094                 else:
00095                     c.labels.append((token.NAME, value))
00096                     c.keywords[value] = ilabel
00097                     return ilabel
00098             else:
00099                 # An operator (any non-numeric token)
00100                 itoken = grammar.opmap[value] # Fails if unknown token
00101                 if itoken in c.tokens:
00102                     return c.tokens[itoken]
00103                 else:
00104                     c.labels.append((itoken, None))
00105                     c.tokens[itoken] = ilabel
00106                     return ilabel

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 145 of file pgen.py.

00145 
00146     def parse(self):
00147         dfas = {}
00148         startsymbol = None
00149         # MSTART: (NEWLINE | RULE)* ENDMARKER
00150         while self.type != token.ENDMARKER:
00151             while self.type == token.NEWLINE:
00152                 self.gettoken()
00153             # RULE: NAME ':' RHS NEWLINE
00154             name = self.expect(token.NAME)
00155             self.expect(token.OP, ":")
00156             a, z = self.parse_rhs()
00157             self.expect(token.NEWLINE)
00158             #self.dump_nfa(name, a, z)
00159             dfa = self.make_dfa(a, z)
00160             #self.dump_dfa(name, dfa)
00161             oldlen = len(dfa)
00162             self.simplify_dfa(dfa)
00163             newlen = len(dfa)
00164             dfas[name] = dfa
00165             #print name, oldlen, newlen
00166             if startsymbol is None:
00167                 startsymbol = name
00168         return dfas, startsymbol

Here is the caller graph for this function:

Definition at line 266 of file pgen.py.

00266 
00267     def parse_alt(self):
00268         # ALT: ITEM+
00269         a, b = self.parse_item()
00270         while (self.value in ("(", "[") or
00271                self.type in (token.NAME, token.STRING)):
00272             c, d = self.parse_item()
00273             b.addarc(c)
00274             b = d
00275         return a, b

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 296 of file pgen.py.

00296 
00297     def parse_atom(self):
00298         # ATOM: '(' RHS ')' | NAME | STRING
00299         if self.value == "(":
00300             self.gettoken()
00301             a, z = self.parse_rhs()
00302             self.expect(token.OP, ")")
00303             return a, z
00304         elif self.type in (token.NAME, token.STRING):
00305             a = NFAState()
00306             z = NFAState()
00307             a.addarc(z, self.value)
00308             self.gettoken()
00309             return a, z
00310         else:
00311             self.raise_error("expected (...) or NAME or STRING, got %s/%s",
00312                              self.type, self.value)

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 276 of file pgen.py.

00276 
00277     def parse_item(self):
00278         # ITEM: '[' RHS ']' | ATOM ['+' | '*']
00279         if self.value == "[":
00280             self.gettoken()
00281             a, z = self.parse_rhs()
00282             self.expect(token.OP, "]")
00283             a.addarc(z)
00284             return a, z
00285         else:
00286             a, z = self.parse_atom()
00287             value = self.value
00288             if value not in ("+", "*"):
00289                 return a, z
00290             self.gettoken()
00291             z.addarc(a)
00292             if value == "+":
00293                 return a, z
00294             else:
00295                 return a, a

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 249 of file pgen.py.

00249 
00250     def parse_rhs(self):
00251         # RHS: ALT ('|' ALT)*
00252         a, z = self.parse_alt()
00253         if self.value != "|":
00254             return a, z
00255         else:
00256             aa = NFAState()
00257             zz = NFAState()
00258             aa.addarc(a)
00259             z.addarc(zz)
00260             while self.value == "|":
00261                 self.gettoken()
00262                 a, z = self.parse_alt()
00263                 aa.addarc(a)
00264                 z.addarc(zz)
00265             return aa, zz

Here is the call graph for this function:

Here is the caller graph for this function:

def lib2to3.pgen2.pgen.ParserGenerator.raise_error (   self,
  msg,
  args 
)

Definition at line 328 of file pgen.py.

00328 
00329     def raise_error(self, msg, *args):
00330         if args:
00331             try:
00332                 msg = msg % args
00333             except:
00334                 msg = " ".join([msg] + list(map(str, args)))
00335         raise SyntaxError(msg, (self.filename, self.end[0],
00336                                 self.end[1], self.line))

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 228 of file pgen.py.

00228 
00229     def simplify_dfa(self, dfa):
00230         # This is not theoretically optimal, but works well enough.
00231         # Algorithm: repeatedly look for two states that have the same
00232         # set of arcs (same labels pointing to the same nodes) and
00233         # unify them, until things stop changing.
00234 
00235         # dfa is a list of DFAState instances
00236         changes = True
00237         while changes:
00238             changes = False
00239             for i, state_i in enumerate(dfa):
00240                 for j in range(i+1, len(dfa)):
00241                     state_j = dfa[j]
00242                     if state_i == state_j:
00243                         #print "  unify", i, j
00244                         del dfa[j]
00245                         for state in dfa:
00246                             state.unifystate(state_j, state_i)
00247                         changes = True
00248                         break

Here is the call graph for this function:


Member Data Documentation

Definition at line 17 of file pgen.py.

Definition at line 24 of file pgen.py.

Definition at line 19 of file pgen.py.

Definition at line 325 of file pgen.py.

Definition at line 107 of file object.h.

struct _typeobject* _object::ob_type [inherited]

Definition at line 108 of file object.h.

Definition at line 21 of file pgen.py.

Definition at line 18 of file pgen.py.

Definition at line 150 of file pgen.py.

Definition at line 259 of file pgen.py.


The documentation for this class was generated from the following file: