Back to index

python3.2  3.2.2
Classes | Functions | Variables
sre_parse Namespace Reference

Classes

class  Pattern
class  SubPattern
class  Tokenizer

Functions

def isident
def isdigit
def isname
def _class_escape
def _escape
def _parse_sub
def _parse_sub_cond
def _parse
def fix_flags
def parse
def parse_template
def expand_template

Variables

string SPECIAL_CHARS = ".\\[{()*+?^$|"
string REPEAT_CHARS = "*+?{"
tuple DIGITS = set("0123456789")
tuple OCTDIGITS = set("01234567")
tuple HEXDIGITS = set("0123456789abcdefABCDEF")
tuple WHITESPACE = set(" \t\n\r\v\f")
dictionary ESCAPES
dictionary CATEGORIES
dictionary FLAGS
tuple _PATTERNENDERS = set("|)")
tuple _ASSERTCHARS = set("=!<")
tuple _LOOKBEHINDASSERTCHARS = set("=!")
tuple _REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
list start = set[:]
 if sourcematch(":"): pass # handle character classes
tuple this = sourceget()
tuple code1 = _class_escape(source, this)
tuple code2 = _class_escape(source, this)
list lo = code1[1]
list hi = code2[1]
tuple here = source.tell()
tuple min = int(lo)
tuple max = int(hi)
list item = subpattern[-1:]
int group = 1
 name = None
 condgroup = None
tuple char = sourceget()
tuple gid = state.groupdict.get(name)
int dir = 1
tuple p = _parse_sub(source, state)
string condname = ""
tuple code = _escape(source, this, state)

Function Documentation

def sre_parse._class_escape (   source,
  escape 
) [private]

Definition at line 232 of file sre_parse.py.

00232 
def _class_escape(source, escape):
    """Translate an escape sequence that appears inside a character class.

    ``source`` is the object being read: its ``next`` attribute is peeked
    and ``get()`` is called to consume one more character.  ``escape`` is
    the escape text collected so far, starting with the backslash.

    Returns the entry from ESCAPES or CATEGORIES when ``escape`` is listed
    in one of those tables (ESCAPES is consulted first); otherwise returns
    a ``(LITERAL, code)`` pair.  Raises ``error`` with a "bogus escape"
    message when the sequence cannot be interpreted.
    """
    code = ESCAPES.get(escape)
    if code:
        return code
    code = CATEGORIES.get(escape)
    if code:
        return code
    try:
        c = escape[1:2]
        if c == "x":
            # hexadecimal escape (exactly two digits)
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            digits = escape[2:]
            if len(digits) != 2:
                # Report the escape exactly as it was read, including the
                # "x", which is also how _escape reports a bad hex escape.
                raise error("bogus escape: %s" % repr(escape))
            return LITERAL, int(digits, 16) & 0xff
        elif c in OCTDIGITS:
            # octal escape (up to three digits)
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return LITERAL, int(escape[1:], 8) & 0xff
        elif c in DIGITS:
            # 8 or 9: not octal, and this function never produces a
            # group reference, so reject it
            raise error("bogus escape: %s" % repr(escape))
        if len(escape) == 2:
            # any other single escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))

Here is the call graph for this function:

def sre_parse._escape (   source,
  escape,
  state 
) [private]

Definition at line 264 of file sre_parse.py.

00264 
def _escape(source, escape, state):
    """Translate an escape sequence that appears in a pattern body.

    Looks ``escape`` up in CATEGORIES, then ESCAPES, and returns the table
    entry when one is found.  Otherwise decodes hexadecimal and octal
    escapes to ``(LITERAL, code)``, decimal escapes to
    ``(GROUPREF, group)``, and any other two-character escape to the
    literal escaped character.  Raises ``error`` when none of these apply.
    """
    for table in (CATEGORIES, ESCAPES):
        known = table.get(escape)
        if known:
            return known
    try:
        lead = escape[1:2]
        if lead == "x":
            # hexadecimal escape: backslash, "x" and two hex digits
            while source.next in HEXDIGITS and len(escape) < 4:
                escape += source.get()
            if len(escape) != 4:
                raise ValueError
            return LITERAL, int(escape[2:], 16) & 0xff
        if lead == "0":
            # octal escape introduced by a zero
            while source.next in OCTDIGITS and len(escape) < 4:
                escape += source.get()
            return LITERAL, int(escape[1:], 8) & 0xff
        if lead in DIGITS:
            # either a three-digit octal escape or a decimal group reference
            if source.next in DIGITS:
                escape += source.get()
                three_octal_digits = (escape[1] in OCTDIGITS and
                                      escape[2] in OCTDIGITS and
                                      source.next in OCTDIGITS)
                if three_octal_digits:
                    escape += source.get()
                    return LITERAL, int(escape[1:], 8) & 0xff
            # otherwise the digits name a group
            group = int(escape[1:])
            if not group < state.groups:
                raise ValueError
            if not state.checkgroup(group):
                raise error("cannot refer to open group")
            return GROUPREF, group
        if len(escape) == 2:
            return LITERAL, ord(escape[1])
    except ValueError:
        # falls through to the generic "bogus escape" report below
        pass
    raise error("bogus escape: %s" % repr(escape))

Here is the call graph for this function:

def sre_parse._parse (   source,
  state 
) [private]

Definition at line 386 of file sre_parse.py.

00386 
00387 def _parse(source, state):
00388     # parse a simple pattern
00389     subpattern = SubPattern(state)
00390 
00391     # precompute constants into local variables
00392     subpatternappend = subpattern.append
00393     sourceget = source.get
00394     sourcematch = source.match
00395     _len = len
00396     PATTERNENDERS = _PATTERNENDERS
00397     ASSERTCHARS = _ASSERTCHARS
00398     LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
00399     REPEATCODES = _REPEATCODES
00400 
00401     while 1:
00402 
00403         if source.next in PATTERNENDERS:
00404             break # end of subpattern
00405         this = sourceget()
00406         if this is None:
00407             break # end of pattern
00408 
00409         if state.flags & SRE_FLAG_VERBOSE:
00410             # skip whitespace and comments
00411             if this in WHITESPACE:
00412                 continue
00413             if this == "#":
00414                 while 1:
00415                     this = sourceget()
00416                     if this in (None, "\n"):
00417                         break
00418                 continue
00419 
00420         if this and this[0] not in SPECIAL_CHARS:
00421             subpatternappend((LITERAL, ord(this)))
00422 
00423         elif this == "[":
00424             # character set
00425             set = []
            setappend = set.append

Here is the call graph for this function:

Here is the caller graph for this function:

def sre_parse._parse_sub (   source,
  state,
  nested = 1 
) [private]

Definition at line 308 of file sre_parse.py.

00308 
def _parse_sub(source, state, nested=1):
    """Parse an alternation of the form ``a|b|c``.

    Calls ``_parse`` once per alternative.  When ``nested`` is true the
    alternation must be followed by end of input or by ")", otherwise
    ``error`` is raised.  A single alternative is returned as-is; several
    alternatives are returned wrapped in a new SubPattern.
    """
    branches = []
    match = source.match
    while True:
        branches.append(_parse(source, state))
        if match("|"):
            continue
        if not nested:
            break
        if not source.next or match(")", 0):
            break
        raise error("pattern not properly closed")

    if len(branches) == 1:
        return branches[0]

    result = SubPattern(state)

    # Repeatedly move a leading element shared by every branch out in
    # front of the alternation.
    while True:
        prefix = None
        for branch in branches:
            if not branch:
                break
            if prefix is None:
                prefix = branch[0]
            elif branch[0] != prefix:
                break
        else:
            # every branch starts with the same element
            for branch in branches:
                del branch[0]
            result.append(prefix)
            continue  # look for another shared element
        break

    # When every branch is exactly one literal, store the alternation as
    # a character set instead of a branch.
    if all(len(branch) == 1 and branch[0][0] == LITERAL
           for branch in branches):
        charset = [branch[0] for branch in branches]
        result.append((IN, charset))
        return result

    result.append((BRANCH, (None, branches)))
    return result

Here is the call graph for this function:

Here is the caller graph for this function:

def sre_parse._parse_sub_cond (   source,
  state,
  condgroup 
) [private]

Definition at line 367 of file sre_parse.py.

00367 
def _parse_sub_cond(source, state, condgroup):
    """Parse the one or two branches of a conditional on group ``condgroup``.

    Returns a new SubPattern holding a single GROUPREF_EXISTS entry whose
    argument is ``(condgroup, yes_branch, no_branch)``; ``no_branch`` is
    None when the pattern has no "|".  Raises ``error`` for a third branch
    or when the construct is not followed by ")" or end of input.
    """
    item_yes = _parse(source, state)
    item_no = None
    if source.match("|"):
        item_no = _parse(source, state)
        if source.match("|"):
            raise error("conditional backref with more than two branches")
    if source.next and not source.match(")", 0):
        raise error("pattern not properly closed")
    result = SubPattern(state)
    result.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
    return result

Here is the call graph for this function:

def sre_parse.expand_template (   template,
  match 
)

Definition at line 804 of file sre_parse.py.

00804 
def expand_template(template, match):
    """Fill a parsed replacement template with text from ``match``.

    ``template`` is a ``(groups, literals)`` pair: ``groups`` holds
    ``(index, group)`` pairs and ``literals`` is the list of pieces to
    join.  Each ``literals[index]`` slot is filled with
    ``match.group(group)`` in a copy of the list, and the pieces are joined
    with an empty separator of the same type as ``match.string``.

    Raises ``error`` when a referenced group did not participate in the
    match or when an IndexError occurs while filling the slots.
    """
    fetch = match.group
    empty = match.string[:0]
    groups, literals = template
    pieces = literals[:]  # work on a copy; the caller's list is left alone
    try:
        for slot, ref in groups:
            text = fetch(ref)
            pieces[slot] = text
            if text is None:
                raise error("unmatched group")
    except IndexError:
        raise error("invalid group reference")
    return empty.join(pieces)

Here is the caller graph for this function:

def sre_parse.fix_flags (   src,
  flags 
)

Definition at line 670 of file sre_parse.py.

00670 
def fix_flags(src, flags):
    """Check ``flags`` against the pattern type and return the fixed value.

    For a ``str`` pattern, SRE_FLAG_UNICODE is added unless SRE_FLAG_ASCII
    is set; having both set raises ValueError.  For any other pattern type,
    SRE_FLAG_UNICODE raises ValueError and the flags are otherwise returned
    unchanged.
    """
    if not isinstance(src, str):
        if flags & SRE_FLAG_UNICODE:
            raise ValueError("can't use UNICODE flag with a bytes pattern")
        return flags
    if flags & SRE_FLAG_ASCII:
        if flags & SRE_FLAG_UNICODE:
            raise ValueError("ASCII and UNICODE flags are incompatible")
        return flags
    return flags | SRE_FLAG_UNICODE

Here is the caller graph for this function:

def sre_parse.isdigit (   char)

Definition at line 220 of file sre_parse.py.

00220 
def isdigit(char):
    """Return True when ``char`` compares between "0" and "9" inclusive."""
    return "0" <= char and char <= "9"

Here is the caller graph for this function:

def sre_parse.isident (   char)

Definition at line 217 of file sre_parse.py.

00217 
def isident(char):
    """Return True when ``char`` is an ASCII letter or an underscore."""
    if "a" <= char <= "z":
        return True
    if "A" <= char <= "Z":
        return True
    return char == "_"

Here is the caller graph for this function:

def sre_parse.isname (   name)

Definition at line 223 of file sre_parse.py.

00223 
def isname(name):
    """Return True when ``name`` is a valid group name.

    A valid name starts with a character accepted by ``isident`` and every
    following character is accepted by ``isident`` or ``isdigit``.  An
    empty name is not valid and yields False rather than an IndexError.
    """
    if not name or not isident(name[0]):
        return False
    return all(isident(char) or isdigit(char) for char in name[1:])

Here is the call graph for this function:

Here is the caller graph for this function:

def sre_parse.parse (   str,
  flags = 0,
  pattern = None 
)

Definition at line 682 of file sre_parse.py.

00682 
def parse(str, flags=0, pattern=None):
    """Parse an 're' pattern and return the result of ``_parse_sub``.

    A Tokenizer is built over ``str``; ``pattern`` (a new Pattern when None
    is given) receives ``flags`` and ``str`` and is used as the parse
    state.  The flags on the result are passed through ``fix_flags``.

    Raises ``error`` when input remains after parsing.  If the VERBOSE flag
    was not passed in but is set on the result, the pattern is parsed again
    with the resulting flags.
    """
    source = Tokenizer(str)

    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    pattern.str = str

    tree = _parse_sub(source, pattern, 0)
    tree.pattern.flags = fix_flags(str, tree.pattern.flags)

    leftover = source.get()
    if leftover == ")":
        raise error("unbalanced parenthesis")
    if leftover:
        raise error("bogus characters at end of regular expression")

    if flags & SRE_FLAG_DEBUG:
        tree.dump()

    verbose_on_entry = flags & SRE_FLAG_VERBOSE
    if not verbose_on_entry and tree.pattern.flags & SRE_FLAG_VERBOSE:
        # VERBOSE was switched on inside the pattern itself; parse the
        # whole thing again with that flag in effect from the start
        return parse(str, tree.pattern.flags)

    return tree

Here is the call graph for this function:

Here is the caller graph for this function:

def sre_parse.parse_template (   source,
  pattern 
)

Definition at line 711 of file sre_parse.py.

00711 
def parse_template(source, pattern):
    """Parse an 're' replacement string into group references and literals.

    ``source`` is the replacement text and ``pattern`` supplies
    ``groupindex`` for resolving ``\\g<name>`` references.

    Returns ``(groups, literals)``: ``groups`` is a list of
    ``(index, group)`` pairs and ``literals`` is a list with the literal
    text at each position, or None at the positions listed in ``groups``.
    When ``source`` is not a ``str`` the literal pieces are encoded as
    latin-1.

    Raises ``error`` for a malformed group reference and IndexError for an
    unknown group name.
    """
    tokens = Tokenizer(source)
    next_token = tokens.get
    parts = []  # (LITERAL, text) and (MARK, group) entries, in order

    def add_literal(text):
        # Merge with the previous entry when it is literal text as well.
        if parts and parts[-1][0] is LITERAL:
            parts[-1] = LITERAL, parts[-1][1] + text
        else:
            parts.append((LITERAL, text))

    while True:
        this = next_token()
        if this is None:
            break  # end of replacement string
        if not (this and this[0] == "\\"):
            add_literal(this)
            continue

        c = this[1:2]
        if c == "g":
            # \g<name> or \g<number>
            name = ""
            if tokens.match("<"):
                while True:
                    char = next_token()
                    if char is None:
                        raise error("unterminated group name")
                    if char == ">":
                        break
                    name = name + char
            if not name:
                raise error("bad group name")
            try:
                index = int(name)
                if index < 0:
                    raise error("negative group number")
            except ValueError:
                # not a number, so it has to be a known group name
                if not isname(name):
                    raise error("bad character in group name")
                try:
                    index = pattern.groupindex[name]
                except KeyError:
                    raise IndexError("unknown group name")
            parts.append((MARK, index))
        elif c == "0":
            # octal escape introduced by a zero (up to three digits)
            if tokens.next in OCTDIGITS:
                this = this + next_token()
                if tokens.next in OCTDIGITS:
                    this = this + next_token()
            add_literal(chr(int(this[1:], 8) & 0xff))
        elif c in DIGITS:
            # three octal digits form a character; otherwise the one or
            # two digits are a group number
            isoctal = False
            if tokens.next in DIGITS:
                this = this + next_token()
                if (c in OCTDIGITS and this[2] in OCTDIGITS and
                        tokens.next in OCTDIGITS):
                    this = this + next_token()
                    isoctal = True
                    add_literal(chr(int(this[1:], 8) & 0xff))
            if not isoctal:
                parts.append((MARK, int(this[1:])))
        else:
            # escapes listed in ESCAPES are translated; anything else is
            # kept as written
            try:
                this = chr(ESCAPES[this][1])
            except KeyError:
                pass
            add_literal(this)

    # convert template to groups and literals lists
    if isinstance(source, str):
        encode = lambda x: x
    else:
        # The tokenizer implicitly decodes bytes objects as latin-1, we must
        # therefore re-encode the final representation.
        encode = lambda x: x.encode('latin1')
    groups = []
    literals = [None] * len(parts)
    for i, (kind, value) in enumerate(parts):
        if kind is MARK:
            # literals[i] stays None for a group reference
            groups.append((i, value))
        else:
            literals[i] = encode(value)
    return groups, literals

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

tuple sre_parse._ASSERTCHARS = set("=!<")

Definition at line 382 of file sre_parse.py.

tuple sre_parse._LOOKBEHINDASSERTCHARS = set("=!")

Definition at line 383 of file sre_parse.py.

tuple sre_parse._PATTERNENDERS = set("|)")

Definition at line 381 of file sre_parse.py.

tuple sre_parse._REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])

Definition at line 384 of file sre_parse.py.

dictionary sre_parse.CATEGORIES

Initial value:
00001 {
00002     r"\A": (AT, AT_BEGINNING_STRING), # start of string
00003     r"\b": (AT, AT_BOUNDARY),
00004     r"\B": (AT, AT_NON_BOUNDARY),
00005     r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
00006     r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
00007     r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
00008     r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
00009     r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
00010     r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
00011     r"\Z": (AT, AT_END_STRING), # end of string
00012 }

Definition at line 40 of file sre_parse.py.

tuple sre_parse.char = sourceget()

Definition at line 544 of file sre_parse.py.

tuple sre_parse.code = _escape(source, this, state)

Definition at line 662 of file sre_parse.py.

tuple sre_parse.code1 = _class_escape(source, this)

Definition at line 437 of file sre_parse.py.

tuple sre_parse.code2 = _class_escape(source, this)

Definition at line 453 of file sre_parse.py.

tuple sre_parse.condgroup = None

Definition at line 534 of file sre_parse.py.

string sre_parse.condname = ""

Definition at line 606 of file sre_parse.py.

tuple sre_parse.DIGITS = set("0123456789")

Definition at line 22 of file sre_parse.py.

int sre_parse.dir = 1

Definition at line 590 of file sre_parse.py.

dictionary sre_parse.ESCAPES

Initial value:
00001 {
00002     r"\a": (LITERAL, ord("\a")),
00003     r"\b": (LITERAL, ord("\b")),
00004     r"\f": (LITERAL, ord("\f")),
00005     r"\n": (LITERAL, ord("\n")),
00006     r"\r": (LITERAL, ord("\r")),
00007     r"\t": (LITERAL, ord("\t")),
00008     r"\v": (LITERAL, ord("\v")),
00009     r"\\": (LITERAL, ord("\\"))
00010 }

Definition at line 29 of file sre_parse.py.

dictionary sre_parse.FLAGS

Initial value:
00001 {
00002     # standard flags
00003     "i": SRE_FLAG_IGNORECASE,
00004     "L": SRE_FLAG_LOCALE,
00005     "m": SRE_FLAG_MULTILINE,
00006     "s": SRE_FLAG_DOTALL,
00007     "x": SRE_FLAG_VERBOSE,
00008     # extensions
00009     "a": SRE_FLAG_ASCII,
00010     "t": SRE_FLAG_TEMPLATE,
00011     "u": SRE_FLAG_UNICODE,
00012 }

Definition at line 53 of file sre_parse.py.

tuple sre_parse.gid = state.groupdict.get(name)

Definition at line 565 of file sre_parse.py.

tuple sre_parse.group = 1

Definition at line 532 of file sre_parse.py.

tuple sre_parse.here = source.tell()

Definition at line 492 of file sre_parse.py.

tuple sre_parse.HEXDIGITS = set("0123456789abcdefABCDEF")

Definition at line 25 of file sre_parse.py.

tuple sre_parse.hi = code2[1]

Definition at line 459 of file sre_parse.py.

sre_parse.item = subpattern[-1:]

Definition at line 516 of file sre_parse.py.

tuple sre_parse.lo = code1[1]

Definition at line 458 of file sre_parse.py.

tuple sre_parse.max = int(hi)

Definition at line 509 of file sre_parse.py.

tuple sre_parse.min = int(lo)

Definition at line 507 of file sre_parse.py.

sre_parse.name = None

Definition at line 533 of file sre_parse.py.

tuple sre_parse.OCTDIGITS = set("01234567")

Definition at line 24 of file sre_parse.py.

tuple sre_parse.p = _parse_sub(source, state)

Definition at line 596 of file sre_parse.py.

string sre_parse.REPEAT_CHARS = "*+?{"

Definition at line 20 of file sre_parse.py.

string sre_parse.SPECIAL_CHARS = ".\\[{()*+?^$|"

Definition at line 19 of file sre_parse.py.

list sre_parse.start = set[:]

if sourcematch(":"): pass # handle character classes

Definition at line 431 of file sre_parse.py.

tuple sre_parse.this = sourceget()

Definition at line 433 of file sre_parse.py.

tuple sre_parse.WHITESPACE = set(" \t\n\r\v\f")

Definition at line 27 of file sre_parse.py.