Back to index

moin  1.9.0~rc2
scanner.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.scanner
00004     ~~~~~~~~~~~~~~~~
00005 
00006     This library implements a regex based scanner. Some languages
00007     like Pascal are easy to parse but have some keywords that
00008     depend on the context. Because of this it's impossible to lex
00009     that just by using a regular expression lexer like the
00010     `RegexLexer`.
00011 
00012     Have a look at the `DelphiLexer` to get an idea of how to use
00013     this scanner.
00014 
00015     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00016     :license: BSD, see LICENSE for details.
00017 """
00018 import re
00019 
00020 
class EndOfText(RuntimeError):
    """
    Signals that a match function was called after the scanner
    had already consumed all of its text.
    """
00026 
00027 
class Scanner(object):
    """
    Simple regex-driven scanner over a text buffer.

    All patterns passed to the methods are regular expression strings
    (not compiled expressions!). Each pattern is compiled once with the
    scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # Declared with the decorator so the property has no setter;
        # assigning to ``eos`` raises AttributeError.
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """
        Return the compiled expression for `pattern`, compiling it with
        the scanner's default flags and caching it on first use.
        """
        regex = self._re_cache.get(pattern)
        if regex is None:
            regex = self._re_cache[pattern] = re.compile(pattern, self.flags)
        return regex

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object (or `None` if it does not match).
        Doesn't touch pos. Use this for lookahead.

        Raises `EndOfText` if the end of the text was already reached.
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos."""
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        If the pattern does not match, ``match``, ``start_pos`` and
        ``pos`` are left unchanged, but ``last`` is still overwritten
        with the current ``match``.

        Raises `EndOfText` if the end of the text was already reached.
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # ``last`` is rotated before the match attempt, so it is updated
        # even when the pattern fails to match.
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char (including a newline)."""
        # The inline ``(?s)`` flag makes ``.`` match a newline even when
        # the scanner was not created with ``re.DOTALL``; a bare ``.``
        # would consume nothing at a line break.
        self.scan('(?s).')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )