Back to index

moin  1.9.0~rc2
_ParserBase.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Base Source Parser
00004 
00005     @copyright: 2002 by Taesu Pyo <bigflood@hitel.net>,
00006                 2005 by Oliver Graf <ograf@bitart.de>,
00007                 2005-2008 MoinMoin:ThomasWaldmann
00008 
00009     @license: GNU GPL, see COPYING for details.
00010 
00011 
00012 basic css:
00013 
00014 pre.codearea     { font-family: sans-serif; color: #000000; }
00015 
00016 pre.codearea span.ID       { color: #000000; }
00017 pre.codearea span.Char     { color: #004080; }
00018 pre.codearea span.Comment  { color: #808080; }
00019 pre.codearea span.Number   { color: #008080; font-weight: bold; }
00020 pre.codearea span.String   { color: #004080; }
00021 pre.codearea span.SPChar   { color: #0000C0; }
00022 pre.codearea span.ResWord  { color: #4040ff; font-weight: bold; }
00023 pre.codearea span.ConsWord { color: #008080; font-weight: bold; }
00024 
00025 """
00026 
00027 import re
00028 
00029 from MoinMoin import log
00030 logging = log.getLogger(__name__)
00031 
00032 from MoinMoin import config, wikiutil
00033 from MoinMoin.support.python_compatibility import hash_new
00034 from MoinMoin.parser import parse_start_step
00035 
00036 
class FormatTextBase:
    """ Common base class of all token formatter objects.

    It carries no behaviour of its own; ParserBase.addWords uses it in an
    isinstance() check to tell ready-made formatter objects apart from
    plain format names.
    """
00039 
class FormatBeginLine(FormatTextBase):
    """ Formatter object used for the artificial begin-of-line marker. """

    def formatString(self, formatter, word):
        """ Return whatever formatter.code_line(1) produces.

        @param formatter: output formatter providing code_line()
        @param word: the matched text; ignored
        @return: result of formatter.code_line(1)
        """
        line_start = formatter.code_line(1)
        return line_start
00043 
class FormatEndLine(FormatTextBase):
    """ Formatter object used for the artificial end-of-line marker. """

    def formatString(self, formatter, word):
        """ Return whatever formatter.code_line(0) produces.

        @param formatter: output formatter providing code_line()
        @param word: the matched text; ignored
        @return: result of formatter.code_line(0)
        """
        line_end = formatter.code_line(0)
        return line_end
00047 
class FormatText(FormatTextBase):
    """ Renders a token wrapped in the formatter's code_token markup. """

    def __init__(self, fmt):
        """
        @param fmt: token class handed to formatter.code_token(), e.g. the
                    'ResWord' or 'ConsWord' names used by ParserBase
        """
        self.fmt = fmt

    def formatString(self, formatter, word):
        """ Render word between an opening and a closing code_token.

        @param formatter: output formatter providing code_token() and text()
        @param word: the token text to render
        @return: opening token + rendered text + closing token
        """
        # the three formatter calls are made in output order
        opening = formatter.code_token(1, self.fmt)
        body = formatter.text(word)
        closing = formatter.code_token(0, self.fmt)
        return opening + body + closing
00057 
class FormatTextID(FormatTextBase):
    """ Formatter for identifiers that picks a format per word.

    Words registered through addFormat() are rendered with their own
    formatter object, every other word with the default format.
    """

    def __init__(self, fmt, icase=False):
        """
        @param fmt: default format; either a formatter object (any
                    FormatTextBase instance) or a format name, which gets
                    wrapped into a FormatText
        @param icase: if true, words are looked up case-insensitively
        """
        # Accept every formatter object, not only FormatText - this is the
        # same check ParserBase.addWords uses. Wrapping some other
        # FormatTextBase instance into FormatText would pass that object
        # to formatter.code_token() as if it were a format name.
        if not isinstance(fmt, FormatTextBase):
            fmt = FormatText(fmt)
        self.setDefaultFormat(fmt)
        self._ignore_case = icase
        self.fmt = {} # maps (possibly lowercased) word -> formatter object

    def setDefaultFormat(self, fmt):
        """ Set the formatter object used for words without own format.

        @param fmt: object providing formatString(formatter, word)
        """
        self._def_fmt = fmt

    def addFormat(self, word, fmt):
        """ Register a special format for a word.

        @param word: the word to register (lowercased if ignoring case)
        @param fmt: object providing formatString(formatter, word)
        """
        if self._ignore_case:
            word = word.lower()
        self.fmt[word] = fmt

    def formatString(self, formatter, word):
        """ Render word with its registered format or the default one.

        @param formatter: output formatter, passed through unchanged
        @param word: the identifier to render; the lookup key is
                     lowercased if ignoring case, the rendered text is not
        @return: result of the chosen format's formatString()
        """
        key = word
        if self._ignore_case:
            key = word.lower()
        chosen = self.fmt.get(key, self._def_fmt)
        return chosen.formatString(formatter, word)
00081 
00082 
class FormattingRuleSingle:
    """ Rule for tokens that are matched completely by one regex. """

    def __init__(self, name, str_re, icase=False):
        """
        @param name: rule name, used by ParserBase to look up the format
        @param str_re: regex source string matching the whole token
        @param icase: accepted but not used here; ParserBase.format()
                      applies re.I itself when compiling the combined regex
        """
        self.str_re = str_re
        self.name = name

    def getStartRe(self):
        """ Return the regex source string of this rule. """
        pattern = self.str_re
        return pattern

    def getText(self, parser, hit):
        """ Return the text belonging to this token.

        @param parser: the calling parser; not used
        @param hit: the text matched by the rule's regex
        @return: hit, unchanged
        """
        return hit
00094 
00095 
class FormattingRulePair:
    """ Rule for tokens delimited by a begin and an end regex. """

    def __init__(self, name, str_begin, str_end, icase=False):
        """
        @param name: rule name, used by ParserBase to look up the format
        @param str_begin: regex source string matching the token start
        @param str_end: regex source string matching the token end
        @param icase: if true, the end regex is compiled with re.I
        """
        self.name = name
        self.str_begin = str_begin
        self.str_end = str_end
        if icase:
            flags = re.M | re.I
        else:
            flags = re.M
        self.end_re = re.compile(str_end, flags)

    def getStartRe(self):
        """ Return the regex source string matching the token start. """
        return self.str_begin

    def getText(self, parser, hit):
        """ Return the complete token and advance the parser behind it.

        Searches parser.text for the end regex starting at parser.lastpos.
        If there is no end match, the token runs to parser.text_len.

        @param parser: object with text, text_len and lastpos attributes;
                       lastpos is updated to the position after the token
        @param hit: the text matched by the begin regex
        @return: hit plus the text up to and including the end match
        """
        start = parser.lastpos
        found = self.end_re.search(parser.text, start)
        if found:
            stop = found.end()
            if stop == start:
                # zero-length end match right at the start position:
                # consume one character so the parser keeps advancing
                stop += 1
        else:
            stop = parser.text_len
        remainder = parser.text[start:stop]
        parser.lastpos = stop
        return hit + remainder
00119 
00120 
00121 class ParserBase:
00122     """ DEPRECATED highlighting parser - please use/extend pygments library """
00123     logging.warning('Using ParserBase is deprecated - please use/extend pygments syntax highlighting library.')
00124 
00125     parsername = 'ParserBase'
00126     tabwidth = 4
00127 
00128     # for dirty tricks, see comment in format():
00129     STARTL, STARTL_RE = u"^\n", ur"\^\n"
00130     ENDL, ENDL_RE = u"\n$", ur"\n\$"
00131     LINESEP = ENDL + STARTL
00132 
00133     def __init__(self, raw, request, **kw):
00134         self.raw = raw
00135         self.request = request
00136         self.show_nums, self.num_start, self.num_step, attrs = parse_start_step(request, kw.get('format_args', ''))
00137 
00138         self._ignore_case = False
00139         self._formatting_rules = []
00140         self._formatting_rules_n2r = {}
00141         self._formatting_rule_index = 0
00142         self.rule_fmt = {}
00143         #self.line_count = len(raw.split('\n')) + 1
00144 
00145     def setupRules(self):
00146         self.addRuleFormat("BEGINLINE", FormatBeginLine())
00147         self.addRuleFormat("ENDLINE", FormatEndLine())
00148         # we need a little dirty trick here, see comment in format():
00149         self.addRule("BEGINLINE", self.STARTL_RE)
00150         self.addRule("ENDLINE", self.ENDL_RE)
00151 
00152         self.def_format = FormatText('Default')
00153         self.reserved_word_format = FormatText('ResWord')
00154         self.constant_word_format = FormatText('ConsWord')
00155         self.ID_format = FormatTextID('ID', self._ignore_case)
00156         self.addRuleFormat("ID", self.ID_format)
00157         self.addRuleFormat("Operator")
00158         self.addRuleFormat("Char")
00159         self.addRuleFormat("Comment")
00160         self.addRuleFormat("Number")
00161         self.addRuleFormat("String")
00162         self.addRuleFormat("SPChar")
00163         self.addRuleFormat("ResWord")
00164         self.addRuleFormat("ResWord2")
00165         self.addRuleFormat("ConsWord")
00166         self.addRuleFormat("Special")
00167         self.addRuleFormat("Preprc")
00168         self.addRuleFormat("Error")
00169 
00170     def _addRule(self, name, fmt):
00171         self._formatting_rule_index += 1
00172         name = "%s_%s" % (name, self._formatting_rule_index) # create unique name
00173         self._formatting_rules.append((name, fmt))
00174         self._formatting_rules_n2r[name] = fmt
00175 
00176     def addRule(self, name, str_re):
00177         self._addRule(name, FormattingRuleSingle(name, str_re, self._ignore_case))
00178 
00179     def addRulePair(self, name, start_re, end_re):
00180         self._addRule(name, FormattingRulePair(name, start_re, end_re, self._ignore_case))
00181 
00182     def addWords(self, words, fmt):
00183         if not isinstance(fmt, FormatTextBase):
00184             fmt = FormatText(fmt)
00185         for w in words:
00186             self.ID_format.addFormat(w, fmt)
00187 
00188     def addReserved(self, words):
00189         self.addWords(words, self.reserved_word_format)
00190 
00191     def addConstant(self, words):
00192         self.addWords(words, self.constant_word_format)
00193 
00194     def addRuleFormat(self, name, fmt=None):
00195         if fmt is None:
00196             fmt = FormatText(name)
00197         self.rule_fmt[name] = fmt
00198 
00199     def format(self, formatter, form=None):
00200         """ Send the text.
00201         """
00202 
00203         self.setupRules()
00204 
00205         formatting_regexes = ["(?P<%s>%s)" % (n, f.getStartRe())
00206                               for n, f in self._formatting_rules]
00207         re_flags = re.M
00208         if self._ignore_case:
00209             re_flags |= re.I
00210         scan_re = re.compile("|".join(formatting_regexes), re_flags)
00211 
00212         self.text = self.raw
00213 
00214         # dirty little trick to work around re lib's limitations (it can't have
00215         # zero length matches at line beginning for ^ and at the same time match
00216         # something else at the beginning of the line):
00217         self.text = self.LINESEP.join([line.replace('\r', '') for line in self.text.splitlines()])
00218         self.text = self.STARTL + self.text + self.ENDL
00219         self.text_len = len(self.text)
00220 
00221         result = [] # collects output
00222 
00223         self._code_id = hash_new('sha1', self.raw.encode(config.charset)).hexdigest()
00224         result.append(formatter.code_area(1, self._code_id, self.parsername, self.show_nums, self.num_start, self.num_step))
00225 
00226         self.lastpos = 0
00227         match = scan_re.search(self.text)
00228         while match and self.lastpos < self.text_len:
00229             # add the rendering of the text left of the match we found
00230             text = self.text[self.lastpos:match.start()]
00231             if text:
00232                 result.extend(self.format_normal_text(formatter, text))
00233             self.lastpos = match.end() + (match.end() == self.lastpos)
00234 
00235             # add the rendering of the match we found
00236             result.extend(self.format_match(formatter, match))
00237 
00238             # search for the next one
00239             match = scan_re.search(self.text, self.lastpos)
00240 
00241         # add the rendering of the text right of the last match we found
00242         text = self.text[self.lastpos:]
00243         if text:
00244             result.extend(self.format_normal_text(formatter, text))
00245 
00246         result.append(formatter.code_area(0, self._code_id))
00247         self.request.write(''.join(result))
00248 
00249     def format_normal_text(self, formatter, text):
00250         return [formatter.text(text.expandtabs(self.tabwidth))]
00251 
00252     def format_match(self, formatter, match):
00253         result = []
00254         for n, hit in match.groupdict().items():
00255             if hit is None:
00256                 continue
00257             r = self._formatting_rules_n2r[n]
00258             s = r.getText(self, hit)
00259             c = self.rule_fmt.get(r.name, None)
00260             if not c:
00261                 c = self.def_format
00262             if s:
00263                 lines = s.expandtabs(self.tabwidth).split(self.LINESEP)
00264                 for line in lines[:-1]:
00265                     result.append(c.formatString(formatter, line))
00266                     result.append(FormatEndLine().formatString(formatter, ''))
00267                     result.append(FormatBeginLine().formatString(formatter, ''))
00268                 result.append(c.formatString(formatter, lines[-1]))
00269         return result
00270