Back to index

moin  1.9.0~rc2
text_moin_wiki.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - MoinMoin Wiki Markup Parser
00004 
00005     @copyright: 2000-2002 Juergen Hermann <jh@web.de>,
00006                 2006-2008 MoinMoin:ThomasWaldmann,
00007                 2007 by MoinMoin:ReimarBauer
00008     @license: GNU GPL, see COPYING for details.
00009 """
00010 
00011 import re
00012 
00013 from MoinMoin import log
00014 logging = log.getLogger(__name__)
00015 
00016 from MoinMoin import config, wikiutil, macro
00017 from MoinMoin.Page import Page
00018 from MoinMoin.support.python_compatibility import set
00019 
00020 Dependencies = ['user'] # {{{#!wiki comment ... }}} has different output depending on the user's profile settings
00021 
00022 
00023 _ = lambda x: x
00024 
00025 class Parser:
00026     """
00027         Parse wiki format markup (and call the formatter to generate output).
00028 
00029         All formatting commands can be parsed one line at a time, though
00030         some state is carried over between lines.
00031 
00032         Methods named like _*_repl() are responsible to handle the named regex patterns.
00033     """
00034 
00035     # allow caching
00036     caching = 1
00037     Dependencies = Dependencies
00038     quickhelp = _(u"""\
00039  Emphasis:: <<Verbatim('')>>''italics''<<Verbatim('')>>; <<Verbatim(''')>>'''bold'''<<Verbatim(''')>>; <<Verbatim(''''')>>'''''bold italics'''''<<Verbatim(''''')>>; <<Verbatim('')>>''mixed ''<<Verbatim(''')>>'''''bold'''<<Verbatim(''')>> and italics''<<Verbatim('')>>; <<Verbatim(----)>> horizontal rule.
00040  Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
00041  Lists:: space and one of: * bullets; 1., a., A., i., I. numbered items; 1.#n start numbering at n; space alone indents.
00042  Links:: <<Verbatim(JoinCapitalizedWords)>>; <<Verbatim([[target|linktext]])>>.
00043  Tables:: || cell text |||| cell text spanning 2 columns ||;    no trailing white space allowed after tables or titles.
00044 
00045 (!) For more help, see HelpOnEditing or HelpOnMoinWikiSyntax.
00046 """)
00047 
00048     # some common strings
00049     CHILD_PREFIX = wikiutil.CHILD_PREFIX
00050     CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN
00051     PARENT_PREFIX = wikiutil.PARENT_PREFIX
00052     PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN
00053 
00054     punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
00055     url_scheme = u'|'.join(config.url_schemas)
00056 
00057     # some common rules
00058     url_rule = ur'''
00059         (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
00060         (?P<url_target>  # capture whole url there
00061          (?P<url_scheme>%(url_scheme)s)  # some scheme
00062          \:
00063          \S+?  # anything non-whitespace
00064         )
00065         (?:$|(?=\s|[%(punct)s]+(\s|$)))  # require either end of line or some whitespace or some punctuation+blank/eol afterwards
00066     ''' % {
00067         'url_scheme': url_scheme,
00068         'punct': punct_pattern,
00069     }
00070 
00071     # this is for a free (non-bracketed) interwiki link - to avoid false positives,
00072     # we are rather restrictive here (same as in moin 1.5: require that the
00073     # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code
00074     # also checks whether the wiki name is in the interwiki map (if not, it renders
00075     # normal text, no link):
00076     interwiki_rule = ur'''
00077         (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
00078         (?P<interwiki_wiki>[A-Z][a-zA-Z]+)  # interwiki wiki name
00079         \:
00080         (?P<interwiki_page>  # interwiki page name
00081          (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ )  # make sure there is something non-blank with at least one alphanum letter following
00082          [^\s%(punct)s]+  # we take all until we hit some blank or punctuation char ...
00083         )
00084     ''' % {
00085         'u': config.chars_upper,
00086         'l': config.chars_lower,
00087         'punct': punct_pattern,
00088     }
00089 
00090     # BE CAREFUL: if you do changes to word_rule, consider doing them also to word_rule_js (see below)
00091     word_rule = ur'''
00092         (?:
00093          (?<![%(u)s%(l)s/])  # require anything not upper/lower/slash before
00094          |
00095          ^  # ... or beginning of line
00096         )
00097         (?P<word_bang>\!)?  # configurable: avoid getting CamelCase rendered as link
00098         (?P<word_name>
00099          (?:
00100           (%(parent)s)*  # there might be either ../ parent prefix(es)
00101           |
00102           ((?<!%(child)s)%(child)s)?  # or maybe a single / child prefix (but not if we already had it before)
00103          )
00104          (
00105           ((?<!%(child)s)%(child)s)?  # there might be / child prefix (but not if we already had it before)
00106           (?:[%(u)s][%(l)s]+){2,}  # at least 2 upper>lower transitions make CamelCase
00107          )+  # we can have MainPage/SubPage/SubSubPage ...
00108          (?:
00109           \#  # anchor separator          TODO check if this does not make trouble at places where word_rule is used
00110           (?P<word_anchor>\S+)  # some anchor name
00111          )?
00112         )
00113         (?:
00114          (?![%(u)s%(l)s/])  # require anything not upper/lower/slash following
00115          |
00116          $  # ... or end of line
00117         )
00118     ''' % {
00119         'u': config.chars_upper,
00120         'l': config.chars_lower,
00121         'child': re.escape(CHILD_PREFIX),
00122         'parent': re.escape(PARENT_PREFIX),
00123     }
00124     # simplified word_rule for FCKeditor's "unlink" plugin (puts a ! in front of a WikiName if WikiName matches word_rule_js),
00125     # because JavaScript can not use group names and verbose regular expressions!
00126     word_rule_js = (
00127         ur'''(?:(?<![%(u)s%(l)s/])|^)'''
00128         ur'''(?:'''
00129          ur'''(?:(%(parent)s)*|((?<!%(child)s)%(child)s)?)'''
00130          ur'''(((?<!%(child)s)%(child)s)?(?:[%(u)s][%(l)s]+){2,})+'''
00131          ur'''(?:\#(?:\S+))?'''
00132         ur''')'''
00133         ur'''(?:(?![%(u)s%(l)s/])|$)'''
00134     ) % {
00135         'u': config.chars_upper,
00136         'l': config.chars_lower,
00137         'child': re.escape(CHILD_PREFIX),
00138         'parent': re.escape(PARENT_PREFIX),
00139     }
00140 
00141     # link targets:
00142     extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme
00143     attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)'
00144     page_rule = r'(?P<page_name>.*)'
00145 
00146     link_target_rules = r'|'.join([
00147         extern_rule,
00148         attach_rule,
00149         page_rule,
00150     ])
00151     link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE)
00152 
00153     link_rule = r"""
00154         (?P<link>
00155             \[\[  # link target
00156             \s*  # strip space
00157             (?P<link_target>[^|]+?)
00158             \s*  # strip space
00159             (
00160                 \|  # link description
00161                 \s*  # strip space
00162                 (?P<link_desc>
00163                     (?:  # 1. we have either a transclusion here (usually a image)
00164                         \{\{
00165                         \s*[^|]+?\s*  # usually image target (strip space)
00166                         (\|\s*[^|]*?\s*  # usually image alt text (optional, strip space)
00167                             (\|\s*[^|]*?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
00168                             )?
00169                         )?
00170                         \}\}
00171                     )
00172                     |
00173                     (?:  # 2. or we have simple text here.
00174                         [^|]+?
00175                     )
00176                 )?
00177                 \s*  # strip space
00178                 (
00179                     \|  # link parameters
00180                     \s*  # strip space
00181                     (?P<link_params>[^|]+?)?
00182                     \s*  # strip space
00183                 )?
00184             )?
00185             \]\]
00186         )
00187     """
00188 
00189     transclude_rule = r"""
00190         (?P<transclude>
00191             \{\{
00192             \s*(?P<transclude_target>[^|]+?)\s*  # usually image target (strip space)
00193             (\|\s*(?P<transclude_desc>[^|]+?)?\s*  # usually image alt text (optional, strip space)
00194                 (\|\s*(?P<transclude_params>[^|]+?)?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
00195                 )?
00196             )?
00197             \}\}
00198         )
00199     """
00200     text_rule = r"""
00201         (?P<simple_text>
00202             [^|]+  # some text (not empty, does not contain separator)
00203         )
00204     """
00205     # link descriptions:
00206     link_desc_rules = r'|'.join([
00207             transclude_rule,
00208             text_rule,
00209     ])
00210     link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE)
00211 
00212     # transclude descriptions:
00213     transclude_desc_rules = r'|'.join([
00214             text_rule,
00215     ])
00216     transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE)
00217 
00218     # lists:
00219     ol_rule = ur"""
00220         ^\s+  # indentation
00221         (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting
00222         (?:\#\d+)?  # optional start number
00223         \s  # require one blank afterwards
00224     """
00225     ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE)
00226 
00227     dl_rule = ur"""
00228         ^\s+  # indentation
00229         .*?::  # definition term::
00230         \s  # require on blank afterwards
00231     """
00232     dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE)
00233 
00234     # others
00235     indent_re = re.compile(ur"^\s*", re.UNICODE)
00236     eol_re = re.compile(r'\r?\n', re.UNICODE)
00237 
00238     # this is used inside parser/pre sections (we just want to know when it's over):
00239     parser_unique = u''
00240     parser_scan_rule = ur"""
00241 (?P<parser_end>
00242     %s\}\}\}  # in parser/pre, we only look for the end of the parser/pre
00243 )
00244 """
00245 
00246 
00247     # the big, fat, less ugly one ;)
00248     # please be very careful: blanks and # must be escaped with \ !
00249     scan_rules = ur"""
00250 (?P<emph_ibb>
00251     '''''(?=[^']+''')  # italic on, bold on, ..., bold off
00252 )|(?P<emph_ibi>
00253     '''''(?=[^']+'')  # italic on, bold on, ..., italic off
00254 )|(?P<emph_ib_or_bi>
00255     '{5}(?=[^'])  # italic and bold or bold and italic
00256 )|(?P<emph>
00257     '{2,3}  # italic or bold
00258 )|(?P<u>
00259     __ # underline
00260 )|(?P<small>
00261     (
00262      (?P<small_on>\~-\ ?)  # small on (we eat a trailing blank if it is there)
00263     |
00264      (?P<small_off>-\~)  # small off
00265     )
00266 )|(?P<big>
00267     (
00268      (?P<big_on>\~\+\ ?)  # big on (eat trailing blank)
00269     |
00270      (?P<big_off>\+\~)  # big off
00271     )
00272 )|(?P<strike>
00273     (
00274      (?P<strike_on>--\()  # strike-through on
00275     |
00276      (?P<strike_off>\)--)  # strike-through off
00277     )
00278 )|(?P<remark>
00279     (
00280      (^|(?<=\s))  # we require either beginning of line or some whitespace before a remark begin
00281      (?P<remark_on>/\*\s)  # inline remark on (require and eat whitespace after it)
00282     )
00283     |
00284     (
00285      (?P<remark_off>\s\*/)  # off (require and eat whitespace before it)
00286      (?=\s)  # we require some whitespace after a remark end
00287     )
00288 )|(?P<sup>
00289     \^  # superscript on
00290     (?P<sup_text>.*?)  # capture the text
00291     \^  # off
00292 )|(?P<sub>
00293     ,,  # subscript on
00294     (?P<sub_text>.*?)  # capture the text
00295     ,,  # off
00296 )|(?P<tt>
00297     \{\{\{  # teletype on
00298     (?P<tt_text>.*?)  # capture the text
00299     \}\}\}  # off
00300 )|(?P<tt_bt>
00301     `  # teletype (using a backtick) on
00302     (?P<tt_bt_text>.*?)  # capture the text
00303     `  # off
00304 )|(?P<interwiki>
00305     %(interwiki_rule)s  # OtherWiki:PageName
00306 )|(?P<word>  # must come AFTER interwiki rule!
00307     %(word_rule)s  # CamelCase wiki words
00308 )|
00309 %(link_rule)s
00310 |
00311 %(transclude_rule)s
00312 |(?P<url>
00313     %(url_rule)s
00314 )|(?P<email>
00315     [-\w._+]+  # name
00316     \@  # at
00317     [\w-]+(\.[\w-]+)+  # server/domain
00318 )|(?P<smiley>
00319     (^|(?<=\s))  # we require either beginning of line or some space before a smiley
00320     (%(smiley)s)  # one of the smileys
00321     (?=\s)  # we require some space after the smiley
00322 )|(?P<macro>
00323     <<
00324     (?P<macro_name>\w+)  # name of the macro
00325     (?:\((?P<macro_args>.*?)\))?  # optionally macro arguments
00326     >>
00327 )|(?P<heading>
00328     ^(?P<hmarker>=+)\s+  # some === at beginning of line, eat trailing blanks
00329     (?P<heading_text>.*?)  # capture heading text
00330     \s+(?P=hmarker)\s$  # some === at end of line (matching amount as we have seen), eat blanks
00331 )|(?P<parser>
00332     \{\{\{  # parser on
00333     (?P<parser_unique>(\{*|\w*))  # either some more {{{{ or some chars to solve the nesting problem
00334     (?P<parser_line>
00335      (
00336       \#!  # hash bang
00337       (?P<parser_name>\w*)  # we have a parser name (can be empty) directly following the {{{
00338       (
00339        \s+  # some space ...
00340        (?P<parser_args>.+?)  # followed by parser args
00341       )?  # parser args are optional
00342       \s*  # followed by whitespace (eat it) until EOL
00343      )
00344     |
00345      (?P<parser_nothing>\s*)  # no parser name, only whitespace up to EOL (eat it)
00346     )$
00347     # "parser off" detection is done with parser_scan_rule!
00348 )|(?P<comment>
00349     ^\#\#.*$  # src code comment, rest of line
00350 )|(?P<ol>
00351     %(ol_rule)s  # ordered list
00352 )|(?P<dl>
00353     %(dl_rule)s  # definition list
00354 )|(?P<li>
00355     ^\s+\*\s*  # unordered list
00356 )|(?P<li_none>
00357     ^\s+\.\s*  # unordered list, no bullets
00358 )|(?P<indent>
00359     ^\s+  # indented by some spaces
00360 )|(?P<tableZ>
00361     \|\|\ $  # the right end of a table row
00362 )|(?P<table>
00363     (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table
00364 )|(?P<rule>
00365     -{4,}  # hor. rule, min. 4 -
00366 )|(?P<entity>
00367     &(
00368       ([a-zA-Z]+)  # symbolic entity, like &uuml;
00369       |
00370       (\#(\d{1,5}|x[0-9a-fA-F]+))  # numeric entities, like &#42; or &#x42;
00371      );
00372 )|(?P<sgml_entity>  # must come AFTER entity rule!
00373     [<>&]  # needs special treatment for html/xml
00374 )"""  % {
00375         'url_scheme': url_scheme,
00376         'url_rule': url_rule,
00377         'punct': punct_pattern,
00378         'ol_rule': ol_rule,
00379         'dl_rule': dl_rule,
00380         'interwiki_rule': interwiki_rule,
00381         'word_rule': word_rule,
00382         'link_rule': link_rule,
00383         'transclude_rule': transclude_rule,
00384         'u': config.chars_upper,
00385         'l': config.chars_lower,
00386         'smiley': u'|'.join([re.escape(s) for s in config.smileys])}
00387     scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE)
00388 
00389     # Don't start p before these
00390     no_new_p_before = ("heading rule table tableZ tr td "
00391                        "ul ol dl dt dd li li_none indent "
00392                        "macro parser")
00393     no_new_p_before = no_new_p_before.split()
00394     no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
00395 
00396     def __init__(self, raw, request, **kw):
00397         self.raw = raw
00398         self.request = request
00399         self.form = request.form # Macro object uses this
00400         self._ = request.getText
00401         self.cfg = request.cfg
00402         self.line_anchors = kw.get('line_anchors', True)
00403         self.start_line = kw.get('start_line', 0)
00404         self.macro = None
00405 
00406         # currently, there is only a single, optional argument to this parser and
00407         # (when given), it is used as class(es) for a div wrapping the formatter output
00408         # either use a single class like "comment" or multiple like "comment/red/dotted"
00409         self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ')
00410 
00411         self.is_em = 0 # must be int
00412         self.is_b = 0 # must be int
00413         self.is_u = False
00414         self.is_strike = False
00415         self.is_big = False
00416         self.is_small = False
00417         self.is_remark = False
00418 
00419         self.lineno = 0
00420         self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
00421         self.in_li = 0 # between <li> and </li>
00422         self.in_dd = 0 # between <dd> and </dd>
00423 
00424         # states of the parser concerning being inside/outside of some "pre" section:
00425         # None == we are not in any kind of pre section (was: 0)
00426         # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1)
00427         # 'found_parser' == we found a valid parser (was: 2)
00428         self.in_pre = None
00429 
00430         self.in_table = 0
00431         self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
00432 
00433         # holds the nesting level (in chars) of open lists
00434         self.list_indents = []
00435         self.list_types = []
00436 
00437     def _close_item(self, result):
00438         #result.append("<!-- close item begin -->\n")
00439         if self.in_table:
00440             result.append(self.formatter.table(0))
00441             self.in_table = 0
00442         if self.in_li:
00443             self.in_li = 0
00444             if self.formatter.in_p:
00445                 result.append(self.formatter.paragraph(0))
00446             result.append(self.formatter.listitem(0))
00447         if self.in_dd:
00448             self.in_dd = 0
00449             if self.formatter.in_p:
00450                 result.append(self.formatter.paragraph(0))
00451             result.append(self.formatter.definition_desc(0))
00452         #result.append("<!-- close item end -->\n")
00453 
00454     def _u_repl(self, word, groups):
00455         """Handle underline."""
00456         self.is_u = not self.is_u
00457         return self.formatter.underline(self.is_u)
00458 
00459     def _remark_repl(self, word, groups):
00460         """Handle remarks."""
00461         on = groups.get('remark_on')
00462         if on and self.is_remark:
00463             return self.formatter.text(word)
00464         off = groups.get('remark_off')
00465         if off and not self.is_remark:
00466             return self.formatter.text(word)
00467         self.is_remark = not self.is_remark
00468         return self.formatter.span(self.is_remark, css_class='comment')
00469     _remark_on_repl = _remark_repl
00470     _remark_off_repl = _remark_repl
00471 
00472     def _strike_repl(self, word, groups):
00473         """Handle strikethrough."""
00474         on = groups.get('strike_on')
00475         if on and self.is_strike:
00476             return self.formatter.text(word)
00477         off = groups.get('strike_off')
00478         if off and not self.is_strike:
00479             return self.formatter.text(word)
00480         self.is_strike = not self.is_strike
00481         return self.formatter.strike(self.is_strike)
00482     _strike_on_repl = _strike_repl
00483     _strike_off_repl = _strike_repl
00484 
00485     def _small_repl(self, word, groups):
00486         """Handle small."""
00487         on = groups.get('small_on')
00488         if on and self.is_small:
00489             return self.formatter.text(word)
00490         off = groups.get('small_off')
00491         if off and not self.is_small:
00492             return self.formatter.text(word)
00493         self.is_small = not self.is_small
00494         return self.formatter.small(self.is_small)
00495     _small_on_repl = _small_repl
00496     _small_off_repl = _small_repl
00497 
00498     def _big_repl(self, word, groups):
00499         """Handle big."""
00500         on = groups.get('big_on')
00501         if on and self.is_big:
00502             return self.formatter.text(word)
00503         off = groups.get('big_off')
00504         if off and not self.is_big:
00505             return self.formatter.text(word)
00506         self.is_big = not self.is_big
00507         return self.formatter.big(self.is_big)
00508     _big_on_repl = _big_repl
00509     _big_off_repl = _big_repl
00510 
00511     def _emph_repl(self, word, groups):
00512         """Handle emphasis, i.e. '' and '''."""
00513         if len(word) == 3:
00514             self.is_b = not self.is_b
00515             if self.is_em and self.is_b:
00516                 self.is_b = 2
00517             return self.formatter.strong(self.is_b)
00518         else:
00519             self.is_em = not self.is_em
00520             if self.is_em and self.is_b:
00521                 self.is_em = 2
00522             return self.formatter.emphasis(self.is_em)
00523 
00524     def _emph_ibb_repl(self, word, groups):
00525         """Handle mixed emphasis, i.e. ''''' followed by '''."""
00526         self.is_b = not self.is_b
00527         self.is_em = not self.is_em
00528         if self.is_em and self.is_b:
00529             self.is_b = 2
00530         return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00531 
00532     def _emph_ibi_repl(self, word, groups):
00533         """Handle mixed emphasis, i.e. ''''' followed by ''."""
00534         self.is_b = not self.is_b
00535         self.is_em = not self.is_em
00536         if self.is_em and self.is_b:
00537             self.is_em = 2
00538         return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00539 
00540     def _emph_ib_or_bi_repl(self, word, groups):
00541         """Handle mixed emphasis, exactly five '''''."""
00542         b_before_em = self.is_b > self.is_em > 0
00543         self.is_b = not self.is_b
00544         self.is_em = not self.is_em
00545         if b_before_em:
00546             return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00547         else:
00548             return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00549 
00550     def _sup_repl(self, word, groups):
00551         """Handle superscript."""
00552         text = groups.get('sup_text', '')
00553         return (self.formatter.sup(1) +
00554                 self.formatter.text(text) +
00555                 self.formatter.sup(0))
00556     _sup_text_repl = _sup_repl
00557 
00558     def _sub_repl(self, word, groups):
00559         """Handle subscript."""
00560         text = groups.get('sub_text', '')
00561         return (self.formatter.sub(1) +
00562                self.formatter.text(text) +
00563                self.formatter.sub(0))
00564     _sub_text_repl = _sub_repl
00565 
00566     def _tt_repl(self, word, groups):
00567         """Handle inline code."""
00568         tt_text = groups.get('tt_text', '')
00569         return (self.formatter.code(1) +
00570                 self.formatter.text(tt_text) +
00571                 self.formatter.code(0))
00572     _tt_text_repl = _tt_repl
00573 
00574     def _tt_bt_repl(self, word, groups):
00575         """Handle backticked inline code."""
00576         tt_bt_text = groups.get('tt_bt_text', '')
00577         return (self.formatter.code(1, css="backtick") +
00578                 self.formatter.text(tt_bt_text) +
00579                 self.formatter.code(0))
00580     _tt_bt_text_repl = _tt_bt_repl
00581 
00582     def _rule_repl(self, word, groups):
00583         """Handle sequences of dashes."""
00584         result = self._undent() + self._closeP()
00585         if len(word) <= 4:
00586             result += self.formatter.rule()
00587         else:
00588             # Create variable rule size 1 - 6. Actual size defined in css.
00589             size = min(len(word), 10) - 4
00590             result += self.formatter.rule(size)
00591         return result
00592 
00593     def _interwiki_repl(self, word, groups):
00594         """Handle InterWiki links."""
00595         wiki = groups.get('interwiki_wiki')
00596         page = groups.get('interwiki_page')
00597 
00598         wikitag_bad = wikiutil.resolve_interwiki(self.request, wiki, page)[3]
00599         if wikitag_bad:
00600             text = groups.get('interwiki')
00601             return self.formatter.text(text)
00602         else:
00603             page, anchor = wikiutil.split_anchor(page)
00604             return (self.formatter.interwikilink(1, wiki, page, anchor=anchor) +
00605                     self.formatter.text(page) +
00606                     self.formatter.interwikilink(0, wiki, page))
00607     _interwiki_wiki_repl = _interwiki_repl
00608     _interwiki_page_repl = _interwiki_repl
00609 
00610     def _word_repl(self, word, groups):
00611         """Handle WikiNames."""
00612         bang = ''
00613         bang_present = groups.get('word_bang')
00614         if bang_present:
00615             if self.cfg.bang_meta:
00616                 # handle !NotWikiNames
00617                 return self.formatter.nowikiword(word)
00618             else:
00619                 bang = self.formatter.text('!')
00620         name = groups.get('word_name')
00621         current_page = self.formatter.page.page_name
00622         abs_name = wikiutil.AbsPageName(current_page, name)
00623         # if a simple, self-referencing link, emit it as plain text
00624         if abs_name == current_page:
00625             return self.formatter.text(word)
00626         else:
00627             abs_name, anchor = wikiutil.split_anchor(abs_name)
00628             return (bang +
00629                     self.formatter.pagelink(1, abs_name, anchor=anchor) +
00630                     self.formatter.text(word) +
00631                     self.formatter.pagelink(0, abs_name))
00632     _word_bang_repl = _word_repl
00633     _word_name_repl = _word_repl
00634     _word_anchor_repl = _word_repl
00635 
00636     def _url_repl(self, word, groups):
00637         """Handle literal URLs."""
00638         scheme = groups.get('url_scheme', 'http')
00639         target = groups.get('url_target', '')
00640         return (self.formatter.url(1, target, css=scheme) +
00641                 self.formatter.text(target) +
00642                 self.formatter.url(0))
00643     _url_target_repl = _url_repl
00644     _url_scheme_repl = _url_repl
00645 
00646     def _transclude_description(self, desc, default_text=''):
00647         """ parse a string <desc> valid as transclude description (text, ...)
00648             and return the description.
00649 
00650             We do NOT use wikiutil.escape here because it is html specific (the
00651             html formatter, if used, does this for all html attributes).
00652 
00653             We do NOT call formatter.text here because it sometimes is just used
00654             for some alt and/or title attribute, but not emitted as text.
00655 
00656             @param desc: the transclude description to parse
00657             @param default_text: use this text if parsing desc returns nothing.
00658         """
00659         m = self.transclude_desc_re.match(desc)
00660         if m:
00661             if m.group('simple_text'):
00662                 desc = m.group('simple_text')
00663         else:
00664             desc = default_text
00665         return desc
00666 
00667     def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None):
00668         """ parse the parameters of link/transclusion markup,
00669             defaults can be a dict with some default key/values
00670             that will be in the result as given, unless overriden
00671             by the params.
00672         """
00673         if tag_attrs is None:
00674             tag_attrs = {}
00675         if query_args is None:
00676             query_args = {}
00677         if params:
00678             fixed, kw, trailing = wikiutil.parse_quoted_separated(params)
00679             # we ignore fixed and trailing args and only use kw args:
00680             if acceptable_attrs is None:
00681                 acceptable_attrs = []
00682             for key, val in kw.items():
00683                 # wikiutil.escape for key/val must be done by (html) formatter!
00684                 if key in acceptable_attrs:
00685                     # tag attributes must be string type
00686                     tag_attrs[str(key)] = val
00687                 elif key.startswith('&'):
00688                     key = key[1:]
00689                     query_args[key] = val
00690         return tag_attrs, query_args
00691 
00692     def _transclude_repl(self, word, groups):
00693         """Handles transcluding content, usually embedding images."""
00694         target = groups.get('transclude_target', '')
00695         target = wikiutil.url_unquote(target)
00696         desc = groups.get('transclude_desc', '') or ''
00697         params = groups.get('transclude_params', u'') or u''
00698         acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
00699         acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS
00700                                   'type', 'standby', ] # we maybe need a hack for <PARAM> here
00701         m = self.link_target_re.match(target)
00702         if m:
00703             if m.group('extern_addr'):
00704                 # currently only supports ext. image inclusion
00705                 target = m.group('extern_addr')
00706                 desc = self._transclude_description(desc, target)
00707                 tag_attrs, query_args = self._get_params(params,
00708                                                          tag_attrs={'class': 'external_image',
00709                                                                     'alt': desc,
00710                                                                     'title': desc, },
00711                                                          acceptable_attrs=acceptable_attrs_img)
00712                 return self.formatter.image(src=target, **tag_attrs)
00713                 # FF2 has a bug with target mimetype detection, it looks at the url path
00714                 # and expects to find some "filename extension" there (like .png) and this
00715                 # (not the response http headers) will set the default content-type of
00716                 # the object. This will often work for staticly served files, but
00717                 # fails for MoinMoin attachments (they don't have the filename.ext in the
00718                 # path, but in the query string). FF3 seems to have this bug fixed, opera 9.2
00719                 # also works.
00720                 #return (self.formatter.transclusion(1, data=target) +
00721                 #        desc +
00722                 #        self.formatter.transclusion(0))
00723 
00724             elif m.group('attach_scheme'):
00725                 scheme = m.group('attach_scheme')
00726                 url = wikiutil.url_unquote(m.group('attach_addr'))
00727                 if scheme == 'attachment':
00728                     mt = wikiutil.MimeType(filename=url)
00729                     if mt.major == 'text':
00730                         desc = self._transclude_description(desc, url)
00731                         return self.formatter.attachment_inlined(url, desc)
00732                     # destinguishs if browser need a plugin in place
00733                     elif mt.major == 'image' and mt.minor in config.browser_supported_images:
00734                         desc = self._transclude_description(desc, url)
00735                         tag_attrs, query_args = self._get_params(params,
00736                                                                  tag_attrs={'alt': desc,
00737                                                                             'title': desc, },
00738                                                                  acceptable_attrs=acceptable_attrs_img)
00739                         return self.formatter.attachment_image(url, **tag_attrs)
00740                     else:
00741                         from MoinMoin.action import AttachFile
00742                         pagename = self.formatter.page.page_name
00743                         if AttachFile.exists(self.request, pagename, url):
00744                             href = AttachFile.getAttachUrl(pagename, url, self.request)
00745                             tag_attrs, query_args = self._get_params(params,
00746                                                                      tag_attrs={'title': desc, },
00747                                                                      acceptable_attrs=acceptable_attrs_object)
00748                             return (self.formatter.transclusion(1, data=href, type=mt.spoil(), **tag_attrs) +
00749                                     self.formatter.text(self._transclude_description(desc, url)) +
00750                                     self.formatter.transclusion(0))
00751                         else:
00752                             return (self.formatter.attachment_link(1, url) +
00753                                     self.formatter.text(self._transclude_description(desc, url)) +
00754                                     self.formatter.attachment_link(0))
00755 
00756                         #NOT USED CURRENTLY:
00757 
00758                         # use EmbedObject for other mimetypes
00759                         if mt is not None:
00760                             from MoinMoin import macro
00761                             macro.request = self.request
00762                             macro.formatter = self.request.html_formatter
00763                             p = Parser("##\n", request)
00764                             m = macro.Macro(p)
00765                             pagename = self.formatter.page.page_name
00766                             return m.execute('EmbedObject', u'target=%s' % url)
00767                 elif scheme == 'drawing':
00768                     url = wikiutil.drawing2fname(url)
00769                     desc = self._transclude_description(desc, url)
00770                     if desc:
00771                         tag_attrs= {'alt': desc, 'title': desc, }
00772                     else:
00773                         tag_attrs = {}
00774                     tag_attrs, query_args = self._get_params(params,
00775                                                              tag_attrs=tag_attrs,
00776                                                              acceptable_attrs=acceptable_attrs_img)
00777                     return self.formatter.attachment_drawing(url, desc, **tag_attrs)
00778 
00779             elif m.group('page_name'):
00780                 # experimental client side transclusion
00781                 page_name_all = m.group('page_name')
00782                 if ':' in page_name_all:
00783                     wiki_name, page_name = page_name_all.split(':', 1)
00784                     wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
00785                 else:
00786                     err = True
00787                 if err: # not a interwiki link / not in interwiki map
00788                     tag_attrs, query_args = self._get_params(params,
00789                                                              tag_attrs={'type': 'text/html',
00790                                                                         'width': '100%', },
00791                                                              acceptable_attrs=acceptable_attrs_object)
00792                     if 'action' not in query_args:
00793                         query_args['action'] = 'content'
00794                     url = Page(self.request, page_name_all).url(self.request, querystr=query_args)
00795                     return (self.formatter.transclusion(1, data=url, **tag_attrs) +
00796                             self.formatter.text(self._transclude_description(desc, page_name_all)) +
00797                             self.formatter.transclusion(0))
00798                     #return u"Error: <<Include(%s,%s)>> emulation missing..." % (page_name, args)
00799                 else: # looks like a valid interwiki link
00800                     url = wikiutil.join_wiki(wikiurl, wikitail)
00801                     tag_attrs, query_args = self._get_params(params,
00802                                                              tag_attrs={'type': 'text/html',
00803                                                                         'width': '100%', },
00804                                                              acceptable_attrs=acceptable_attrs_object)
00805                     if 'action' not in query_args:
00806                         query_args['action'] = 'content' # XXX moin specific
00807                     url += '?%s' % wikiutil.makeQueryString(query_args)
00808                     return (self.formatter.transclusion(1, data=url, **tag_attrs) +
00809                             self.formatter.text(self._transclude_description(desc, page_name)) +
00810                             self.formatter.transclusion(0))
00811                     #return u"Error: <<RemoteInclude(%s:%s,%s)>> still missing." % (wiki_name, page_name, args)
00812 
00813             else:
00814                 desc = self._transclude_description(desc, target)
00815                 return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params))
00816         return word +'???'
00817     _transclude_target_repl = _transclude_repl
00818     _transclude_desc_repl = _transclude_repl
00819     _transclude_params_repl = _transclude_repl
00820 
00821     def _link_description(self, desc, target='', default_text=''):
00822         """ parse a string <desc> valid as link description (text, transclusion, ...)
00823             and return formatted content.
00824 
00825             @param desc: the link description to parse
00826             @param default_text: use this text (formatted as text) if parsing
00827                                  desc returns nothing.
00828             @param target: target of the link (as readable markup) - used for
00829                            transcluded image's description
00830         """
00831         m = self.link_desc_re.match(desc)
00832         if m:
00833             if m.group('simple_text'):
00834                 desc = m.group('simple_text')
00835                 desc = self.formatter.text(desc)
00836             elif m.group('transclude'):
00837                 groupdict = m.groupdict()
00838                 if groupdict.get('transclude_desc') is None:
00839                     # if transcluded obj (image) has no description, use target for it
00840                     groupdict['transclude_desc'] = target
00841                 desc = m.group('transclude')
00842                 desc = self._transclude_repl(desc, groupdict)
00843         else:
00844             desc = default_text
00845             if desc:
00846                 desc = self.formatter.text(desc)
00847         return desc
00848 
00849     def _link_repl(self, word, groups):
00850         """Handle [[target|text]] links."""
00851         target = groups.get('link_target', '')
00852         desc = groups.get('link_desc', '') or ''
00853         params = groups.get('link_params', u'') or u''
00854         acceptable_attrs = ['class', 'title', 'target', 'accesskey', ] # no style because of JS
00855         mt = self.link_target_re.match(target)
00856         if mt:
00857             if mt.group('page_name'):
00858                 page_name_and_anchor = mt.group('page_name')
00859                 if ':' in page_name_and_anchor:
00860                     wiki_name, page_name = page_name_and_anchor.split(':', 1)
00861                     wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
00862                 else:
00863                     err = True
00864                 if err: # not a interwiki link / not in interwiki map
00865                     page_name, anchor = wikiutil.split_anchor(page_name_and_anchor)
00866                     current_page = self.formatter.page.page_name
00867                     if not page_name:
00868                         page_name = current_page
00869                     # handle relative links
00870                     abs_page_name = wikiutil.AbsPageName(current_page, page_name)
00871                     tag_attrs, query_args = self._get_params(params,
00872                                                              tag_attrs={},
00873                                                              acceptable_attrs=acceptable_attrs)
00874                     return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
00875                             self._link_description(desc, target, page_name_and_anchor) +
00876                             self.formatter.pagelink(0, abs_page_name))
00877                 else: # interwiki link
00878                     page_name, anchor = wikiutil.split_anchor(page_name)
00879                     tag_attrs, query_args = self._get_params(params,
00880                                                              tag_attrs={},
00881                                                              acceptable_attrs=acceptable_attrs)
00882                     return (self.formatter.interwikilink(1, wiki_name, page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
00883                             self._link_description(desc, target, page_name) +
00884                             self.formatter.interwikilink(0, wiki_name, page_name))
00885 
00886             elif mt.group('extern_addr'):
00887                 scheme = mt.group('extern_scheme')
00888                 target = mt.group('extern_addr')
00889                 tag_attrs, query_args = self._get_params(params,
00890                                                          tag_attrs={'class': scheme, },
00891                                                          acceptable_attrs=acceptable_attrs)
00892                 return (self.formatter.url(1, target, **tag_attrs) +
00893                         self._link_description(desc, target, target) +
00894                         self.formatter.url(0))
00895 
00896             elif mt.group('attach_scheme'):
00897                 scheme = mt.group('attach_scheme')
00898                 url = wikiutil.url_unquote(mt.group('attach_addr'))
00899                 tag_attrs, query_args = self._get_params(params,
00900                                                          tag_attrs={'title': desc, },
00901                                                          acceptable_attrs=acceptable_attrs)
00902                 if scheme == 'attachment':
00903                     return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) +
00904                             self._link_description(desc, target, url) +
00905                             self.formatter.attachment_link(0))
00906                 elif scheme == 'drawing':
00907                     url = wikiutil.drawing2fname(url)
00908                     return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs)
00909             else:
00910                 if desc:
00911                     desc = '|' + desc
00912                 return self.formatter.text('[[%s%s]]' % (target, desc))
00913     _link_target_repl = _link_repl
00914     _link_desc_repl = _link_repl
00915     _link_params_repl = _link_repl
00916 
00917     def _email_repl(self, word, groups):
00918         """Handle email addresses (without a leading mailto:)."""
00919         return (self.formatter.url(1, "mailto:%s" % word, css='mailto') +
00920                 self.formatter.text(word) +
00921                 self.formatter.url(0))
00922 
00923     def _sgml_entity_repl(self, word, groups):
00924         """Handle SGML entities."""
00925         return self.formatter.text(word)
00926 
00927     def _entity_repl(self, word, groups):
00928         """Handle numeric (decimal and hexadecimal) and symbolic SGML entities."""
00929         return self.formatter.rawHTML(word)
00930 
00931     def _indent_repl(self, match, groups):
00932         """Handle pure indentation (no - * 1. markup)."""
00933         result = []
00934         if not (self.in_li or self.in_dd):
00935             self._close_item(result)
00936             self.in_li = 1
00937             css_class = None
00938             if self.line_was_empty and not self.first_list_item:
00939                 css_class = 'gap'
00940             result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00941         return ''.join(result)
00942 
00943     def _li_none_repl(self, match, groups):
00944         """Handle type=none (" .") lists."""
00945         result = []
00946         self._close_item(result)
00947         self.in_li = 1
00948         css_class = None
00949         if self.line_was_empty and not self.first_list_item:
00950             css_class = 'gap'
00951         result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00952         return ''.join(result)
00953 
00954     def _li_repl(self, match, groups):
00955         """Handle bullet (" *") lists."""
00956         result = []
00957         self._close_item(result)
00958         self.in_li = 1
00959         css_class = None
00960         if self.line_was_empty and not self.first_list_item:
00961             css_class = 'gap'
00962         result.append(self.formatter.listitem(1, css_class=css_class))
00963         return ''.join(result)
00964 
00965     def _ol_repl(self, match, groups):
00966         """Handle numbered lists."""
00967         return self._li_repl(match, groups)
00968 
00969     def _dl_repl(self, match, groups):
00970         """Handle definition lists."""
00971         result = []
00972         self._close_item(result)
00973         self.in_dd = 1
00974         result.extend([
00975             self.formatter.definition_term(1),
00976             self.formatter.text(match[1:-3].lstrip(' ')),
00977             self.formatter.definition_term(0),
00978             self.formatter.definition_desc(1),
00979         ])
00980         return ''.join(result)
00981 
00982     def _indent_level(self):
00983         """Return current char-wise indent level."""
00984         return len(self.list_indents) and self.list_indents[-1]
00985 
00986     def _indent_to(self, new_level, list_type, numtype, numstart):
00987         """Close and open lists."""
00988         openlist = []   # don't make one out of these two statements!
00989         closelist = []
00990 
00991         if self._indent_level() != new_level and self.in_table:
00992             closelist.append(self.formatter.table(0))
00993             self.in_table = 0
00994 
00995         while self._indent_level() > new_level:
00996             self._close_item(closelist)
00997             if self.list_types[-1] == 'ol':
00998                 tag = self.formatter.number_list(0)
00999             elif self.list_types[-1] == 'dl':
01000                 tag = self.formatter.definition_list(0)
01001             else:
01002                 tag = self.formatter.bullet_list(0)
01003             closelist.append(tag)
01004 
01005             del self.list_indents[-1]
01006             del self.list_types[-1]
01007 
01008             if self.list_types: # we are still in a list
01009                 if self.list_types[-1] == 'dl':
01010                     self.in_dd = 1
01011                 else:
01012                     self.in_li = 1
01013 
01014         # Open new list, if necessary
01015         if self._indent_level() < new_level:
01016             self.list_indents.append(new_level)
01017             self.list_types.append(list_type)
01018 
01019             if self.formatter.in_p:
01020                 closelist.append(self.formatter.paragraph(0))
01021 
01022             if list_type == 'ol':
01023                 tag = self.formatter.number_list(1, numtype, numstart)
01024             elif list_type == 'dl':
01025                 tag = self.formatter.definition_list(1)
01026             else:
01027                 tag = self.formatter.bullet_list(1)
01028             openlist.append(tag)
01029 
01030             self.first_list_item = 1
01031             self.in_li = 0
01032             self.in_dd = 0
01033 
01034         # If list level changes, close an open table
01035         if self.in_table and (openlist or closelist):
01036             closelist[0:0] = [self.formatter.table(0)]
01037             self.in_table = 0
01038 
01039         self.in_list = self.list_types != []
01040         return ''.join(closelist) + ''.join(openlist)
01041 
01042     def _undent(self):
01043         """Close all open lists."""
01044         result = []
01045         #result.append("<!-- _undent start -->\n")
01046         self._close_item(result)
01047         for type in self.list_types[::-1]:
01048             if type == 'ol':
01049                 result.append(self.formatter.number_list(0))
01050             elif type == 'dl':
01051                 result.append(self.formatter.definition_list(0))
01052             else:
01053                 result.append(self.formatter.bullet_list(0))
01054         #result.append("<!-- _undent end -->\n")
01055         self.list_indents = []
01056         self.list_types = []
01057         return ''.join(result)
01058 
01059     def _getTableAttrs(self, attrdef):
01060         attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>'
01061         m = re.match(attr_rule, attrdef, re.U)
01062         if not m:
01063             return {}, ''
01064         attrdef = m.group('attrs')
01065 
01066         # extension for special table markup
01067         def table_extension(key, parser, attrs, wiki_parser=self):
01068             """ returns: tuple (found_flag, msg)
01069                 found_flag: whether we found something and were able to process it here
01070                   true for special stuff like 100% or - or #AABBCC
01071                   false for style xxx="yyy" attributes
01072                 msg: "" or an error msg
01073             """
01074             _ = wiki_parser._
01075             found = False
01076             msg = ''
01077             if key[0] in "0123456789":
01078                 token = parser.get_token()
01079                 if token != '%':
01080                     wanted = '%'
01081                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
01082                         'wanted': wanted, 'key': key, 'token': token}
01083                 else:
01084                     try:
01085                         dummy = int(key)
01086                     except ValueError:
01087                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
01088                             'key': key, 'token': token}
01089                     else:
01090                         found = True
01091                         attrs['width'] = '"%s%%"' % key
01092             elif key == '-':
01093                 arg = parser.get_token()
01094                 try:
01095                     dummy = int(arg)
01096                 except ValueError:
01097                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
01098                         'arg': arg, 'key': key}
01099                 else:
01100                     found = True
01101                     attrs['colspan'] = '"%s"' % arg
01102             elif key == '|':
01103                 arg = parser.get_token()
01104                 try:
01105                     dummy = int(arg)
01106                 except ValueError:
01107                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
01108                         'arg': arg, 'key': key}
01109                 else:
01110                     found = True
01111                     attrs['rowspan'] = '"%s"' % arg
01112             elif key == '(':
01113                 found = True
01114                 attrs['align'] = '"left"'
01115             elif key == ':':
01116                 found = True
01117                 attrs['align'] = '"center"'
01118             elif key == ')':
01119                 found = True
01120                 attrs['align'] = '"right"'
01121             elif key == '^':
01122                 found = True
01123                 attrs['valign'] = '"top"'
01124             elif key == 'v':
01125                 found = True
01126                 attrs['valign'] = '"bottom"'
01127             elif key == '#':
01128                 arg = parser.get_token()
01129                 try:
01130                     if len(arg) != 6:
01131                         raise ValueError
01132                     dummy = int(arg, 16)
01133                 except ValueError:
01134                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
01135                         'arg': arg, 'key': key}
01136                 else:
01137                     found = True
01138                     attrs['bgcolor'] = '"#%s"' % arg
01139             return found, self.formatter.rawHTML(msg)
01140 
01141         # scan attributes
01142         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
01143         if msg:
01144             msg = '<strong class="highlight">%s</strong>' % msg
01145         #logging.debug("parseAttributes returned %r" % attr)
01146         return attr, msg
01147 
01148     def _tableZ_repl(self, word, groups):
01149         """Handle table row end."""
01150         if self.in_table:
01151             result = ''
01152             # REMOVED: check for self.in_li, p should always close
01153             if self.formatter.in_p:
01154                 result = self.formatter.paragraph(0)
01155             result += self.formatter.table_cell(0) + self.formatter.table_row(0)
01156             return result
01157         else:
01158             return self.formatter.text(word)
01159 
01160     def _table_repl(self, word, groups):
01161         """Handle table cell separator."""
01162         if self.in_table:
01163             result = []
01164             # check for attributes
01165             attrs, attrerr = self._getTableAttrs(word)
01166 
01167             # start the table row?
01168             if self.table_rowstart:
01169                 self.table_rowstart = 0
01170                 result.append(self.formatter.table_row(1, attrs))
01171             else:
01172                 # Close table cell, first closing open p
01173                 # REMOVED check for self.in_li, paragraph should close always!
01174                 if self.formatter.in_p:
01175                     result.append(self.formatter.paragraph(0))
01176                 result.append(self.formatter.table_cell(0))
01177 
01178             # check for adjacent cell markers
01179             if word.count("|") > 2:
01180                 if 'align' not in attrs and \
01181                    not ('style' in attrs and 'text-align' in attrs['style'].lower()):
01182                     # add center alignment if we don't have some alignment already
01183                     attrs['align'] = '"center"'
01184                 if 'colspan' not in attrs:
01185                     attrs['colspan'] = '"%d"' % (word.count("|")/2)
01186 
01187             # return the complete cell markup
01188             result.append(self.formatter.table_cell(1, attrs) + attrerr)
01189             result.append(self._line_anchordef())
01190             return ''.join(result)
01191         else:
01192             return self.formatter.text(word)
01193 
01194     def _heading_repl(self, word, groups):
01195         """Handle section headings."""
01196         heading_text = groups.get('heading_text', '')
01197         depth = min(len(groups.get('hmarker')), 5)
01198         return ''.join([
01199             self._closeP(),
01200             self.formatter.heading(1, depth, id=heading_text),
01201             self.formatter.text(heading_text),
01202             self.formatter.heading(0, depth),
01203         ])
01204     _heading_text_repl = _heading_repl
01205 
01206     def _parser_repl(self, word, groups):
01207         """Handle parsed code displays."""
01208         self.parser = None
01209         self.parser_name = None
01210         self.parser_lines = []
01211         parser_line = word = groups.get('parser_line', u'')
01212         parser_name = groups.get('parser_name', None)
01213         parser_args = groups.get('parser_args', None)
01214         parser_nothing = groups.get('parser_nothing', None)
01215         parser_unique = groups.get('parser_unique', u'') or u''
01216         #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique))
01217         if set(parser_unique) == set('{'): # just some more {{{{{{
01218             parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons
01219         self.parser_unique = parser_unique
01220         if parser_name is not None:
01221             # First try to find a parser for this
01222             if parser_name == u'':
01223                 # empty bang paths lead to a normal code display
01224                 # can be used to escape real, non-empty bang paths
01225                 #logging.debug("_parser_repl: empty bangpath")
01226                 parser_name = 'text'
01227                 word = ''
01228         elif parser_nothing is None:
01229             # there was something non-whitespace following the {{{
01230             parser_name = 'text'
01231 
01232         self.setParser(parser_name)
01233         if not self.parser and parser_name:
01234             # loading the desired parser didn't work, retry a safe option:
01235             wanted_parser = parser_name
01236             parser_name = 'text'
01237             self.setParser(parser_name)
01238             word = '%s %s (-)' % (wanted_parser, parser_args)  # indication that it did not work
01239 
01240         if self.parser:
01241             self.parser_name = parser_name
01242             self.in_pre = 'found_parser'
01243             if word:
01244                 self.parser_lines.append(word)
01245         else:
01246             self.in_pre = 'search_parser'
01247 
01248         #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
01249         return ''
01250     _parser_unique_repl = _parser_repl
01251     _parser_line_repl = _parser_repl
01252     _parser_name_repl = _parser_repl
01253     _parser_args_repl = _parser_repl
01254     _parser_nothing_repl = _parser_repl
01255 
01256     def _parser_content(self, line):
01257         """ handle state and collecting lines for parser in pre/parser sections """
01258         #logging.debug("parser_content: %r" % line)
01259         if self.in_pre == 'search_parser' and line.strip():
01260             # try to find a parser specification
01261             parser_name = ''
01262             if line.strip().startswith("#!"):
01263                 parser_name = line.strip()[2:]
01264             if parser_name:
01265                 parser_name = parser_name.split()[0]
01266             else:
01267                 parser_name = 'text'
01268             self.setParser(parser_name)
01269 
01270             if not self.parser:
01271                 parser_name = 'text'
01272                 self.setParser(parser_name)
01273 
01274             if self.parser:
01275                 self.in_pre = 'found_parser'
01276                 self.parser_lines.append(line)
01277                 self.parser_name = parser_name
01278 
01279         elif self.in_pre == 'found_parser':
01280             # collect the content lines
01281             self.parser_lines.append(line)
01282 
01283         return ''  # we emit the content after reaching the end of the parser/pre section
01284 
01285     def _parser_end_repl(self, word, groups):
01286         """ when we reach the end of a parser/pre section,
01287             we call the parser with the lines we collected
01288         """
01289         #if self.in_pre:
01290         self.in_pre = None
01291         self.inhibit_p = 0
01292         #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
01293         self.request.write(self._closeP())
01294         if self.parser_name is None:
01295             # we obviously did not find a parser specification
01296             self.parser_name = 'text'
01297         result = self.formatter.parser(self.parser_name, self.parser_lines)
01298         del self.parser_lines
01299         self.in_pre = None
01300         self.parser = None
01301         return result
01302 
01303     def _smiley_repl(self, word, groups):
01304         """Handle smileys."""
01305         return self.formatter.smiley(word)
01306 
01307     def _comment_repl(self, word, groups):
01308         # if we are in a paragraph, we must close it so that normal text following
01309         # in the line below the comment will reopen a new paragraph.
01310         if self.formatter.in_p:
01311             self.formatter.paragraph(0)
01312         self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
01313         return self.formatter.comment(word)
01314 
01315     def _closeP(self):
01316         if self.formatter.in_p:
01317             return self.formatter.paragraph(0)
01318         return ''
01319 
01320     def _macro_repl(self, word, groups):
01321         """Handle macros."""
01322         macro_name = groups.get('macro_name')
01323         macro_args = groups.get('macro_args')
01324         self.inhibit_p = 0 # 1 fixed macros like UserPreferences (in the past, gone now), 0 fixes paragraph formatting for macros
01325 
01326         # create macro instance
01327         if self.macro is None:
01328             self.macro = macro.Macro(self)
01329         return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro'))
01330     _macro_name_repl = _macro_repl
01331     _macro_args_repl = _macro_repl
01332 
01333     def scan(self, line, inhibit_p=False):
01334         """ Scans one line
01335         Append text before match, invoke replace() with match, and add text after match.
01336         """
01337         result = []
01338         lastpos = 0 # absolute position within line
01339         line_length = len(line)
01340 
01341         ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
01342         while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also
01343             parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE)
01344             scan_re = self.in_pre and parser_scan_re or self.scan_re
01345             match = scan_re.search(line, lastpos)
01346             if match:
01347                 start = match.start()
01348                 if lastpos < start:
01349                     if self.in_pre:
01350                         self._parser_content(line[lastpos:start])
01351                     else:
01352                         ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
01353                         if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p):
01354                             result.append(self.formatter.paragraph(1, css_class="line862"))
01355                         # add the simple text in between lastpos and beginning of current match
01356                         result.append(self.formatter.text(line[lastpos:start]))
01357 
01358                 # Replace match with markup
01359                 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
01360                         self.in_table or self.in_list):
01361                     result.append(self.formatter.paragraph(1, css_class="line867"))
01362                 result.append(self.replace(match, inhibit_p))
01363                 end = match.end()
01364                 lastpos = end
01365                 if start == end:
01366                     # we matched an empty string
01367                     lastpos += 1 # proceed, we don't want to match this again
01368             else:
01369                 if self.in_pre:
01370                     # ilastpos is more then 0 and result of line slice is empty make useless line
01371                     if not (lastpos > 0 and line[lastpos:] == ''):
01372                         self._parser_content(line[lastpos:])
01373                 elif line[lastpos:]:
01374                     ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
01375                     if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
01376                             self.in_li or self.in_dd):
01377                         result.append(self.formatter.paragraph(1, css_class="line874"))
01378                     # add the simple text (no markup) after last match
01379                     result.append(self.formatter.text(line[lastpos:]))
01380                 break # nothing left to do!
01381         return u''.join(result)
01382 
01383     def _replace(self, match):
01384         """ Same as replace() but with no magic """
01385         for name, text in match.groupdict().iteritems():
01386             if text is not None:
01387                 # Get replace method and replace text
01388                 replace_func = getattr(self, '_%s_repl' % name)
01389                 result = replace_func(text, match.groupdict())
01390                 return result
01391 
01392     def replace(self, match, inhibit_p=False):
01393         """ Replace match using type name """
01394         result = []
01395         for type, hit in match.groupdict().items():
01396             if hit is not None and not type in ["hmarker", ]:
01397 
01398                 ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
01399                 # Open p for certain types
01400                 if not (inhibit_p or self.inhibit_p or self.formatter.in_p
01401                         or self.in_pre or (type in self.no_new_p_before)):
01402                     result.append(self.formatter.paragraph(1, css_class="line891"))
01403 
01404                 # Get replace method and replace hit
01405                 replace_func = getattr(self, '_%s_repl' % type)
01406                 result.append(replace_func(hit, match.groupdict()))
01407                 return ''.join(result)
01408         else:
01409             # We should never get here
01410             import pprint
01411             raise Exception("Can't handle match %r\n%s\n%s" % (
01412                 match,
01413                 pprint.pformat(match.groupdict()),
01414                 pprint.pformat(match.groups()),
01415             ))
01416 
01417         return ""
01418 
01419     def _line_anchordef(self):
01420         if self.line_anchors and not self.line_anchor_printed:
01421             self.line_anchor_printed = 1
01422             return self.formatter.line_anchordef(self.lineno)
01423         else:
01424             return ''
01425 
01426     def format(self, formatter, inhibit_p=False):
01427         """ For each line, scan through looking for magic
01428             strings, outputting verbatim any intervening text.
01429         """
01430         self.formatter = formatter
01431         self.hilite_re = self.formatter.page.hilite_re
01432 
01433         # get text and replace TABs
01434         rawtext = self.raw.expandtabs()
01435 
01436         # go through the lines
01437         self.lineno = self.start_line
01438         self.lines = self.eol_re.split(rawtext)
01439         self.line_is_empty = 0
01440 
01441         self.in_processing_instructions = 1
01442 
01443         if self.wrapping_div_class:
01444             self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class))
01445 
01446         # Main loop
01447         for line in self.lines:
01448             self.lineno += 1
01449 
01450             self.line_anchor_printed = 0
01451             if not self.in_table:
01452                 self.request.write(self._line_anchordef())
01453             self.table_rowstart = 1
01454             self.line_was_empty = self.line_is_empty
01455             self.line_is_empty = 0
01456             self.first_list_item = 0
01457             self.inhibit_p = 0
01458 
01459             # ignore processing instructions
01460             if self.in_processing_instructions:
01461                 found = False
01462                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
01463                            "#pragma", "#form", "#acl", "#language"):
01464                     if line.lower().startswith(pi):
01465                         self.request.write(self.formatter.comment(line))
01466                         found = True
01467                         break
01468                 if not found:
01469                     self.in_processing_instructions = 0
01470                 else:
01471                     continue # do not parse this line
01472 
01473             if not self.in_pre:
01474                 # we don't have \n as whitespace any more
01475                 # This is the space between lines we join to one paragraph
01476                 line += ' '
01477 
01478                 # Paragraph break on empty lines
01479                 if not line.strip():
01480                     if self.in_table:
01481                         self.request.write(self.formatter.table(0))
01482                         self.request.write(self._line_anchordef())
01483                         self.in_table = 0
01484                     # CHANGE: removed check for not self.list_types
01485                     # p should close on every empty line
01486                     if self.formatter.in_p:
01487                         self.request.write(self.formatter.paragraph(0))
01488                     self.line_is_empty = 1
01489                     continue
01490 
01491                 # Check indent level
01492                 indent = self.indent_re.match(line)
01493                 indlen = len(indent.group(0))
01494                 indtype = "ul"
01495                 numtype = None
01496                 numstart = None
01497                 if indlen:
01498                     match = self.ol_re.match(line)
01499                     if match:
01500                         numtype, numstart = match.group(0).strip().split('.')
01501                         numtype = numtype[0]
01502 
01503                         if numstart and numstart[0] == "#":
01504                             numstart = int(numstart[1:])
01505                         else:
01506                             numstart = None
01507 
01508                         indtype = "ol"
01509                     else:
01510                         match = self.dl_re.match(line)
01511                         if match:
01512                             indtype = "dl"
01513 
01514                 # output proper indentation tags
01515                 self.request.write(self._indent_to(indlen, indtype, numtype, numstart))
01516 
01517                 # Table mode
01518                 # TODO: move into function?
01519                 if (not self.in_table and line[indlen:indlen + 2] == "||"
01520                     and line.endswith("|| ") and len(line) >= 5 + indlen):
01521                     # Start table
01522                     if self.list_types and not self.in_li:
01523                         self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
01524                         ## CHANGE: no automatic p on li
01525                         ##self.request.write(self.formatter.paragraph(1))
01526                         self.in_li = 1
01527 
01528                     # CHANGE: removed check for self.in_li
01529                     # paragraph should end before table, always!
01530                     if self.formatter.in_p:
01531                         self.request.write(self.formatter.paragraph(0))
01532                     attrs, attrerr = self._getTableAttrs(line[indlen+2:])
01533                     self.request.write(self.formatter.table(1, attrs) + attrerr)
01534                     self.in_table = True # self.lineno
01535                 elif (self.in_table and not
01536                       # intra-table comments should not break a table
01537                       (line.startswith("##") or
01538                        line[indlen:indlen + 2] == "||" and
01539                        line.endswith("|| ") and
01540                        len(line) >= 5 + indlen)):
01541 
01542                     # Close table
01543                     self.request.write(self.formatter.table(0))
01544                     self.request.write(self._line_anchordef())
01545                     self.in_table = 0
01546 
01547             # Scan line, format and write
01548             formatted_line = self.scan(line, inhibit_p=inhibit_p)
01549             self.request.write(formatted_line)
01550 
01551 
01552         # Close code displays, paragraphs, tables and open lists
01553         self.request.write(self._undent())
01554         if self.in_pre: self.request.write(self.formatter.preformatted(0))
01555         if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
01556         if self.in_table: self.request.write(self.formatter.table(0))
01557 
01558         if self.wrapping_div_class:
01559             self.request.write(self.formatter.div(0))
01560 
01561 
01562     # Private helpers ------------------------------------------------------------
01563 
01564     def setParser(self, name):
01565         """ Set parser to parser named 'name' """
01566         # XXX this is done by the formatter as well
01567         try:
01568             self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
01569         except wikiutil.PluginMissingError:
01570             self.parser = None
01571 
# Remove the module-level placeholder `_` (the identity lambda assigned near
# the top of this file) from the module namespace again.
01572 del _