Back to index

moin  1.9.0~rc2
text_moin160a_wiki.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - MoinMoin Wiki Markup Parser
00004 
00005     @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de>,
00006                 2006 by MoinMoin:ThomasWaldmann
00007     @license: GNU GPL, see COPYING for details.
00008 """
00009 
00010 import re
00011 
00012 import wikiutil160a as wikiutil
00013 from MoinMoin import config, macro
00014 
# NOTE(review): presumably the list of caching dependencies of this parser
# module (empty = none) -- confirm against the code that reads it.
Dependencies = []
00016 
00017 class Parser:
00018     """
00019         Object that turns Wiki markup into HTML.
00020 
00021         All formatting commands can be parsed one line at a time, though
00022         some state is carried over between lines.
00023 
00024         Methods named like _*_repl() are responsible to handle the named regex
00025         patterns defined in print_html().
00026     """
00027 
    # allow caching
    caching = 1
    Dependencies = []

    # some common strings
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
    # quoted strings (we require that there is at least one char (that is not the quoting char)
    # inside to not confuse stuff like '''Contact:''' (just a bold Contact:) with interwiki markup
    # OtherWiki:'Page with blanks'
    sq_string = ur"('[^']+?')" # single quoted string
    dq_string = ur"(\"[^\"]+?\")" # double quoted string
    q_string = ur"(%s|%s)" % (sq_string, dq_string) # quoted string
    # URL schemes that are routed to attachment() instead of being plain links
    attachment_schemas = ["attachment", "inline", "drawing"]
    # regex-escaped punctuation sets, used as character-class content below
    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    punct_no_quote_pattern = re.escape(u'''}]|:,.)?!''')
    # alternation of all recognised URL schemes: the built-in ones, the
    # attachment schemas and (only when non-empty) config.url_schemas
    url_pattern = (u'http|https|ftp|nntp|news|mailto|telnet|wiki|file|irc|' +
            u'|'.join(attachment_schemas) +
            (config.url_schemas and u'|' + u'|'.join(config.url_schemas) or ''))

    # some common rules
    # word_rule is assembled from config.chars_upper / config.chars_lower plus
    # the optional parent prefix and the optional child (subpage) prefix
    word_rule = ur'(?:(?<![%(u)s%(l)s])|^)%(parent)s(?:%(subpages)s(?:[%(u)s][%(l)s]+){2,})+(?![%(u)s%(l)s]+)' % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'subpages': wikiutil.CHILD_PREFIX + '?',
        'parent': ur'(?:%s)?' % re.escape(PARENT_PREFIX),
    }
    # url_rule: "<scheme>:" followed either by a run of non-space characters
    # (punctuation only counts when more URL text follows it) or by a quoted string
    url_rule = ur'%(url_guard)s(%(url)s)\:(([^\s<%(punct)s]|([%(punctnq)s][^\s<%(punct)s]))+|%(q_string)s)' % {
        'url_guard': ur'(^|(?<!\w))',
        'url': url_pattern,
        'punct': punct_pattern,
        'punctnq': punct_no_quote_pattern,
        'q_string': q_string,
    }

    # line-start patterns for ordered-list and definition-list items
    ol_rule = ur"^\s+(?:[0-9]+|[aAiI])\.(?:#\d+)?\s"
    dl_rule = ur"^\s+.*?::\s"

    # this is used inside <pre> / parser sections (we just want to know when it's over):
    pre_formatting_rules = ur"""(?P<pre>(\}\}\}))"""

    # the big, fat, ugly one ;)
    # One named group per line; per the class docstring each group <name> is
    # handled by the method _<name>_repl().  The doubled "%%(macronames)s" below
    # survives this first %-substitution and is filled in per instance by __init__.
    # NOTE(review): the lines are newline-separated here; presumably they are
    # joined into one alternation by code outside this view -- confirm.
    formatting_rules = ur"""(?P<ent_numeric>&#(\d{1,5}|x[0-9a-fA-F]+);)
(?:(?P<emph_ibb>'''''(?=[^']+'''))
(?P<emph_ibi>'''''(?=[^']+''))
(?P<emph_ib_or_bi>'{5}(?=[^']))
(?P<emph>'{2,3})
(?P<u>__)
(?P<sup>\^.*?\^)
(?P<sub>,,[^,]{1,40},,)
(?P<tt>\{\{\{.*?\}\}\})
(?P<parser>(\{\{\{(#!.*|\s*$)))
(?P<pre>(\{\{\{ ?|\}\}\}))
(?P<small>(\~- ?|-\~))
(?P<big>(\~\+ ?|\+\~))
(?P<strike>(--\(|\)--))
(?P<remark>(/\* ?| ?\*/))
(?P<rule>-{4,})
(?P<comment>^\#\#.*$)
(?P<macro>\[\[(%%(macronames)s)(?:\(.*?\))?\]\]))
(?P<ol>%(ol_rule)s)
(?P<dl>%(dl_rule)s)
(?P<li>^\s+\*\s*)
(?P<li_none>^\s+\.\s*)
(?P<indent>^\s+)
(?P<tableZ>\|\| $)
(?P<table>(?:\|\|)+(?:<[^>]*?>)?(?!\|? $))
(?P<heading>^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)
(?P<interwiki>[A-Z][a-zA-Z]+\:(%(q_string)s|([^\s'\"\:<\|]([^\s%(punct)s]|([%(punct)s][^\s%(punct)s]))+)))
(?P<word>%(word_rule)s)
(?P<url_bracket>\[((%(url)s)\:|#|\:)[^\s\]]+(\s[^\]]+)?\])
(?P<url>%(url_rule)s)
(?P<email>[-\w._+]+\@[\w-]+(\.[\w-]+)+)
(?P<smiley>(?<=\s)(%(smiley)s)(?=\s))
(?P<smileyA>^(%(smiley)s)(?=\s))
(?P<ent_symbolic>&[a-zA-Z]+;)
(?P<ent>[<>&])
(?P<wikiname_bracket>\[%(q_string)s.*?\])
(?P<tt_bt>`.*?`)"""  % {

        'url': url_pattern,
        'punct': punct_pattern,
        'q_string': q_string,
        'ol_rule': ol_rule,
        'dl_rule': dl_rule,
        'url_rule': url_rule,
        'word_rule': word_rule,
        'smiley': u'|'.join(map(re.escape, config.smileys))}

    # Don't start p before these
    # (the space-separated names are split and turned into a dict mapping
    # each name to 1, so membership can be tested by key)
    no_new_p_before = ("heading rule table tableZ tr td "
                       "ul ol dl dt dd li li_none indent "
                       "macro parser pre")
    no_new_p_before = no_new_p_before.split()
    no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
00122 
00123     def __init__(self, raw, request, **kw):
00124         self.raw = raw
00125         self.request = request
00126         self.form = request.form # Macro object uses this
00127         self._ = request.getText
00128         self.cfg = request.cfg
00129         self.line_anchors = kw.get('line_anchors', True)
00130         self.macro = None
00131         self.start_line = kw.get('start_line', 0)
00132 
00133         # currently, there is only a single, optional argument to this parser and
00134         # (when given), it is used as class(es) for a div wrapping the formatter output
00135         # either use a single class like "comment" or multiple like "comment/red/dotted"
00136         self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ')
00137 
00138         self.is_em = 0 # must be int
00139         self.is_b = 0 # must be int
00140         self.is_u = False
00141         self.is_strike = False
00142         self.is_big = False
00143         self.is_small = False
00144         self.is_remark = False
00145 
00146         self.lineno = 0
00147         self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
00148         self.in_li = 0 # between <li> and </li>
00149         self.in_dd = 0 # between <dd> and </dd>
00150 
00151         # states of the parser concerning being inside/outside of some "pre" section:
00152         # None == we are not in any kind of pre section (was: 0)
00153         # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1)
00154         # 'found_parser' == we found a valid parser (was: 2)
00155         # 'no_parser' == we have no (valid) parser, use a normal <pre>...</pre> (was: 3)
00156         self.in_pre = None
00157 
00158         self.in_table = 0
00159         self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
00160         self.titles = request._page_headings
00161 
00162         # holds the nesting level (in chars) of open lists
00163         self.list_indents = []
00164         self.list_types = []
00165 
00166         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(macro.getNames(self.cfg))}
00167 
00168     def _close_item(self, result):
00169         #result.append("<!-- close item begin -->\n")
00170         if self.in_table:
00171             result.append(self.formatter.table(0))
00172             self.in_table = 0
00173         if self.in_li:
00174             self.in_li = 0
00175             if self.formatter.in_p:
00176                 result.append(self.formatter.paragraph(0))
00177             result.append(self.formatter.listitem(0))
00178         if self.in_dd:
00179             self.in_dd = 0
00180             if self.formatter.in_p:
00181                 result.append(self.formatter.paragraph(0))
00182             result.append(self.formatter.definition_desc(0))
00183         #result.append("<!-- close item end -->\n")
00184 
00185 
00186     def interwiki(self, target_and_text, **kw):
00187         # TODO: maybe support [wiki:Page http://wherever/image.png] ?
00188         scheme, rest = target_and_text.split(':', 1)
00189         wikiname, pagename, text = wikiutil.split_wiki(rest)
00190         if not pagename:
00191             pagename = self.formatter.page.page_name
00192         if not text:
00193             text = pagename
00194         #self.request.log("interwiki: split_wiki -> %s.%s.%s" % (wikiname,pagename,text))
00195 
00196         if wikiname.lower() == 'self': # [wiki:Self:LocalPage text] or [:LocalPage:text]
00197             return self._word_repl(pagename, text)
00198 
00199         # check for image URL, and possibly return IMG tag
00200         if not kw.get('pretty_url', 0) and wikiutil.isPicture(pagename):
00201             dummy, wikiurl, dummy, wikitag_bad = wikiutil.resolve_wiki(self.request, rest)
00202             href = wikiutil.join_wiki(wikiurl, pagename)
00203             #self.request.log("interwiki: join_wiki -> %s.%s.%s" % (wikiurl,pagename,href))
00204             return self.formatter.image(src=href)
00205 
00206         return (self.formatter.interwikilink(1, wikiname, pagename) +
00207                 self.formatter.text(text) +
00208                 self.formatter.interwikilink(0, wikiname, pagename))
00209 
00210     def attachment(self, target_and_text, **kw):
00211         """ This gets called on attachment URLs """
00212         _ = self._
00213         #self.request.log("attachment: target_and_text %s" % target_and_text)
00214         scheme, fname, text = wikiutil.split_wiki(target_and_text)
00215         if not text:
00216             text = fname
00217 
00218         if scheme == 'drawing':
00219             return self.formatter.attachment_drawing(fname, text)
00220 
00221         # check for image, and possibly return IMG tag (images are always inlined)
00222         if not kw.get('pretty_url', 0) and wikiutil.isPicture(fname):
00223             return self.formatter.attachment_image(fname)
00224 
00225         # inline the attachment
00226         if scheme == 'inline':
00227             return self.formatter.attachment_inlined(fname, text)
00228 
00229         return self.formatter.attachment_link(fname, text)
00230 
00231     def _u_repl(self, word):
00232         """Handle underline."""
00233         self.is_u = not self.is_u
00234         return self.formatter.underline(self.is_u)
00235 
00236     def _strike_repl(self, word):
00237         """Handle strikethrough."""
00238         # XXX we don't really enforce the correct sequence --( ... )-- here
00239         self.is_strike = not self.is_strike
00240         return self.formatter.strike(self.is_strike)
00241 
00242     def _remark_repl(self, word):
00243         """Handle remarks."""
00244         # XXX we don't really enforce the correct sequence /* ... */ here
00245         self.is_remark = not self.is_remark
00246         span_kw = {
00247             'style': self.request.user.show_comments and "display:''" or "display:none",
00248             'class': "comment",
00249         }
00250         return self.formatter.span(self.is_remark, **span_kw)
00251 
00252     def _small_repl(self, word):
00253         """Handle small."""
00254         if word.strip() == '~-' and self.is_small:
00255             return self.formatter.text(word)
00256         if word.strip() == '-~' and not self.is_small:
00257             return self.formatter.text(word)
00258         self.is_small = not self.is_small
00259         return self.formatter.small(self.is_small)
00260 
00261     def _big_repl(self, word):
00262         """Handle big."""
00263         if word.strip() == '~+' and self.is_big:
00264             return self.formatter.text(word)
00265         if word.strip() == '+~' and not self.is_big:
00266             return self.formatter.text(word)
00267         self.is_big = not self.is_big
00268         return self.formatter.big(self.is_big)
00269 
00270     def _emph_repl(self, word):
00271         """Handle emphasis, i.e. '' and '''."""
00272         ##print "#", self.is_b, self.is_em, "#"
00273         if len(word) == 3:
00274             self.is_b = not self.is_b
00275             if self.is_em and self.is_b:
00276                 self.is_b = 2
00277             return self.formatter.strong(self.is_b)
00278         else:
00279             self.is_em = not self.is_em
00280             if self.is_em and self.is_b:
00281                 self.is_em = 2
00282             return self.formatter.emphasis(self.is_em)
00283 
00284     def _emph_ibb_repl(self, word):
00285         """Handle mixed emphasis, i.e. ''''' followed by '''."""
00286         self.is_b = not self.is_b
00287         self.is_em = not self.is_em
00288         if self.is_em and self.is_b:
00289             self.is_b = 2
00290         return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00291 
00292     def _emph_ibi_repl(self, word):
00293         """Handle mixed emphasis, i.e. ''''' followed by ''."""
00294         self.is_b = not self.is_b
00295         self.is_em = not self.is_em
00296         if self.is_em and self.is_b:
00297             self.is_em = 2
00298         return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00299 
00300     def _emph_ib_or_bi_repl(self, word):
00301         """Handle mixed emphasis, exactly five '''''."""
00302         ##print "*", self.is_b, self.is_em, "*"
00303         b_before_em = self.is_b > self.is_em > 0
00304         self.is_b = not self.is_b
00305         self.is_em = not self.is_em
00306         if b_before_em:
00307             return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00308         else:
00309             return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00310 
00311 
00312     def _sup_repl(self, word):
00313         """Handle superscript."""
00314         return self.formatter.sup(1) + \
00315             self.formatter.text(word[1:-1]) + \
00316             self.formatter.sup(0)
00317 
00318     def _sub_repl(self, word):
00319         """Handle subscript."""
00320         return self.formatter.sub(1) + \
00321             self.formatter.text(word[2:-2]) + \
00322             self.formatter.sub(0)
00323 
00324 
00325     def _rule_repl(self, word):
00326         """Handle sequences of dashes."""
00327         result = self._undent() + self._closeP()
00328         if len(word) <= 4:
00329             result = result + self.formatter.rule()
00330         else:
00331             # Create variable rule size 1 - 6. Actual size defined in css.
00332             size = min(len(word), 10) - 4
00333             result = result + self.formatter.rule(size)
00334         return result
00335 
00336 
00337     def _word_repl(self, word, text=None):
00338         """Handle WikiNames."""
00339 
00340         # check for parent links
00341         # !!! should use wikiutil.AbsPageName here, but setting `text`
00342         # correctly prevents us from doing this for now
00343         if word.startswith(wikiutil.PARENT_PREFIX):
00344             if not text:
00345                 text = word
00346             word = '/'.join(filter(None, self.formatter.page.page_name.split('/')[:-1] + [word[wikiutil.PARENT_PREFIX_LEN:]]))
00347 
00348         if not text:
00349             # if a simple, self-referencing link, emit it as plain text
00350             if word == self.formatter.page.page_name:
00351                 return self.formatter.text(word)
00352             text = word
00353         if word.startswith(wikiutil.CHILD_PREFIX):
00354             word = self.formatter.page.page_name + '/' + word[wikiutil.CHILD_PREFIX_LEN:]
00355 
00356         # handle anchors
00357         parts = word.split("#", 1)
00358         anchor = ""
00359         if len(parts) == 2:
00360             word, anchor = parts
00361 
00362         return (self.formatter.pagelink(1, word, anchor=anchor) +
00363                 self.formatter.text(text) +
00364                 self.formatter.pagelink(0, word))
00365 
00366     def _notword_repl(self, word):
00367         """Handle !NotWikiNames."""
00368         return self.formatter.nowikiword(word[1:])
00369 
00370     def _interwiki_repl(self, word):
00371         """Handle InterWiki links."""
00372         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
00373         if wikitag_bad:
00374             return self.formatter.text(word)
00375         else:
00376             return self.interwiki("wiki:" + word)
00377 
00378     def _url_repl(self, word):
00379         """Handle literal URLs including inline images."""
00380         scheme = word.split(":", 1)[0]
00381 
00382         if scheme == "wiki":
00383             return self.interwiki(word)
00384 
00385         if scheme in self.attachment_schemas:
00386             return self.attachment(word)
00387 
00388         if wikiutil.isPicture(word):
00389             word = wikiutil.mapURL(self.request, word)
00390             # Get image name http://here.com/dir/image.gif -> image
00391             name = word.split('/')[-1]
00392             name = ''.join(name.split('.')[:-1])
00393             return self.formatter.image(src=word, alt=name)
00394         else:
00395             return (self.formatter.url(1, word, css=scheme) +
00396                     self.formatter.text(word) +
00397                     self.formatter.url(0))
00398 
00399 
00400     def _wikiname_bracket_repl(self, text):
00401         """Handle special-char wikinames with link text, like:
00402            ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]i
00403         """
00404         word = text[1:-1] # strip brackets
00405         first_char = word[0]
00406         if first_char in wikiutil.QUOTE_CHARS:
00407             # split on closing quote
00408             target, linktext = word[1:].split(first_char, 1)
00409         else: # not quoted
00410             # split on whitespace
00411             target, linktext = word.split(None, 1)
00412         if target:
00413             linktext = linktext.strip()
00414             return self._word_repl(target, linktext)
00415         else:
00416             return self.formatter.text(text)
00417 
00418 
00419     def _url_bracket_repl(self, word):
00420         """Handle bracketed URLs."""
00421         word = word[1:-1] # strip brackets
00422 
00423         # Local extended link? [:page name:link text] XXX DEPRECATED
00424         if word[0] == ':':
00425             words = word[1:].split(':', 1)
00426             if len(words) == 1:
00427                 words = words * 2
00428             target_and_text = 'wiki:Self:%s %s' % (wikiutil.quoteName(words[0]), words[1])
00429             return self.interwiki(target_and_text, pretty_url=1)
00430 
00431         scheme_and_rest = word.split(":", 1)
00432         if len(scheme_and_rest) == 1: # no scheme
00433             # Traditional split on space
00434             words = word.split(None, 1)
00435             if len(words) == 1:
00436                 words = words * 2
00437 
00438             if words[0].startswith('#'): # anchor link
00439                 return (self.formatter.url(1, words[0]) +
00440                         self.formatter.text(words[1]) +
00441                         self.formatter.url(0))
00442         else:
00443             scheme, rest = scheme_and_rest
00444             if scheme == "wiki":
00445                 return self.interwiki(word, pretty_url=1)
00446             if scheme in self.attachment_schemas:
00447                 return self.attachment(word, pretty_url=1)
00448 
00449             words = word.split(None, 1)
00450             if len(words) == 1:
00451                 words = words * 2
00452 
00453         if wikiutil.isPicture(words[1]) and re.match(self.url_rule, words[1]):
00454             return (self.formatter.url(1, words[0], css='external', do_escape=0) +
00455                     self.formatter.image(title=words[0], alt=words[0], src=words[1]) +
00456                     self.formatter.url(0))
00457         else:
00458             return (self.formatter.url(1, words[0], css=scheme, do_escape=0) +
00459                     self.formatter.text(words[1]) +
00460                     self.formatter.url(0))
00461 
00462 
00463     def _email_repl(self, word):
00464         """Handle email addresses (without a leading mailto:)."""
00465         return (self.formatter.url(1, "mailto:" + word, css='mailto') +
00466                 self.formatter.text(word) +
00467                 self.formatter.url(0))
00468 
00469 
00470     def _ent_repl(self, word):
00471         """Handle SGML entities."""
00472         return self.formatter.text(word)
00473         #return {'&': '&amp;',
00474         #        '<': '&lt;',
00475         #        '>': '&gt;'}[word]
00476 
00477     def _ent_numeric_repl(self, word):
00478         """Handle numeric (decimal and hexadecimal) SGML entities."""
00479         return self.formatter.rawHTML(word)
00480 
00481     def _ent_symbolic_repl(self, word):
00482         """Handle symbolic SGML entities."""
00483         return self.formatter.rawHTML(word)
00484 
00485     def _indent_repl(self, match):
00486         """Handle pure indentation (no - * 1. markup)."""
00487         result = []
00488         if not (self.in_li or self.in_dd):
00489             self._close_item(result)
00490             self.in_li = 1
00491             css_class = None
00492             if self.line_was_empty and not self.first_list_item:
00493                 css_class = 'gap'
00494             result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00495         return ''.join(result)
00496 
00497     def _li_none_repl(self, match):
00498         """Handle type=none (" .") lists."""
00499         result = []
00500         self._close_item(result)
00501         self.in_li = 1
00502         css_class = None
00503         if self.line_was_empty and not self.first_list_item:
00504             css_class = 'gap'
00505         result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00506         return ''.join(result)
00507 
00508     def _li_repl(self, match):
00509         """Handle bullet (" *") lists."""
00510         result = []
00511         self._close_item(result)
00512         self.in_li = 1
00513         css_class = None
00514         if self.line_was_empty and not self.first_list_item:
00515             css_class = 'gap'
00516         result.append(self.formatter.listitem(1, css_class=css_class))
00517         return ''.join(result)
00518 
00519     def _ol_repl(self, match):
00520         """Handle numbered lists."""
00521         return self._li_repl(match)
00522 
00523     def _dl_repl(self, match):
00524         """Handle definition lists."""
00525         result = []
00526         self._close_item(result)
00527         self.in_dd = 1
00528         result.extend([
00529             self.formatter.definition_term(1),
00530             self.formatter.text(match[1:-3].lstrip(' ')),
00531             self.formatter.definition_term(0),
00532             self.formatter.definition_desc(1),
00533         ])
00534         return ''.join(result)
00535 
00536 
00537     def _indent_level(self):
00538         """Return current char-wise indent level."""
00539         return len(self.list_indents) and self.list_indents[-1]
00540 
00541 
00542     def _indent_to(self, new_level, list_type, numtype, numstart):
00543         """Close and open lists."""
00544         openlist = []   # don't make one out of these two statements!
00545         closelist = []
00546 
00547         if self._indent_level() != new_level and self.in_table:
00548             closelist.append(self.formatter.table(0))
00549             self.in_table = 0
00550 
00551         while self._indent_level() > new_level:
00552             self._close_item(closelist)
00553             if self.list_types[-1] == 'ol':
00554                 tag = self.formatter.number_list(0)
00555             elif self.list_types[-1] == 'dl':
00556                 tag = self.formatter.definition_list(0)
00557             else:
00558                 tag = self.formatter.bullet_list(0)
00559             closelist.append(tag)
00560 
00561             del self.list_indents[-1]
00562             del self.list_types[-1]
00563 
00564             if self.list_types: # we are still in a list
00565                 if self.list_types[-1] == 'dl':
00566                     self.in_dd = 1
00567                 else:
00568                     self.in_li = 1
00569 
00570         # Open new list, if necessary
00571         if self._indent_level() < new_level:
00572             self.list_indents.append(new_level)
00573             self.list_types.append(list_type)
00574 
00575             if self.formatter.in_p:
00576                 closelist.append(self.formatter.paragraph(0))
00577 
00578             if list_type == 'ol':
00579                 tag = self.formatter.number_list(1, numtype, numstart)
00580             elif list_type == 'dl':
00581                 tag = self.formatter.definition_list(1)
00582             else:
00583                 tag = self.formatter.bullet_list(1)
00584             openlist.append(tag)
00585 
00586             self.first_list_item = 1
00587             self.in_li = 0
00588             self.in_dd = 0
00589 
00590         # If list level changes, close an open table
00591         if self.in_table and (openlist or closelist):
00592             closelist[0:0] = [self.formatter.table(0)]
00593             self.in_table = 0
00594 
00595         self.in_list = self.list_types != []
00596         return ''.join(closelist) + ''.join(openlist)
00597 
00598 
00599     def _undent(self):
00600         """Close all open lists."""
00601         result = []
00602         #result.append("<!-- _undent start -->\n")
00603         self._close_item(result)
00604         for type in self.list_types[::-1]:
00605             if type == 'ol':
00606                 result.append(self.formatter.number_list(0))
00607             elif type == 'dl':
00608                 result.append(self.formatter.definition_list(0))
00609             else:
00610                 result.append(self.formatter.bullet_list(0))
00611         #result.append("<!-- _undent end -->\n")
00612         self.list_indents = []
00613         self.list_types = []
00614         return ''.join(result)
00615 
00616 
00617     def _tt_repl(self, word):
00618         """Handle inline code."""
00619         return self.formatter.code(1) + \
00620             self.formatter.text(word[3:-3]) + \
00621             self.formatter.code(0)
00622 
00623 
00624     def _tt_bt_repl(self, word):
00625         """Handle backticked inline code."""
00626         # if len(word) == 2: return "" // removed for FCK editor
00627         return self.formatter.code(1, css="backtick") + \
00628             self.formatter.text(word[1:-1]) + \
00629             self.formatter.code(0)
00630 
00631 
00632     def _getTableAttrs(self, attrdef):
00633         # skip "|" and initial "<"
00634         while attrdef and attrdef[0] == "|":
00635             attrdef = attrdef[1:]
00636         if not attrdef or attrdef[0] != "<":
00637             return {}, ''
00638         attrdef = attrdef[1:]
00639 
00640         # extension for special table markup
00641         def table_extension(key, parser, attrs, wiki_parser=self):
00642             """ returns: tuple (found_flag, msg)
00643                 found_flag: whether we found something and were able to process it here
00644                   true for special stuff like 100% or - or #AABBCC
00645                   false for style xxx="yyy" attributes
00646                 msg: "" or an error msg
00647             """
00648             _ = wiki_parser._
00649             found = False
00650             msg = ''
00651             if key[0] in "0123456789":
00652                 token = parser.get_token()
00653                 if token != '%':
00654                     wanted = '%'
00655                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
00656                         'wanted': wanted, 'key': key, 'token': token}
00657                 else:
00658                     try:
00659                         dummy = int(key)
00660                     except ValueError:
00661                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
00662                             'key': key, 'token': token}
00663                     else:
00664                         found = True
00665                         attrs['width'] = '"%s%%"' % key
00666             elif key == '-':
00667                 arg = parser.get_token()
00668                 try:
00669                     dummy = int(arg)
00670                 except ValueError:
00671                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
00672                         'arg': arg, 'key': key}
00673                 else:
00674                     found = True
00675                     attrs['colspan'] = '"%s"' % arg
00676             elif key == '|':
00677                 arg = parser.get_token()
00678                 try:
00679                     dummy = int(arg)
00680                 except ValueError:
00681                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
00682                         'arg': arg, 'key': key}
00683                 else:
00684                     found = True
00685                     attrs['rowspan'] = '"%s"' % arg
00686             elif key == '(':
00687                 found = True
00688                 attrs['align'] = '"left"'
00689             elif key == ':':
00690                 found = True
00691                 attrs['align'] = '"center"'
00692             elif key == ')':
00693                 found = True
00694                 attrs['align'] = '"right"'
00695             elif key == '^':
00696                 found = True
00697                 attrs['valign'] = '"top"'
00698             elif key == 'v':
00699                 found = True
00700                 attrs['valign'] = '"bottom"'
00701             elif key == '#':
00702                 arg = parser.get_token()
00703                 try:
00704                     if len(arg) != 6: raise ValueError
00705                     dummy = int(arg, 16)
00706                 except ValueError:
00707                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
00708                         'arg': arg, 'key': key}
00709                 else:
00710                     found = True
00711                     attrs['bgcolor'] = '"#%s"' % arg
00712             return found, self.formatter.rawHTML(msg)
00713 
00714         # scan attributes
00715         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
00716         if msg:
00717             msg = '<strong class="highlight">%s</strong>' % msg
00718         #self.request.log("parseAttributes returned %r" % attr)
00719         return attr, msg
00720 
00721     def _tableZ_repl(self, word):
00722         """Handle table row end."""
00723         if self.in_table:
00724             result = ''
00725             # REMOVED: check for self.in_li, p should always close
00726             if self.formatter.in_p:
00727                 result = self.formatter.paragraph(0)
00728             result += self.formatter.table_cell(0) + self.formatter.table_row(0)
00729             return result
00730         else:
00731             return self.formatter.text(word)
00732 
00733     def _table_repl(self, word):
00734         """Handle table cell separator."""
00735         if self.in_table:
00736             result = []
00737             # check for attributes
00738             attrs, attrerr = self._getTableAttrs(word)
00739 
00740             # start the table row?
00741             if self.table_rowstart:
00742                 self.table_rowstart = 0
00743                 result.append(self.formatter.table_row(1, attrs))
00744             else:
00745                 # Close table cell, first closing open p
00746                 # REMOVED check for self.in_li, paragraph should close always!
00747                 if self.formatter.in_p:
00748                     result.append(self.formatter.paragraph(0))
00749                 result.append(self.formatter.table_cell(0))
00750 
00751             # check for adjacent cell markers
00752             if word.count("|") > 2:
00753                 if not attrs.has_key('align') and \
00754                    not (attrs.has_key('style') and 'text-align' in attrs['style'].lower()):
00755                     # add center alignment if we don't have some alignment already
00756                     attrs['align'] = '"center"'
00757                 if not attrs.has_key('colspan'):
00758                     attrs['colspan'] = '"%d"' % (word.count("|")/2)
00759 
00760             # return the complete cell markup
00761             result.append(self.formatter.table_cell(1, attrs) + attrerr)
00762             result.append(self._line_anchordef())
00763             return ''.join(result)
00764         else:
00765             return self.formatter.text(word)
00766 
00767 
00768     def _heading_repl(self, word):
00769         """Handle section headings."""
00770         from MoinMoin.support.python_compatibility import hash_new
00771 
00772         h = word.strip()
00773         level = 1
00774         while h[level:level+1] == '=':
00775             level += 1
00776         depth = min(5, level)
00777 
00778         # FIXME: needed for Included pages but might still result in unpredictable results
00779         # when included the same page multiple times
00780         title_text = h[level:-level].strip()
00781         pntt = self.formatter.page.page_name + title_text
00782         self.titles.setdefault(pntt, 0)
00783         self.titles[pntt] += 1
00784 
00785         unique_id = ''
00786         if self.titles[pntt] > 1:
00787             unique_id = '-%d' % self.titles[pntt]
00788         result = self._closeP()
00789         result += self.formatter.heading(1, depth, id="head-"+hash_new('sha1', pntt.encode(config.charset)).hexdigest()+unique_id)
00790 
00791         return (result + self.formatter.text(title_text) +
00792                 self.formatter.heading(0, depth))
00793 
00794     def _parser_repl(self, word):
00795         """Handle parsed code displays."""
00796         if word.startswith('{{{'):
00797             word = word[3:]
00798 
00799         self.parser = None
00800         self.parser_name = None
00801         s_word = word.strip()
00802         if s_word == '#!':
00803             # empty bang paths lead to a normal code display
00804             # can be used to escape real, non-empty bang paths
00805             word = ''
00806             self.in_pre = 'no_parser'
00807             return self._closeP() + self.formatter.preformatted(1)
00808         elif s_word.startswith('#!'):
00809             # First try to find a parser for this
00810             parser_name = s_word[2:].split()[0]
00811             self.setParser(parser_name)
00812 
00813         if self.parser:
00814             self.parser_name = parser_name
00815             self.in_pre = 'found_parser'
00816             self.parser_lines = [word]
00817             return ''
00818         elif s_word:
00819             self.in_pre = 'no_parser'
00820             return self._closeP() + self.formatter.preformatted(1) + \
00821                    self.formatter.text(s_word + ' (-)')
00822         else:
00823             self.in_pre = 'search_parser'
00824             return ''
00825 
00826     def _pre_repl(self, word):
00827         """Handle code displays."""
00828         word = word.strip()
00829         if word == '{{{' and not self.in_pre:
00830             self.in_pre = 'no_parser'
00831             return self._closeP() + self.formatter.preformatted(1)
00832         elif word == '}}}' and self.in_pre:
00833             self.in_pre = None
00834             self.inhibit_p = 0
00835             return self.formatter.preformatted(0)
00836         return self.formatter.text(word)
00837 
00838 
00839     def _smiley_repl(self, word):
00840         """Handle smileys."""
00841         return self.formatter.smiley(word)
00842 
00843     _smileyA_repl = _smiley_repl
00844 
00845 
00846     def _comment_repl(self, word):
00847         # if we are in a paragraph, we must close it so that normal text following
00848         # in the line below the comment will reopen a new paragraph.
00849         if self.formatter.in_p:
00850             self.formatter.paragraph(0)
00851         self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
00852         return self.formatter.comment(word)
00853 
00854     def _closeP(self):
00855         if self.formatter.in_p:
00856             return self.formatter.paragraph(0)
00857         return ''
00858 
00859     def _macro_repl(self, word):
00860         """Handle macros ([[macroname]])."""
00861         macro_name = word[2:-2]
00862         self.inhibit_p = 0 # 1 fixes UserPreferences, 0 fixes paragraph formatting for macros
00863 
00864         # check for arguments
00865         args = None
00866         if macro_name.count("("):
00867             macro_name, args = macro_name.split('(', 1)
00868             args = args[:-1]
00869 
00870         # create macro instance
00871         if self.macro is None:
00872             self.macro = macro.Macro(self)
00873         return self.formatter.macro(self.macro, macro_name, args)
00874 
00875     def scan(self, scan_re, line, inhibit_p=False):
00876         """ Scans one line
00877         Append text before match, invoke replace() with match, and add text after match.
00878         """
00879         result = []
00880         lastpos = 0
00881 
00882         ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
00883 
00884         for match in scan_re.finditer(line):
00885             # Add text before the match
00886             if lastpos < match.start():
00887 
00888                 ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
00889 
00890                 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p):
00891                     result.append(self.formatter.paragraph(1, css_class="line862"))
00892                 result.append(self.formatter.text(line[lastpos:match.start()]))
00893 
00894             # Replace match with markup
00895             if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
00896                     self.in_table or self.in_list):
00897                 result.append(self.formatter.paragraph(1, css_class="line867"))
00898             result.append(self.replace(match, inhibit_p))
00899             lastpos = match.end()
00900 
00901         ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
00902 
00903         # Add paragraph with the remainder of the line
00904         if not (inhibit_p or self.in_pre or self.in_li or self.in_dd or self.inhibit_p or
00905                 self.formatter.in_p) and lastpos < len(line):
00906             result.append(self.formatter.paragraph(1, css_class="line874"))
00907         result.append(self.formatter.text(line[lastpos:]))
00908         return u''.join(result)
00909 
00910     def replace(self, match, inhibit_p=False):
00911         """ Replace match using type name """
00912         result = []
00913         for type, hit in match.groupdict().items():
00914             if hit is not None and not type in ["hmarker", ]:
00915 
00916                 ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
00917                 # Open p for certain types
00918                 if not (inhibit_p or self.inhibit_p or self.formatter.in_p
00919                         or self.in_pre or (type in self.no_new_p_before)):
00920                     result.append(self.formatter.paragraph(1, css_class="line891"))
00921 
00922                 # Get replace method and replace hit
00923                 replace = getattr(self, '_' + type + '_repl')
00924                 result.append(replace(hit))
00925                 return ''.join(result)
00926         else:
00927             # We should never get here
00928             import pprint
00929             raise Exception("Can't handle match " + `match`
00930                 + "\n" + pprint.pformat(match.groupdict())
00931                 + "\n" + pprint.pformat(match.groups()) )
00932 
00933         return ""
00934 
00935     def _line_anchordef(self):
00936         if self.line_anchors and not self.line_anchor_printed:
00937             self.line_anchor_printed = 1
00938             return self.formatter.line_anchordef(self.lineno)
00939         else:
00940             return ''
00941 
    def format(self, formatter, inhibit_p=False):
        """ For each line, scan through looking for magic
            strings, outputting verbatim any intervening text.

            The raw text (self.raw) is split into lines; each line is
            classified (processing instruction, code display, empty line,
            list / table line) and then run through scan(). All output is
            written with self.request.write(); nothing is returned.

            @param formatter: formatter instance used to emit the markup;
                              it is stored as self.formatter
            @param inhibit_p: passed on to scan() for every line; if true,
                              scan() does not open paragraphs
        """
        self.formatter = formatter
        self.hilite_re = self.formatter.page.hilite_re

        # prepare regex patterns
        # (the rule sets are multi-line strings, one alternative per line)
        rules = self.formatting_rules.replace('\n', '|')
        if self.cfg.bang_meta:
            # with bang_meta enabled, "!" followed by a wiki word gets its own rule
            rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
                'word_rule': self.word_rule,
                'rules': rules,
            }
        pre_rules = self.pre_formatting_rules.replace('\n', '|')
        self.request.clock.start('compile_huge_and_ugly')
        scan_re = re.compile(rules, re.UNICODE)
        pre_scan_re = re.compile(pre_rules, re.UNICODE)
        number_re = re.compile(self.ol_rule, re.UNICODE)
        term_re = re.compile(self.dl_rule, re.UNICODE)
        indent_re = re.compile(ur"^\s*", re.UNICODE)
        eol_re = re.compile(r'\r?\n', re.UNICODE)
        self.request.clock.stop('compile_huge_and_ugly')

        # get text and replace TABs
        rawtext = self.raw.expandtabs()

        # go through the lines
        self.lineno = self.start_line
        self.lines = eol_re.split(rawtext)
        self.line_is_empty = 0

        # stays set until the first line that is not a processing instruction
        self.in_processing_instructions = 1

        if self.wrapping_div_class:
            div_kw = {'css_class': self.wrapping_div_class, }
            if 'comment' in self.wrapping_div_class.split():
                # show comment divs depending on user profile (and wiki configuration)
                div_kw['style'] = self.request.user.show_comments and "display:''" or "display:none"
            self.request.write(self.formatter.div(1, **div_kw))

        # Main loop
        for line in self.lines:
            # reset the per-line state
            self.lineno += 1
            self.line_anchor_printed = 0
            if not self.in_table:
                self.request.write(self._line_anchordef())
            self.table_rowstart = 1
            self.line_was_empty = self.line_is_empty
            self.line_is_empty = 0
            self.first_list_item = 0
            self.inhibit_p = 0

            # ignore processing instructions
            if self.in_processing_instructions:
                found = False
                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
                           "#pragma", "#form", "#acl", "#language"):
                    if line.lower().startswith(pi):
                        # the line is only emitted as a formatter comment
                        self.request.write(self.formatter.comment(line))
                        found = True
                        break
                if not found:
                    self.in_processing_instructions = 0
                else:
                    continue # do not parse this line
            # self.in_pre is a small state machine: a false value (not in a
            # code display), 'search_parser' (display opened, bang path may
            # follow on this line), 'found_parser' (collecting lines for a
            # parser) or 'no_parser' (plain preformatted text)
            if self.in_pre:
                # TODO: move this into function
                # still looking for processing instructions
                if self.in_pre == 'search_parser':
                    self.parser = None
                    parser_name = ''
                    if line.strip().startswith("#!"):
                        parser_name = line.strip()[2:].split()[0]
                        self.setParser(parser_name)

                    if self.parser:
                        self.in_pre = 'found_parser'
                        self.parser_lines = [line]
                        self.parser_name = parser_name
                        continue
                    else:
                        # no usable bang path: fall back to a plain code display
                        self.request.write(self._closeP() +
                                           self.formatter.preformatted(1))
                        self.in_pre = 'no_parser'
                if self.in_pre == 'found_parser':
                    # processing mode
                    try:
                        endpos = line.index("}}}")
                    except ValueError:
                        # no end marker on this line: keep collecting
                        self.parser_lines.append(line)
                        continue
                    if line[:endpos]:
                        self.parser_lines.append(line[:endpos])

                    # Close p before calling parser
                    # TODO: do we really need this?
                    self.request.write(self._closeP())
                    res = self.formatter.parser(self.parser_name, self.parser_lines)
                    self.request.write(res)
                    del self.parser_lines
                    self.in_pre = None
                    self.parser = None

                    # send rest of line through regex machinery
                    # (it skips the indent / table handling of the else branch below)
                    line = line[endpos+3:]
                    if not line.strip(): # just in the case "}}} " when we only have blanks left...
                        continue
            else:
                # we don't have \n as whitespace any more
                # This is the space between lines we join to one paragraph
                line += ' '

                # Paragraph break on empty lines
                if not line.strip():
                    if self.in_table:
                        # an empty line ends the table
                        self.request.write(self.formatter.table(0))
                        self.request.write(self._line_anchordef())
                        self.in_table = 0
                    # CHANGE: removed check for not self.list_types
                    # p should close on every empty line
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    self.line_is_empty = 1
                    continue

                # Check indent level
                indent = indent_re.match(line)
                indlen = len(indent.group(0))
                # an indented line is an unordered list item unless the
                # ol / dl rules below match
                indtype = "ul"
                numtype = None
                numstart = None
                if indlen:
                    match = number_re.match(line)
                    if match:
                        # text before the '.' gives the numbering type (first
                        # char only), text after it an optional "#<start>"
                        numtype, numstart = match.group(0).strip().split('.')
                        numtype = numtype[0]

                        if numstart and numstart[0] == "#":
                            numstart = int(numstart[1:])
                        else:
                            numstart = None

                        indtype = "ol"
                    else:
                        match = term_re.match(line)
                        if match:
                            indtype = "dl"

                # output proper indentation tags
                self.request.write(self._indent_to(indlen, indtype, numtype, numstart))

                # Table mode
                # TODO: move into function?
                # a table line starts (after the indent) with "||" and ends
                # with "|| " (the blank was appended above), hence the
                # minimum length of 5 characters plus indent
                if (not self.in_table and line[indlen:indlen + 2] == "||"
                    and line.endswith("|| ") and len(line) >= 5 + indlen):
                    # Start table
                    if self.list_types and not self.in_li:
                        self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
                        ## CHANGE: no automatic p on li
                        ##self.request.write(self.formatter.paragraph(1))
                        self.in_li = 1

                    # CHANGE: removed check for self.in_li
                    # paragraph should end before table, always!
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    attrs, attrerr = self._getTableAttrs(line[indlen+2:])
                    self.request.write(self.formatter.table(1, attrs) + attrerr)
                    self.in_table = True # self.lineno
                elif (self.in_table and not
                      # intra-table comments should not break a table
                      (line.startswith("##") or
                       line[indlen:indlen + 2] == "||" and
                       line.endswith("|| ") and
                       len(line) >= 5 + indlen)):

                    # Close table
                    self.request.write(self.formatter.table(0))
                    self.request.write(self._line_anchordef())
                    self.in_table = 0

            # Scan line, format and write
            # (inside a code display only the pre rules apply)
            scanning_re = self.in_pre and pre_scan_re or scan_re
            formatted_line = self.scan(scanning_re, line, inhibit_p=inhibit_p)
            self.request.write(formatted_line)
            if self.in_pre == 'no_parser':
                # plain code display: every source line ends with a line break
                self.request.write(self.formatter.linebreak())

        # Close code displays, paragraphs, tables and open lists
        self.request.write(self._undent())
        if self.in_pre: self.request.write(self.formatter.preformatted(0))
        if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
        if self.in_table: self.request.write(self.formatter.table(0))

        # close the wrapping div opened before the main loop
        if self.wrapping_div_class:
            self.request.write(self.formatter.div(0))
01139 
01140     # Private helpers ------------------------------------------------------------
01141 
01142     def setParser(self, name):
01143         """ Set parser to parser named 'name' """
01144         # XXX this is done by the formatter as well
01145         try:
01146             self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
01147         except wikiutil.PluginMissingError:
01148             self.parser = None