Back to index

moin  1.9.0~rc2
text_moin158_wiki.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - MoinMoin Wiki Markup Parser
00004 
00005     @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de>
00006     @license: GNU GPL, see COPYING for details.
00007 """
00008 
00009 import os, re
00010 from MoinMoin import config, wikiutil
00011 from MoinMoin import macro as wikimacro
00012 from MoinMoin.Page import Page
00013 from MoinMoin.util import web
00014 
00015 Dependencies = []
00016 
00017 class Parser:
00018     """
00019         Object that turns Wiki markup into HTML.
00020 
00021         All formatting commands can be parsed one line at a time, though
00022         some state is carried over between lines.
00023 
00024         Methods named like _*_repl() are responsible to handle the named regex
00025         patterns defined in print_html().
00026     """
00027 
    # allow caching
    caching = 1
    Dependencies = []

    # some common strings
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
    # Regex-escaped punctuation characters. url_rule and the interwiki rule
    # accept one of these only when it is followed by a further character
    # that is neither whitespace nor punctuation, so trailing punctuation
    # is not swallowed into a link.
    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    # Schemes routed to attachment() instead of being treated as plain URLs.
    attachment_schemas = ["attachment", "inline", "drawing", ]
    url_schemas = ['http', 'https', 'ftp', 'wiki', 'mailto', 'nntp', 'news',
                   'telnet', 'file', 'irc', 'ircs',
                   'webcal', 'ed2k', 'xmpp', 'rootz',
                  ]
    # Alternation of every recognized scheme (URL and attachment schemes).
    url_pattern = u'|'.join(url_schemas + attachment_schemas)

    # some common rules
    # WikiName: optional parent prefix, then one or more groups of
    # (optional child prefix + at least two capitalized words); must not be
    # directly preceded or followed by letters.
    word_rule = ur'(?:(?<![%(u)s%(l)s])|^)%(parent)s(?:%(subpages)s(?:[%(u)s][%(l)s]+){2,})+(?![%(u)s%(l)s]+)' % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'subpages': wikiutil.CHILD_PREFIX + '?',
        'parent': ur'(?:%s)?' % re.escape(PARENT_PREFIX),
    }
    # Literal URL: a known scheme at line start or after a non-word
    # character, a colon, then the URL body (see punct_pattern above).
    url_rule = ur'%(url_guard)s(%(url)s)\:([^\s<%(punct)s]|([%(punct)s][^\s<%(punct)s]))+' % {
        'url_guard': u'(^|(?<!\w))',
        'url': url_pattern,
        'punct': punct_pattern,
    }

    # Ordered list item: indentation, a number or one of a/A/i/I, a dot,
    # an optional "#<digits>" part and whitespace.
    ol_rule = ur"^\s+(?:[0-9]+|[aAiI])\.(?:#\d+)?\s"
    # Definition list item: indentation, the term, "::" and whitespace.
    dl_rule = ur"^\s+.*?::\s"

    # Dict keyed by the items yielded when iterating config.smileys; the
    # values are always None, only the keys are used (see formatting_rules).
    config_smileys = dict([(key, None) for key in config.smileys])
00059 
    # the big, fat, ugly one ;)
    #
    # One named group per line; the group name selects the handler method
    # _<name>_repl() (see the class docstring).
    # NOTE(review): the alternatives are separated by newlines rather than
    # "|", so the code that compiles this pattern (not in this section)
    # presumably joins the lines with "|" - confirm.
    # The pattern is %-formatted twice: here with the rule fragments below,
    # and again in __init__ with 'macronames' - that is why the macro rule
    # spells its placeholder as %%(macronames)s.
    formatting_rules = ur"""(?P<ent_numeric>&#(\d{1,5}|x[0-9a-fA-F]+);)
(?:(?P<emph_ibb>'''''(?=[^']+'''))
(?P<emph_ibi>'''''(?=[^']+''))
(?P<emph_ib_or_bi>'{5}(?=[^']))
(?P<emph>'{2,3})
(?P<u>__)
(?P<sup>\^.*?\^)
(?P<sub>,,[^,]{1,40},,)
(?P<tt>\{\{\{.*?\}\}\})
(?P<processor>(\{\{\{(#!.*|\s*$)))
(?P<pre>(\{\{\{ ?|\}\}\}))
(?P<small>(\~- ?|-\~))
(?P<big>(\~\+ ?|\+\~))
(?P<strike>(--\(|\)--))
(?P<rule>-{4,})
(?P<comment>^\#\#.*$)
(?P<macro>\[\[(%%(macronames)s)(?:\(.*?\))?\]\]))
(?P<ol>%(ol_rule)s)
(?P<dl>%(dl_rule)s)
(?P<li>^\s+\*\s*)
(?P<li_none>^\s+\.\s*)
(?P<indent>^\s+)
(?P<tableZ>\|\| $)
(?P<table>(?:\|\|)+(?:<[^>]*?>)?(?!\|? $))
(?P<heading>^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)
(?P<interwiki>[A-Z][a-zA-Z]+\:[^\s'\"\:<\|]([^\s%(punct)s]|([%(punct)s][^\s%(punct)s]))+)
(?P<word>%(word_rule)s)
(?P<url_bracket>\[((%(url)s)\:|#|\:)[^\s\]]+(\s[^\]]+)?\])
(?P<url>%(url_rule)s)
(?P<email>[-\w._+]+\@[\w-]+(\.[\w-]+)+)
(?P<smiley>(?<=\s)(%(smiley)s)(?=\s))
(?P<smileyA>^(%(smiley)s)(?=\s))
(?P<ent_symbolic>&\w+;)
(?P<ent>[<>&])
(?P<wikiname_bracket>\[".*?"\])
(?P<tt_bt>`.*?`)"""  % {

        'url': url_pattern,
        'punct': punct_pattern,
        'ol_rule': ol_rule,
        'dl_rule': dl_rule,
        'url_rule': url_rule,
        'word_rule': word_rule,
        # smiley markup is regex-escaped before being used as alternatives
        'smiley': u'|'.join(map(re.escape, config_smileys.keys()))}
00105 
00106     # Don't start p before these
00107     no_new_p_before = ("heading rule table tableZ tr td "
00108                        "ul ol dl dt dd li li_none indent "
00109                        "macro processor pre")
00110     no_new_p_before = no_new_p_before.split()
00111     no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
00112 
00113     def __init__(self, raw, request, **kw):
00114         self.raw = raw
00115         self.request = request
00116         self.form = request.form
00117         self._ = request.getText
00118         self.cfg = request.cfg
00119         self.line_anchors = kw.get('line_anchors', True)
00120         self.macro = None
00121         self.start_line = kw.get('start_line', 0)
00122 
00123         self.is_em = 0
00124         self.is_b = 0
00125         self.is_u = 0
00126         self.is_strike = 0
00127         self.lineno = 0
00128         self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
00129         self.in_li = 0 # between <li> and </li>
00130         self.in_dd = 0 # between <dd> and </dd>
00131         self.in_pre = 0
00132         self.in_table = 0
00133         self.is_big = False
00134         self.is_small = False
00135         self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
00136         self.titles = request._page_headings
00137 
00138         # holds the nesting level (in chars) of open lists
00139         self.list_indents = []
00140         self.list_types = []
00141 
00142         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(wikimacro.getNames(self.cfg))}
00143 
00144     def _close_item(self, result):
00145         #result.append("<!-- close item begin -->\n")
00146         if self.in_table:
00147             result.append(self.formatter.table(0))
00148             self.in_table = 0
00149         if self.in_li:
00150             self.in_li = 0
00151             if self.formatter.in_p:
00152                 result.append(self.formatter.paragraph(0))
00153             result.append(self.formatter.listitem(0))
00154         if self.in_dd:
00155             self.in_dd = 0
00156             if self.formatter.in_p:
00157                 result.append(self.formatter.paragraph(0))
00158             result.append(self.formatter.definition_desc(0))
00159         #result.append("<!-- close item end -->\n")
00160 
00161 
00162     def interwiki(self, url_and_text, **kw):
00163         # TODO: maybe support [wiki:Page http://wherever/image.png] ?
00164         if len(url_and_text) == 1:
00165             url = url_and_text[0]
00166             text = None
00167         else:
00168             url, text = url_and_text
00169 
00170         # keep track of whether this is a self-reference, so links
00171         # are always shown even the page doesn't exist.
00172         is_self_reference = 0
00173         url2 = url.lower()
00174         if url2.startswith('wiki:self:'):
00175             url = url[10:] # remove "wiki:self:"
00176             is_self_reference = 1
00177         elif url2.startswith('wiki:'):
00178             url = url[5:] # remove "wiki:"
00179 
00180         tag, tail = wikiutil.split_wiki(url)
00181         if text is None:
00182             if tag:
00183                 text = tail
00184             else:
00185                 text = url
00186                 url = ""
00187         elif (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
00188               is_self_reference or # [wiki:Self:LocalPage text] or [:LocalPage:text]
00189               Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
00190             return self._word_repl(url, text)
00191 
00192         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
00193         href = wikiutil.join_wiki(wikiurl, wikitail)
00194 
00195         # check for image URL, and possibly return IMG tag
00196         if not kw.get('pretty_url', 0) and wikiutil.isPicture(wikitail):
00197             return self.formatter.image(src=href)
00198 
00199         # link to self?
00200         if wikitag is None:
00201             return self._word_repl(wikitail)
00202 
00203         return (self.formatter.interwikilink(1, tag, tail) +
00204                 self.formatter.text(text) +
00205                 self.formatter.interwikilink(0, tag, tail))
00206 
00207     def attachment(self, url_and_text, **kw):
00208         """ This gets called on attachment URLs.
00209         """
00210         _ = self._
00211         if len(url_and_text) == 1:
00212             url = url_and_text[0]
00213             text = None
00214         else:
00215             url, text = url_and_text
00216 
00217         inline = url[0] == 'i'
00218         drawing = url[0] == 'd'
00219         url = url.split(":", 1)[1]
00220         url = wikiutil.url_unquote(url)
00221         text = text or url
00222 
00223         from MoinMoin.action import AttachFile
00224         if drawing:
00225             return self.formatter.attachment_drawing(url, text)
00226 
00227         # check for image URL, and possibly return IMG tag
00228         # (images are always inlined, just like for other URLs)
00229         if not kw.get('pretty_url', 0) and wikiutil.isPicture(url):
00230             return self.formatter.attachment_image(url)
00231 
00232         # inline the attachment
00233         if inline:
00234             return self.formatter.attachment_inlined(url, text)
00235 
00236         return self.formatter.attachment_link(url, text)
00237 
00238     def _u_repl(self, word):
00239         """Handle underline."""
00240         self.is_u = not self.is_u
00241         return self.formatter.underline(self.is_u)
00242 
00243     def _strike_repl(self, word):
00244         """Handle strikethrough."""
00245         # XXX we don't really enforce the correct sequence --( ... )-- here
00246         self.is_strike = not self.is_strike
00247         return self.formatter.strike(self.is_strike)
00248 
00249     def _small_repl(self, word):
00250         """Handle small."""
00251         if word.strip() == '~-' and self.is_small:
00252             return self.formatter.text(word)
00253         if word.strip() == '-~' and not self.is_small:
00254             return self.formatter.text(word)
00255         self.is_small = not self.is_small
00256         return self.formatter.small(self.is_small)
00257 
00258     def _big_repl(self, word):
00259         """Handle big."""
00260         if word.strip() == '~+' and self.is_big:
00261             return self.formatter.text(word)
00262         if word.strip() == '+~' and not self.is_big:
00263             return self.formatter.text(word)
00264         self.is_big = not self.is_big
00265         return self.formatter.big(self.is_big)
00266 
00267     def _emph_repl(self, word):
00268         """Handle emphasis, i.e. '' and '''."""
00269         ##print "#", self.is_b, self.is_em, "#"
00270         if len(word) == 3:
00271             self.is_b = not self.is_b
00272             if self.is_em and self.is_b:
00273                 self.is_b = 2
00274             return self.formatter.strong(self.is_b)
00275         else:
00276             self.is_em = not self.is_em
00277             if self.is_em and self.is_b:
00278                 self.is_em = 2
00279             return self.formatter.emphasis(self.is_em)
00280 
00281     def _emph_ibb_repl(self, word):
00282         """Handle mixed emphasis, i.e. ''''' followed by '''."""
00283         self.is_b = not self.is_b
00284         self.is_em = not self.is_em
00285         if self.is_em and self.is_b:
00286             self.is_b = 2
00287         return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00288 
00289     def _emph_ibi_repl(self, word):
00290         """Handle mixed emphasis, i.e. ''''' followed by ''."""
00291         self.is_b = not self.is_b
00292         self.is_em = not self.is_em
00293         if self.is_em and self.is_b:
00294             self.is_em = 2
00295         return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00296 
00297     def _emph_ib_or_bi_repl(self, word):
00298         """Handle mixed emphasis, exactly five '''''."""
00299         ##print "*", self.is_b, self.is_em, "*"
00300         b_before_em = self.is_b > self.is_em > 0
00301         self.is_b = not self.is_b
00302         self.is_em = not self.is_em
00303         if b_before_em:
00304             return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
00305         else:
00306             return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
00307 
00308 
00309     def _sup_repl(self, word):
00310         """Handle superscript."""
00311         return self.formatter.sup(1) + \
00312             self.formatter.text(word[1:-1]) + \
00313             self.formatter.sup(0)
00314 
00315     def _sub_repl(self, word):
00316         """Handle subscript."""
00317         return self.formatter.sub(1) + \
00318             self.formatter.text(word[2:-2]) + \
00319             self.formatter.sub(0)
00320 
00321 
00322     def _rule_repl(self, word):
00323         """Handle sequences of dashes."""
00324         result = self._undent() + self._closeP()
00325         if len(word) <= 4:
00326             result = result + self.formatter.rule()
00327         else:
00328             # Create variable rule size 1 - 6. Actual size defined in css.
00329             size = min(len(word), 10) - 4
00330             result = result + self.formatter.rule(size)
00331         return result
00332 
00333 
00334     def _word_repl(self, word, text=None):
00335         """Handle WikiNames."""
00336 
00337         # check for parent links
00338         # !!! should use wikiutil.AbsPageName here, but setting `text`
00339         # correctly prevents us from doing this for now
00340         if word.startswith(wikiutil.PARENT_PREFIX):
00341             if not text:
00342                 text = word
00343             word = '/'.join(filter(None, self.formatter.page.page_name.split('/')[:-1] + [word[wikiutil.PARENT_PREFIX_LEN:]]))
00344 
00345         if not text:
00346             # if a simple, self-referencing link, emit it as plain text
00347             if word == self.formatter.page.page_name:
00348                 return self.formatter.text(word)
00349             text = word
00350         if word.startswith(wikiutil.CHILD_PREFIX):
00351             word = self.formatter.page.page_name + '/' + word[wikiutil.CHILD_PREFIX_LEN:]
00352 
00353         # handle anchors
00354         parts = word.split("#", 1)
00355         anchor = ""
00356         if len(parts) == 2:
00357             word, anchor = parts
00358 
00359         return (self.formatter.pagelink(1, word, anchor=anchor) +
00360                 self.formatter.text(text) +
00361                 self.formatter.pagelink(0, word))
00362 
00363     def _notword_repl(self, word):
00364         """Handle !NotWikiNames."""
00365         return self.formatter.nowikiword(word[1:])
00366 
00367     def _interwiki_repl(self, word):
00368         """Handle InterWiki links."""
00369         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
00370         if wikitag_bad:
00371             return self.formatter.text(word)
00372         else:
00373             return self.interwiki(["wiki:" + word])
00374 
00375 
00376     def _url_repl(self, word):
00377         """Handle literal URLs including inline images."""
00378         scheme = word.split(":", 1)[0]
00379 
00380         if scheme == "wiki":
00381             return self.interwiki([word])
00382         if scheme in self.attachment_schemas:
00383             return self.attachment([word])
00384 
00385         if wikiutil.isPicture(word):
00386             word = wikiutil.mapURL(self.request, word)
00387             # Get image name http://here.com/dir/image.gif -> image
00388             name = word.split('/')[-1]
00389             name = ''.join(name.split('.')[:-1])
00390             return self.formatter.image(src=word, alt=name)
00391         else:
00392             return (self.formatter.url(1, word, css=scheme) +
00393                     self.formatter.text(word) +
00394                     self.formatter.url(0))
00395 
00396 
00397     def _wikiname_bracket_repl(self, word):
00398         """Handle special-char wikinames."""
00399         wikiname = word[2:-2]
00400         if wikiname:
00401             return self._word_repl(wikiname)
00402         else:
00403             return self.formatter.text(word)
00404 
00405 
00406     def _url_bracket_repl(self, word):
00407         """Handle bracketed URLs."""
00408 
00409         # Local extended link?
00410         if word[1] == ':':
00411             words = word[2:-1].split(':', 1)
00412             if len(words) == 1:
00413                 words = words * 2
00414             words[0] = 'wiki:Self:%s' % words[0]
00415             return self.interwiki(words, pretty_url=1)
00416             #return self._word_repl(words[0], words[1])
00417 
00418         # Traditional split on space
00419         words = word[1:-1].split(None, 1)
00420         if len(words) == 1:
00421             words = words * 2
00422 
00423         if words[0][0] == '#':
00424             # anchor link
00425             return (self.formatter.url(1, words[0]) +
00426                     self.formatter.text(words[1]) +
00427                     self.formatter.url(0))
00428 
00429         scheme = words[0].split(":", 1)[0]
00430         if scheme == "wiki":
00431             return self.interwiki(words, pretty_url=1)
00432         if scheme in self.attachment_schemas:
00433             return self.attachment(words, pretty_url=1)
00434 
00435         if wikiutil.isPicture(words[1]) and re.match(self.url_rule, words[1]):
00436             return (self.formatter.url(1, words[0], css='external') +
00437                     self.formatter.image(title=words[0], alt=words[0], src=words[1]) +
00438                     self.formatter.url(0))
00439         else:
00440             return (self.formatter.url(1, words[0], css=scheme) +
00441                     self.formatter.text(words[1]) +
00442                     self.formatter.url(0))
00443 
00444 
00445     def _email_repl(self, word):
00446         """Handle email addresses (without a leading mailto:)."""
00447         return (self.formatter.url(1, "mailto:" + word, css='mailto') +
00448                 self.formatter.text(word) +
00449                 self.formatter.url(0))
00450 
00451 
00452     def _ent_repl(self, word):
00453         """Handle SGML entities."""
00454         return self.formatter.text(word)
00455         #return {'&': '&amp;',
00456         #        '<': '&lt;',
00457         #        '>': '&gt;'}[word]
00458 
00459     def _ent_numeric_repl(self, word):
00460         """Handle numeric (decimal and hexadecimal) SGML entities."""
00461         return self.formatter.rawHTML(word)
00462 
00463     def _ent_symbolic_repl(self, word):
00464         """Handle symbolic SGML entities."""
00465         return self.formatter.rawHTML(word)
00466 
00467     def _indent_repl(self, match):
00468         """Handle pure indentation (no - * 1. markup)."""
00469         result = []
00470         if not (self.in_li or self.in_dd):
00471             self._close_item(result)
00472             self.in_li = 1
00473             css_class = None
00474             if self.line_was_empty and not self.first_list_item:
00475                 css_class = 'gap'
00476             result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00477         return ''.join(result)
00478 
00479     def _li_none_repl(self, match):
00480         """Handle type=none (" .") lists."""
00481         result = []
00482         self._close_item(result)
00483         self.in_li = 1
00484         css_class = None
00485         if self.line_was_empty and not self.first_list_item:
00486             css_class = 'gap'
00487         result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
00488         return ''.join(result)
00489 
00490     def _li_repl(self, match):
00491         """Handle bullet (" *") lists."""
00492         result = []
00493         self._close_item(result)
00494         self.in_li = 1
00495         css_class = None
00496         if self.line_was_empty and not self.first_list_item:
00497             css_class = 'gap'
00498         result.append(self.formatter.listitem(1, css_class=css_class))
00499         return ''.join(result)
00500 
00501     def _ol_repl(self, match):
00502         """Handle numbered lists."""
00503         return self._li_repl(match)
00504 
00505     def _dl_repl(self, match):
00506         """Handle definition lists."""
00507         result = []
00508         self._close_item(result)
00509         self.in_dd = 1
00510         result.extend([
00511             self.formatter.definition_term(1),
00512             self.formatter.text(match[1:-3].lstrip(' ')),
00513             self.formatter.definition_term(0),
00514             self.formatter.definition_desc(1),
00515         ])
00516         return ''.join(result)
00517 
00518 
00519     def _indent_level(self):
00520         """Return current char-wise indent level."""
00521         return len(self.list_indents) and self.list_indents[-1]
00522 
00523 
00524     def _indent_to(self, new_level, list_type, numtype, numstart):
00525         """Close and open lists."""
00526         open = []   # don't make one out of these two statements!
00527         close = []
00528 
00529         if self._indent_level() != new_level and self.in_table:
00530             close.append(self.formatter.table(0))
00531             self.in_table = 0
00532 
00533         while self._indent_level() > new_level:
00534             self._close_item(close)
00535             if self.list_types[-1] == 'ol':
00536                 tag = self.formatter.number_list(0)
00537             elif self.list_types[-1] == 'dl':
00538                 tag = self.formatter.definition_list(0)
00539             else:
00540                 tag = self.formatter.bullet_list(0)
00541             close.append(tag)
00542 
00543             del self.list_indents[-1]
00544             del self.list_types[-1]
00545 
00546             if self.list_types: # we are still in a list
00547                 if self.list_types[-1] == 'dl':
00548                     self.in_dd = 1
00549                 else:
00550                     self.in_li = 1
00551 
00552         # Open new list, if necessary
00553         if self._indent_level() < new_level:
00554             self.list_indents.append(new_level)
00555             self.list_types.append(list_type)
00556 
00557             if self.formatter.in_p:
00558                 close.append(self.formatter.paragraph(0))
00559 
00560             if list_type == 'ol':
00561                 tag = self.formatter.number_list(1, numtype, numstart)
00562             elif list_type == 'dl':
00563                 tag = self.formatter.definition_list(1)
00564             else:
00565                 tag = self.formatter.bullet_list(1)
00566             open.append(tag)
00567 
00568             self.first_list_item = 1
00569             self.in_li = 0
00570             self.in_dd = 0
00571 
00572         # If list level changes, close an open table
00573         if self.in_table and (open or close):
00574             close[0:0] = [self.formatter.table(0)]
00575             self.in_table = 0
00576 
00577         self.in_list = self.list_types != []
00578         return ''.join(close) + ''.join(open)
00579 
00580 
00581     def _undent(self):
00582         """Close all open lists."""
00583         result = []
00584         #result.append("<!-- _undent start -->\n")
00585         self._close_item(result)
00586         for type in self.list_types[::-1]:
00587             if type == 'ol':
00588                 result.append(self.formatter.number_list(0))
00589             elif type == 'dl':
00590                 result.append(self.formatter.definition_list(0))
00591             else:
00592                 result.append(self.formatter.bullet_list(0))
00593         #result.append("<!-- _undent end -->\n")
00594         self.list_indents = []
00595         self.list_types = []
00596         return ''.join(result)
00597 
00598 
00599     def _tt_repl(self, word):
00600         """Handle inline code."""
00601         return self.formatter.code(1) + \
00602             self.formatter.text(word[3:-3]) + \
00603             self.formatter.code(0)
00604 
00605 
00606     def _tt_bt_repl(self, word):
00607         """Handle backticked inline code."""
00608         # if len(word) == 2: return "" // removed for FCK editor
00609         return self.formatter.code(1, css="backtick") + \
00610             self.formatter.text(word[1:-1]) + \
00611             self.formatter.code(0)
00612 
00613 
00614     def _getTableAttrs(self, attrdef):
00615         # skip "|" and initial "<"
00616         while attrdef and attrdef[0] == "|":
00617             attrdef = attrdef[1:]
00618         if not attrdef or attrdef[0] != "<":
00619             return {}, ''
00620         attrdef = attrdef[1:]
00621 
00622         # extension for special table markup
00623         def table_extension(key, parser, attrs, wiki_parser=self):
00624             """ returns: tuple (found_flag, msg)
00625                 found_flag: whether we found something and were able to process it here
00626                   true for special stuff like 100% or - or #AABBCC
00627                   false for style xxx="yyy" attributes
00628                 msg: "" or an error msg
00629             """
00630             _ = wiki_parser._
00631             found = False
00632             msg = ''
00633             if key[0] in "0123456789":
00634                 token = parser.get_token()
00635                 if token != '%':
00636                     wanted = '%'
00637                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
00638                         'wanted': wanted, 'key': key, 'token': token}
00639                 else:
00640                     try:
00641                         dummy = int(key)
00642                     except ValueError:
00643                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
00644                             'key': key, 'token': token}
00645                     else:
00646                         found = True
00647                         attrs['width'] = '"%s%%"' % key
00648             elif key == '-':
00649                 arg = parser.get_token()
00650                 try:
00651                     dummy = int(arg)
00652                 except ValueError:
00653                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
00654                         'arg': arg, 'key': key}
00655                 else:
00656                     found = True
00657                     attrs['colspan'] = '"%s"' % arg
00658             elif key == '|':
00659                 arg = parser.get_token()
00660                 try:
00661                     dummy = int(arg)
00662                 except ValueError:
00663                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
00664                         'arg': arg, 'key': key}
00665                 else:
00666                     found = True
00667                     attrs['rowspan'] = '"%s"' % arg
00668             elif key == '(':
00669                 found = True
00670                 attrs['align'] = '"left"'
00671             elif key == ':':
00672                 found = True
00673                 attrs['align'] = '"center"'
00674             elif key == ')':
00675                 found = True
00676                 attrs['align'] = '"right"'
00677             elif key == '^':
00678                 found = True
00679                 attrs['valign'] = '"top"'
00680             elif key == 'v':
00681                 found = True
00682                 attrs['valign'] = '"bottom"'
00683             elif key == '#':
00684                 arg = parser.get_token()
00685                 try:
00686                     if len(arg) != 6: raise ValueError
00687                     dummy = int(arg, 16)
00688                 except ValueError:
00689                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
00690                         'arg': arg, 'key': key}
00691                 else:
00692                     found = True
00693                     attrs['bgcolor'] = '"#%s"' % arg
00694             return found, self.formatter.rawHTML(msg)
00695 
00696         # scan attributes
00697         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
00698         if msg:
00699             msg = '<strong class="highlight">%s</strong>' % msg
00700         return attr, msg
00701 
00702     def _tableZ_repl(self, word):
00703         """Handle table row end."""
00704         if self.in_table:
00705             result = ''
00706             # REMOVED: check for self.in_li, p should always close
00707             if self.formatter.in_p:
00708                 result = self.formatter.paragraph(0)
00709             result += self.formatter.table_cell(0) + self.formatter.table_row(0)
00710             return result
00711         else:
00712             return self.formatter.text(word)
00713 
00714     def _table_repl(self, word):
00715         """Handle table cell separator."""
00716         if self.in_table:
00717             result = []
00718             # check for attributes
00719             attrs, attrerr = self._getTableAttrs(word)
00720 
00721             # start the table row?
00722             if self.table_rowstart:
00723                 self.table_rowstart = 0
00724                 result.append(self.formatter.table_row(1, attrs))
00725             else:
00726                 # Close table cell, first closing open p
00727                 # REMOVED check for self.in_li, paragraph should close always!
00728                 if self.formatter.in_p:
00729                     result.append(self.formatter.paragraph(0))
00730                 result.append(self.formatter.table_cell(0))
00731 
00732             # check for adjacent cell markers
00733             if word.count("|") > 2:
00734                 if not attrs.has_key('align') and \
00735                    not (attrs.has_key('style') and 'text-align' in attrs['style'].lower()):
00736                     # add center alignment if we don't have some alignment already
00737                     attrs['align'] = '"center"'
00738                 if not attrs.has_key('colspan'):
00739                     attrs['colspan'] = '"%d"' % (word.count("|")/2)
00740 
00741             # return the complete cell markup
00742             result.append(self.formatter.table_cell(1, attrs) + attrerr)
00743             result.append(self._line_anchordef())
00744             return ''.join(result)
00745         else:
00746             return self.formatter.text(word)
00747 
00748 
00749     def _heading_repl(self, word):
00750         """Handle section headings."""
00751         from MoinMoin.support.python_compatibility import hash_new
00752 
00753         h = word.strip()
00754         level = 1
00755         while h[level:level+1] == '=':
00756             level += 1
00757         depth = min(5, level)
00758 
00759         # this is needed for Included pages
00760         # TODO but it might still result in unpredictable results
00761         # when included the same page multiple times
00762         title_text = h[level:-level].strip()
00763         pntt = self.formatter.page.page_name + title_text
00764         self.titles.setdefault(pntt, 0)
00765         self.titles[pntt] += 1
00766 
00767         unique_id = ''
00768         if self.titles[pntt] > 1:
00769             unique_id = '-%d' % self.titles[pntt]
00770         result = self._closeP()
00771         result += self.formatter.heading(1, depth, id="head-"+hash_new('sha1', pntt.encode(config.charset)).hexdigest()+unique_id)
00772 
00773         return (result + self.formatter.text(title_text) +
00774                 self.formatter.heading(0, depth))
00775 
00776     def _processor_repl(self, word):
00777         """Handle processed code displays."""
00778         if word[:3] == '{{{':
00779             word = word[3:]
00780 
00781         self.processor = None
00782         self.processor_name = None
00783         self.processor_is_parser = 0
00784         s_word = word.strip()
00785         if s_word == '#!':
00786             # empty bang paths lead to a normal code display
00787             # can be used to escape real, non-empty bang paths
00788             word = ''
00789             self.in_pre = 3
00790             return self._closeP() + self.formatter.preformatted(1)
00791         elif s_word[:2] == '#!':
00792             # First try to find a processor for this (will go away in 2.0)
00793             processor_name = s_word[2:].split()[0]
00794             self.setProcessor(processor_name)
00795 
00796         if self.processor:
00797             self.processor_name = processor_name
00798             self.in_pre = 2
00799             self.colorize_lines = [word]
00800             return ''
00801         elif s_word:
00802             self.in_pre = 3
00803             return self._closeP() + self.formatter.preformatted(1) + \
00804                    self.formatter.text(s_word + ' (-)')
00805         else:
00806             self.in_pre = 1
00807             return ''
00808 
00809     def _pre_repl(self, word):
00810         """Handle code displays."""
00811         word = word.strip()
00812         if word == '{{{' and not self.in_pre:
00813             self.in_pre = 3
00814             return self._closeP() + self.formatter.preformatted(self.in_pre)
00815         elif word == '}}}' and self.in_pre:
00816             self.in_pre = 0
00817             self.inhibit_p = 0
00818             return self.formatter.preformatted(self.in_pre)
00819         return self.formatter.text(word)
00820 
00821 
00822     def _smiley_repl(self, word):
00823         """Handle smileys."""
00824         return self.formatter.smiley(word)
00825 
00826     _smileyA_repl = _smiley_repl
00827 
00828 
00829     def _comment_repl(self, word):
00830         # if we are in a paragraph, we must close it so that normal text following
00831         # in the line below the comment will reopen a new paragraph.
00832         if self.formatter.in_p:
00833             self.formatter.paragraph(0)
00834         self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
00835         return self.formatter.comment(word)
00836 
00837     def _closeP(self):
00838         if self.formatter.in_p:
00839             return self.formatter.paragraph(0)
00840         return ''
00841 
00842     def _macro_repl(self, word):
00843         """Handle macros ([[macroname]])."""
00844         macro_name = word[2:-2]
00845         self.inhibit_p = 0 # 1 fixes UserPreferences, 0 fixes paragraph formatting for macros
00846 
00847         # check for arguments
00848         args = None
00849         if macro_name.count("("):
00850             macro_name, args = macro_name.split('(', 1)
00851             args = args[:-1]
00852 
00853         # create macro instance
00854         if self.macro is None:
00855             self.macro = wikimacro.Macro(self)
00856         return self.formatter.macro(self.macro, macro_name, args)
00857 
00858     def scan(self, scan_re, line):
00859         """ Scans one line
00860 
00861         Append text before match, invoke replace() with match, and add text after match.
00862         """
00863         result = []
00864         lastpos = 0
00865 
00866         ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
00867 
00868         for match in scan_re.finditer(line):
00869             # Add text before the match
00870             if lastpos < match.start():
00871 
00872                 ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
00873 
00874                 if not (self.inhibit_p or self.in_pre or self.formatter.in_p):
00875                     result.append(self.formatter.paragraph(1, css_class="line862"))
00876                 result.append(self.formatter.text(line[lastpos:match.start()]))
00877 
00878             # Replace match with markup
00879             if not (self.inhibit_p or self.in_pre or self.formatter.in_p or
00880                     self.in_table or self.in_list):
00881                 result.append(self.formatter.paragraph(1, css_class="line867"))
00882             result.append(self.replace(match))
00883             lastpos = match.end()
00884 
00885         ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
00886 
00887         # Add paragraph with the remainder of the line
00888         if not (self.in_pre or self.in_li or self.in_dd or self.inhibit_p or
00889                 self.formatter.in_p) and lastpos < len(line):
00890             result.append(self.formatter.paragraph(1, css_class="line874"))
00891         result.append(self.formatter.text(line[lastpos:]))
00892         return u''.join(result)
00893 
00894     def replace(self, match):
00895         """ Replace match using type name """
00896         result = []
00897         for type, hit in match.groupdict().items():
00898             if hit is not None and type != "hmarker":
00899 
00900                 ###result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
00901                 if self.in_pre and type not in ['pre', 'ent']:
00902                     return self.formatter.text(hit)
00903                 else:
00904                     # Open p for certain types
00905                     if not (self.inhibit_p or self.formatter.in_p
00906                             or self.in_pre or (type in self.no_new_p_before)):
00907                         result.append(self.formatter.paragraph(1, css_class="line891"))
00908 
00909                     # Get replace method and replece hit
00910                     replace = getattr(self, '_' + type + '_repl')
00911                     result.append(replace(hit))
00912                     return ''.join(result)
00913         else:
00914             # We should never get here
00915             import pprint
00916             raise Exception("Can't handle match " + `match`
00917                 + "\n" + pprint.pformat(match.groupdict())
00918                 + "\n" + pprint.pformat(match.groups()) )
00919 
00920         return ""
00921 
00922     def _line_anchordef(self):
00923         if self.line_anchors and not self.line_anchor_printed:
00924             self.line_anchor_printed = 1
00925             return self.formatter.line_anchordef(self.lineno)
00926         else:
00927             return ''
00928 
00929     def format(self, formatter):
00930         """ For each line, scan through looking for magic
00931             strings, outputting verbatim any intervening text.
00932         """
00933         self.formatter = formatter
00934         self.hilite_re = self.formatter.page.hilite_re
00935 
00936         # prepare regex patterns
00937         rules = self.formatting_rules.replace('\n', '|')
00938         if self.cfg.bang_meta:
00939             rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
00940                 'word_rule': self.word_rule,
00941                 'rules': rules,
00942             }
00943         self.request.clock.start('compile_huge_and_ugly')
00944         scan_re = re.compile(rules, re.UNICODE)
00945         number_re = re.compile(self.ol_rule, re.UNICODE)
00946         term_re = re.compile(self.dl_rule, re.UNICODE)
00947         indent_re = re.compile("^\s*", re.UNICODE)
00948         eol_re = re.compile(r'\r?\n', re.UNICODE)
00949         self.request.clock.stop('compile_huge_and_ugly')
00950 
00951         # get text and replace TABs
00952         rawtext = self.raw.expandtabs()
00953 
00954         # go through the lines
00955         self.lineno = self.start_line
00956         self.lines = eol_re.split(rawtext)
00957         self.line_is_empty = 0
00958 
00959         self.in_processing_instructions = 1
00960 
00961         # Main loop
00962         for line in self.lines:
00963             self.lineno += 1
00964             self.line_anchor_printed = 0
00965             if not self.in_table:
00966                 self.request.write(self._line_anchordef())
00967             self.table_rowstart = 1
00968             self.line_was_empty = self.line_is_empty
00969             self.line_is_empty = 0
00970             self.first_list_item = 0
00971             self.inhibit_p = 0
00972 
00973             # ignore processing instructions
00974             if self.in_processing_instructions:
00975                 found = False
00976                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
00977                            "#pragma", "#form", "#acl", "#language"):
00978                     if line.lower().startswith(pi):
00979                         self.request.write(self.formatter.comment(line))
00980                         found = True
00981                         break
00982                 if not found:
00983                     self.in_processing_instructions = 0
00984                 else:
00985                     continue # do not parse this line
00986             if self.in_pre:
00987                 # TODO: move this into function
00988                 # still looking for processing instructions
00989                 # TODO: use strings for pre state, not numbers
00990                 if self.in_pre == 1:
00991                     self.processor = None
00992                     self.processor_is_parser = 0
00993                     processor_name = ''
00994                     if (line.strip()[:2] == "#!"):
00995                         processor_name = line.strip()[2:].split()[0]
00996                         self.setProcessor(processor_name)
00997 
00998                     if self.processor:
00999                         self.in_pre = 2
01000                         self.colorize_lines = [line]
01001                         self.processor_name = processor_name
01002                         continue
01003                     else:
01004                         self.request.write(self._closeP() +
01005                                            self.formatter.preformatted(1))
01006                         self.in_pre = 3
01007                 if self.in_pre == 2:
01008                     # processing mode
01009                     endpos = line.find("}}}")
01010                     if endpos == -1:
01011                         self.colorize_lines.append(line)
01012                         continue
01013                     if line[:endpos]:
01014                         self.colorize_lines.append(line[:endpos])
01015 
01016                     # Close p before calling processor
01017                     # TODO: do we really need this?
01018                     self.request.write(self._closeP())
01019                     res = self.formatter.processor(self.processor_name,
01020                                                    self.colorize_lines,
01021                                                    self.processor_is_parser)
01022                     self.request.write(res)
01023                     del self.colorize_lines
01024                     self.in_pre = 0
01025                     self.processor = None
01026 
01027                     # send rest of line through regex machinery
01028                     line = line[endpos+3:]
01029                     if not line.strip(): # just in the case "}}} " when we only have blanks left...
01030                         continue
01031             else:
01032                 # we don't have \n as whitespace any more
01033                 # This is the space between lines we join to one paragraph
01034                 line += ' '
01035 
01036                 # Paragraph break on empty lines
01037                 if not line.strip():
01038                     if self.in_table:
01039                         self.request.write(self.formatter.table(0))
01040                         self.request.write(self._line_anchordef())
01041                         self.in_table = 0
01042                     # CHANGE: removed check for not self.list_types
01043                     # p should close on every empty line
01044                     if self.formatter.in_p:
01045                         self.request.write(self.formatter.paragraph(0))
01046                     self.line_is_empty = 1
01047                     continue
01048 
01049                 # Check indent level
01050                 indent = indent_re.match(line)
01051                 indlen = len(indent.group(0))
01052                 indtype = "ul"
01053                 numtype = None
01054                 numstart = None
01055                 if indlen:
01056                     match = number_re.match(line)
01057                     if match:
01058                         numtype, numstart = match.group(0).strip().split('.')
01059                         numtype = numtype[0]
01060 
01061                         if numstart and numstart[0] == "#":
01062                             numstart = int(numstart[1:])
01063                         else:
01064                             numstart = None
01065 
01066                         indtype = "ol"
01067                     else:
01068                         match = term_re.match(line)
01069                         if match:
01070                             indtype = "dl"
01071 
01072                 # output proper indentation tags
01073                 self.request.write(self._indent_to(indlen, indtype, numtype, numstart))
01074 
01075                 # Table mode
01076                 # TODO: move into function?
01077                 if (not self.in_table and line[indlen:indlen + 2] == "||"
01078                     and line[-3:] == "|| " and len(line) >= 5 + indlen):
01079                     # Start table
01080                     if self.list_types and not self.in_li:
01081                         self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
01082                         ## CHANGE: no automatic p on li
01083                         ##self.request.write(self.formatter.paragraph(1))
01084                         self.in_li = 1
01085 
01086                     # CHANGE: removed check for self.in_li
01087                     # paragraph should end before table, always!
01088                     if self.formatter.in_p:
01089                         self.request.write(self.formatter.paragraph(0))
01090                     attrs, attrerr = self._getTableAttrs(line[indlen+2:])
01091                     self.request.write(self.formatter.table(1, attrs) + attrerr)
01092                     self.in_table = True # self.lineno
01093                 elif (self.in_table and not
01094                       # intra-table comments should not break a table
01095                       (line[:2] == "##" or
01096                        line[indlen:indlen + 2] == "||" and
01097                        line[-3:] == "|| " and
01098                        len(line) >= 5 + indlen)):
01099 
01100                     # Close table
01101                     self.request.write(self.formatter.table(0))
01102                     self.request.write(self._line_anchordef())
01103                     self.in_table = 0
01104 
01105             # Scan line, format and write
01106             formatted_line = self.scan(scan_re, line)
01107             self.request.write(formatted_line)
01108 
01109             if self.in_pre == 3:
01110                 self.request.write(self.formatter.linebreak())
01111 
01112         # Close code displays, paragraphs, tables and open lists
01113         self.request.write(self._undent())
01114         if self.in_pre: self.request.write(self.formatter.preformatted(0))
01115         if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
01116         if self.in_table: self.request.write(self.formatter.table(0))
01117 
01118     # --------------------------------------------------------------------
01119     # Private helpers
01120 
01121     def setProcessor(self, name):
01122         """ Set processer to either processor or parser named 'name' """
01123         cfg = self.request.cfg
01124         try:
01125             self.processor = wikiutil.importPlugin(cfg, "processor", name,
01126                                                    "process")
01127             self.processor_is_parser = 0
01128         except wikiutil.PluginMissingError:
01129             try:
01130                 self.processor = wikiutil.importPlugin(cfg, "parser", name,
01131                                                    "Parser")
01132                 self.processor_is_parser = 1
01133             except wikiutil.PluginMissingError:
01134                 self.processor = None
01135 
01136