Back to index

moin  1.9.0~rc2
text_creole.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Creole wiki markup parser
00004 
00005     See http://wikicreole.org/ for latest specs.
00006 
00007     Notes:
00008     * No markup allowed in headings.
00009       Creole 1.0 does not require us to support this.
00010     * No markup allowed in table headings.
00011       Creole 1.0 does not require us to support this.
00012     * No (non-bracketed) generic url recognition: this is "mission impossible"
00013       except if you want to risk lots of false positives. Only known protocols
00014       are recognized.
00015     * We do not allow ":" before "//" italic markup to avoid urls with
00016       unrecognized schemes (like wtf://server/path) triggering italic rendering
00017       for the rest of the paragraph.
00018 
00019     @copyright: 2007 MoinMoin:RadomirDopieralski (creole 0.5 implementation),
00020                 2007 MoinMoin:ThomasWaldmann (updates)
00021     @license: GNU GPL, see COPYING for details.
00022 """
00023 
00024 import re
00025 import StringIO
00026 from MoinMoin import config, wikiutil
00027 from MoinMoin.macro import Macro
00028 from _creole import Parser as CreoleParser
00029 
# Dependency list re-exported by the Parser class below; nothing is declared.
# NOTE(review): presumably consumed by MoinMoin's caching layer - confirm.
Dependencies = []


def _(x):
    """Return x unchanged.

    Identity stand-in used to wrap the quickhelp text while this module is
    being loaded; it is deleted again at the end of the module.
    NOTE(review): presumably marks strings for translation - confirm.
    """
    return x
00033 
class Parser:
    """
    Adapter between the MoinMoin parser API and the Creole
    implementation: the raw text is parsed by CreoleParser into a
    document tree, which is then rendered by Emitter.
    """

    # Enable caching
    caching = 1
    Dependencies = Dependencies
    quickhelp = _(u"""\
 Emphasis:: <<Verbatim(//)>>''italics''<<Verbatim(//)>>; <<Verbatim(**)>>'''bold'''<<Verbatim(**)>>; <<Verbatim(**//)>>'''''bold italics'''''<<Verbatim(//**)>>; <<Verbatim(//)>>''mixed ''<<Verbatim(**)>>'''''bold'''<<Verbatim(**)>> and italics''<<Verbatim(//)>>;
 Horizontal Rule:: <<Verbatim(----)>>
 Force Linebreak:: <<Verbatim(\\\\)>>
 Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
 Lists:: * bullets; ** sub-bullets; # numbered items; ## numbered sub items.
 Links:: <<Verbatim([[target]])>>; <<Verbatim([[target|linktext]])>>.
 Tables:: |= header text | cell text | more cell text |;

(!) For more help, see HelpOnEditing or HelpOnCreoleSyntax.
""")

    def __init__(self, raw, request, **kw):
        """
        Remember the markup and the request it belongs to.

        @param raw: the Creole markup to be rendered
        @param request: the current request; its ``form`` is kept as well
        @param kw: accepted for API compatibility and ignored
        """
        self.raw = raw
        self.request = request
        self.form = request.form

    def format(self, formatter):
        """
        Parse self.raw, render the resulting tree with the given
        formatter and write the output to the request.
        """
        tree = CreoleParser(self.raw).parse()
        emitter = Emitter(tree, formatter, self.request, Macro(self))
        self.request.write(emitter.emit())
00068 
class Rules:
    """
    Regular expression fragments used to classify link and image targets.

    The multi-line fragments contain insignificant whitespace and are
    therefore meant to be compiled with re.X (verbose mode).
    """

    # For the link targets:

    # Alternation of the url schemes that are treated as external links.
    proto = '|'.join([
        'http', 'https', 'ftp', 'nntp', 'news',
        'mailto', 'telnet', 'file', 'irc',
    ])

    # <known scheme>:<anything>
    extern = r'(?P<extern_addr>(?P<extern_proto>%s):.*)' % (proto, )

    # attachment:..., drawing:... or image:...
    attach = r'''
            (?P<attach_scheme> attachment | drawing | image ):
            (?P<attach_addr> .* )
        '''

    # WikiName:PageName, the wiki name starting with an uppercase letter
    interwiki = r'''
            (?P<inter_wiki> [A-Z][a-zA-Z]+ ) :
            (?P<inter_page> .* )
        '''

    # Catch-all: whatever is left is taken as a page name.
    page = r'(?P<page_name> .* )'
00082 
00083 
class Emitter:
    """
    Generate the output for the document
    tree consisting of DocNodes.

    Each node is dispatched on its ``kind`` attribute to the method named
    ``<kind>_emit``; kinds without such a method are rendered by
    default_emit(). All emitters return the produced markup as a string.

    Node kinds without an emitter in this class (they end up in
    default_emit() should they ever occur): definition_list, term,
    definition, quote, smiley, abbr, anchor_link, drawing, figure,
    bad_link, section, blockquote.
    """

    # Classifies a link or image target. Exactly one alternative matches;
    # the last one (page name) accepts anything.
    addr_re = re.compile('|'.join([
            Rules.extern,
            Rules.attach,
            Rules.interwiki,
            Rules.page
        ]), re.X | re.U) # for addresses

    def __init__(self, root, formatter, request, macro):
        """
        @param root: root node of the document tree to render
        @param formatter: formatter used to produce the output
        @param request: the current request (its ``form`` is kept as well)
        @param macro: macro object handed to formatter.macro()
        """
        self.root = root
        self.formatter = formatter
        self.request = request
        self.form = request.form
        self.macro = macro

    def get_text(self, node):
        """
        Try to emit whatever text is in the node.

        Returns the content of the first child if there is one, else the
        content of the node itself; an empty content yields ''.
        """
        try:
            return node.children[0].content or ''
        except (AttributeError, IndexError, TypeError):
            # No usable first child. Only lookup failures are handled here,
            # so that e.g. KeyboardInterrupt is no longer swallowed.
            return node.content or ''

    def _wrap_children(self, toggle, node, **kw):
        """
        Emit the children of node between toggle(1, **kw) and toggle(0).

        toggle is a formatter method taking an on/off flag as its first
        argument; keyword arguments go to the opening call only.
        """
        return ''.join([
            toggle(1, **kw),
            self.emit_children(node),
            toggle(0),
        ])

    # *_emit methods for emitting nodes of the document:

    def document_emit(self, node):
        """Emit the whole document, i.e. all children of the root node."""
        return self.emit_children(node)

    def text_emit(self, node):
        """Emit the node content as plain text."""
        return self.formatter.text(node.content or '')

    def separator_emit(self, node):
        """Emit a horizontal rule."""
        return self.formatter.rule()

    def paragraph_emit(self, node):
        """Emit the children wrapped into a paragraph."""
        return self._wrap_children(self.formatter.paragraph, node)

    def bullet_list_emit(self, node):
        """Emit the children wrapped into a bullet list."""
        return self._wrap_children(self.formatter.bullet_list, node)

    def number_list_emit(self, node):
        """Emit the children wrapped into a numbered list."""
        return self._wrap_children(self.formatter.number_list, node)

    def list_item_emit(self, node):
        """Emit the children wrapped into a list item."""
        return self._wrap_children(self.formatter.listitem, node)

    def table_emit(self, node):
        """Emit a table; node.attrs (if present) is passed to the formatter."""
        return self._wrap_children(self.formatter.table, node,
                                   attrs=getattr(node, 'attrs', ''))

    def table_row_emit(self, node):
        """Emit a table row; node.attrs (if present) is passed on."""
        return self._wrap_children(self.formatter.table_row, node,
                                   attrs=getattr(node, 'attrs', ''))

    def table_cell_emit(self, node):
        """Emit a table cell; node.attrs (if present) is passed on."""
        return self._wrap_children(self.formatter.table_cell, node,
                                   attrs=getattr(node, 'attrs', ''))

    def table_head_emit(self, node):
        """Emit a table heading cell as raw <th> markup."""
        return ''.join([
            self.formatter.rawHTML('<th>'),
            self.emit_children(node),
            self.formatter.rawHTML('</th>'),
        ])

    def emphasis_emit(self, node):
        """Emit the children as emphasized text."""
        return self._wrap_children(self.formatter.emphasis, node)

    def strong_emit(self, node):
        """Emit the children as strong text."""
        return self._wrap_children(self.formatter.strong, node)

    def header_emit(self, node):
        """Emit a heading of level node.level; its text is also used as id."""
        text = self.get_text(node)
        return ''.join([
            self.formatter.heading(1, node.level, id=text),
            self.formatter.text(text),
            self.formatter.heading(0, node.level),
        ])

    def code_emit(self, node):
        """Emit inline code as raw <tt> markup."""
        # XXX The current formatter will replace all spaces with &nbsp;, so
        # we need to use rawHTML instead of formatter.code(), until that is
        # fixed.
        return ''.join([
            self.formatter.rawHTML('<tt>'),
            self.formatter.text(node.content or ''),
            self.formatter.rawHTML('</tt>'),
        ])

    def link_emit(self, node):
        """
        Emit a link; the kind of link depends on what node.content looks
        like (page name, external url, interwiki link or attachment).

        The link text is made of the node's children; without children the
        target itself is shown. A target that cannot be classified is
        emitted as text between "[[" and "]]".
        """
        target = node.content
        m = self.addr_re.match(target)
        if m:
            if m.group('page_name'):
                # link to a page
                word = m.group('page_name')
                if word.startswith(wikiutil.PARENT_PREFIX):
                    word = word[wikiutil.PARENT_PREFIX_LEN:]
                elif word.startswith(wikiutil.CHILD_PREFIX):
                    word = "%s/%s" % (self.formatter.page.page_name,
                        word[wikiutil.CHILD_PREFIX_LEN:])
                word, anchor = wikiutil.split_anchor(word)
                return ''.join([
                    self.formatter.pagelink(1, word, anchor=anchor),
                    self.emit_children(node) or self.formatter.text(target),
                    self.formatter.pagelink(0, word),
                ])
            elif m.group('extern_addr'):
                # external link, the scheme doubles as css class
                address = m.group('extern_addr')
                proto = m.group('extern_proto')
                return ''.join([
                    self.formatter.url(1, address, css=proto),
                    self.emit_children(node) or self.formatter.text(target),
                    self.formatter.url(0),
                ])
            elif m.group('inter_wiki'):
                # interwiki link
                wiki = m.group('inter_wiki')
                page = m.group('inter_page')
                page, anchor = wikiutil.split_anchor(page)
                return ''.join([
                    self.formatter.interwikilink(1, wiki, page, anchor=anchor),
                    self.emit_children(node) or self.formatter.text(page),
                    self.formatter.interwikilink(0),
                ])
            elif m.group('attach_scheme'):
                # link to an attachment, whatever the scheme is
                url = wikiutil.url_unquote(m.group('attach_addr'))
                text = self.get_text(node)
                return ''.join([
                        self.formatter.attachment_link(1, url),
                        self.formatter.text(text),
                        self.formatter.attachment_link(0)
                    ])
        return "".join(["[[", self.formatter.text(target), "]]"])

    def image_emit(self, node):
        """
        Emit an image-like object, depending on what node.content looks like:

        * "#name": an anchor definition
        * other page-name-like targets and "image:": an attachment image
        * external url: an external image
        * "drawing:": an attachment drawing
        * everything else ("attachment:", interwiki targets): the complete
          target is handed to formatter.attachment_inlined()

        The text returned by get_text() serves as alternative text.
        """
        target = node.content
        text = self.get_text(node)
        m = self.addr_re.match(target)
        if m:
            if m.group('page_name'):
                # inserted anchors
                url = wikiutil.url_unquote(target)
                if target.startswith('#'):
                    return self.formatter.anchordef(url[1:])
                # default to images
                return self.formatter.attachment_image(
                    url, alt=text, html_class='image')
            elif m.group('extern_addr'):
                # external image
                url = wikiutil.url_unquote(m.group('extern_addr'))
                return self.formatter.image(
                    src=url, alt=text, html_class='external_image')
            elif m.group('attach_scheme'):
                # attachment of some kind
                scheme = m.group('attach_scheme')
                url = wikiutil.url_unquote(m.group('attach_addr'))
                if scheme == 'image':
                    return self.formatter.attachment_image(
                        url, alt=text, html_class='image')
                elif scheme == 'drawing':
                    url = wikiutil.drawing2fname(url)
                    return self.formatter.attachment_drawing(url, text, alt=text)
                # scheme 'attachment' is handled by the fallback below
            # interwiki targets are handled by the fallback below, too
        url = wikiutil.url_unquote(node.content)
        return self.formatter.attachment_inlined(url, text)

    def macro_emit(self, node):
        """Let the formatter execute the macro node.content with node.args."""
        macro_name = node.content
        args = node.args
        return self.formatter.macro(self.macro, macro_name, args)

    def break_emit(self, node):
        """Emit a forced line break."""
        return self.formatter.linebreak(preformatted=0)

    def preformatted_emit(self, node):
        """
        Emit a preformatted block.

        If the node names a parser in its ``sect`` attribute, the content is
        rendered by that parser; if there is no such attribute or the parser
        plugin is missing, the content is emitted as preformatted text.
        """
        parser_name = getattr(node, 'sect', '')
        if parser_name:
            # The formatter.parser will *sometimes* just return the result
            # and *sometimes* try to write it directly. We need to take both
            # cases into account! So whatever gets written is captured in a
            # buffer and appended to the returned value.
            lines = node.content.split(u'\n')
            buf = StringIO.StringIO()
            try:
                try:
                    self.request.redirect(buf)
                    returned = self.formatter.parser(parser_name, lines)
                finally:
                    # always stop redirecting the request output
                    self.request.redirect()
                buf.flush()
                written = buf.getvalue()
                buf.close()
                return returned + written
            except wikiutil.PluginMissingError:
                # no such parser: use the plain rendering below
                pass
        return ''.join([
            self.formatter.preformatted(1),
            self.formatter.text(node.content),
            self.formatter.preformatted(0),
        ])

    def default_emit(self, node):
        """
        Fallback function for emitting unknown nodes: a preformatted block
        showing the node kind, followed by the emitted children.
        """
        return ''.join([
            self.formatter.preformatted(1),
            self.formatter.text('<%s>\n' % node.kind),
            self.emit_children(node),
            self.formatter.preformatted(0),
        ])

    def emit_children(self, node):
        """Emit all the children of a node."""
        return ''.join([self.emit_node(child) for child in node.children])

    def emit_node(self, node):
        """Emit a single node with the emitter matching its kind."""
        emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
        return emit(node)

    def emit(self):
        """Emit the document represented by self.root DOM tree."""
        # Try to disable 'smart' formatting if possible
        magic_save = getattr(self.formatter, 'no_magic', False)
        self.formatter.no_magic = True
        try:
            output = self.emit_node(self.root)
        finally:
            # restore 'smart' formatting if it was set - also if emitting
            # failed, so the formatter is not left in the wrong mode
            self.formatter.no_magic = magic_save
        return output
00465 
# The identity stand-in `_` is only needed while the module is being loaded
# (it wraps the quickhelp text); remove it from the module namespace again.
del _