Back to index

moin  1.9.0~rc2
_conv160a_wiki.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - convert content in 1.6.0alpha (rev 1844: 58ebb64243cc) wiki markup to 1.6.0 style
00004                by using a modified 1.6.0alpha parser as translator.
00005 
00006     PLEASE NOTE: most moin users will never need to execute this code,
00007                  because it is just for users of 1.6.0alpha version,
00008                  that used modified link markup, but was never released.
00009                  The 1.5.x/1.6.x releases use a different link markup than 1.6.0a.
00010 
00011     @copyright: 2007 MoinMoin:JohannesBerg,
00012                 2007-2009 MoinMoin:ThomasWaldmann
00013     @license: GNU GPL, see COPYING for details.
00014 """
00015 
00016 import re
00017 
00018 from MoinMoin import i18n
00019 i18n.wikiLanguages = lambda: {}
00020 
00021 from MoinMoin import config, macro, wikiutil
00022 from MoinMoin.action import AttachFile
00023 from MoinMoin.Page import Page
00024 from MoinMoin.support.python_compatibility import rsplit
00025 
00026 import wikiutil160a
00027 from text_moin160a_wiki import Parser
00028 
# quote characters that may enclose a link target in bracket markup
# (see Converter._wikiname_bracket_repl)
QUOTE_CHARS = u"'\""
00030 
def convert_wiki(request, pagename, intext, renames):
    """ Convert page content written in 1.6.0alpha wiki markup.

    @param request: request object; its redirectedOutput() is used to
                    capture what the converter writes
    @param pagename: name of the page the content belongs to
    @param intext: the page content to convert
    @param renames: rename map that is handed to the Converter
    @return: the converted content; if intext did not end with CRLF,
             the CRLF that was added for processing is removed again
    """
    eol = '\r\n'
    had_eol = intext.endswith(eol)
    if not had_eol:
        # the converter works line by line and expects a terminated last line
        intext = intext + eol
    converter = Converter(request, pagename, intext, renames)
    converted = request.redirectedOutput(converter.convert, request)
    if had_eol or not converted.endswith(eol):
        return converted
    # drop the line end we added ourselves
    return converted[:-len(eol)]
00042 
00043 
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ Split an ImageLink argument string into positional and keyword parameters.

    @param args: the comma separated macro arguments (may be None or empty)
    @return: tuple (pp, kw) - pp is the list of positional parameters,
             kw is a dict of the keyword parameters width / height / alt
             (keys are lowercase str, values are kept as given)

    An argument that contains a '=' is handled like this:
     * known keyword before the '=': stored in kw
     * otherwise, if no keyword was seen yet and it contains '://', it is
       taken as a URL and used as positional parameter
     * anything else is dropped
    """
    kw_allowed = ('width', 'height', 'alt')
    kw = {} # keyword args
    pp = [] # positional parameters

    if args:
        items = [item.strip() for item in args.split(',')]
    else:
        items = []

    for item in items:
        if '=' not in item:
            pp.append(item)
            continue
        key, value = item.split('=', 1)
        key = key.lower()
        # Check the key BEFORE converting it to str: on Python 2, str() of a
        # unicode key with non-ascii chars (e.g. a url with umlauts and a
        # query string) raises UnicodeEncodeError. Allowed keys are pure
        # ascii, so str() is safe once we know the key is one of them.
        if key in kw_allowed:
            kw[str(key)] = value
        elif not kw and '://' in item:
            # avoid that urls with "=" are interpreted as keyword -
            # assuming that this is the image
            pp.append(item)

    if STONEAGE_IMAGELINK and len(pp) >= 2:
        pp[0], pp[1] = pp[1], pp[0]

    return pp, kw
00082 
00083 
00084 class Converter(Parser):
00085     def __init__(self, request, pagename, raw, renames):
00086         self.pagename = pagename
00087         self.raw = raw
00088         self.renames = renames
00089         self.request = request
00090         self._ = None
00091         self.in_pre = 0
00092 
00093         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
00094 
00095     # no change
00096     def return_word(self, word):
00097         return word
00098     _emph_repl = return_word
00099     _emph_ibb_repl = return_word
00100     _emph_ibi_repl = return_word
00101     _emph_ib_or_bi_repl = return_word
00102     _u_repl = return_word
00103     _strike_repl = return_word
00104     _sup_repl = return_word
00105     _sub_repl = return_word
00106     _small_repl = return_word
00107     _big_repl = return_word
00108     _tt_repl = return_word
00109     _tt_bt_repl = return_word
00110     _remark_repl = return_word
00111     _table_repl = return_word
00112     _tableZ_repl = return_word
00113     _rule_repl = return_word
00114     _smiley_repl = return_word
00115     _smileyA_repl = return_word
00116     _ent_repl = return_word
00117     _ent_numeric_repl = return_word
00118     _ent_symbolic_repl = return_word
00119     _heading_repl = return_word
00120     _email_repl = return_word
00121     _notword_repl = return_word
00122     _indent_repl = return_word
00123     _li_none_repl = return_word
00124     _li_repl = return_word
00125     _ol_repl = return_word
00126     _dl_repl = return_word
00127     _comment_repl = return_word
00128 
00129     # translate pagenames using pagename translation map
00130 
00131     def _replace(self, key):
00132         """ replace a item_name if it is in the renames dict
00133             key is either a 2-tuple ('PAGE', pagename)
00134             or a 3-tuple ('FILE', pagename, filename)
00135         """
00136         current_page = self.pagename
00137         item_type, page_name, file_name = (key + (None, ))[:3]
00138         abs_page_name = wikiutil.AbsPageName(current_page, page_name)
00139         if item_type == 'PAGE':
00140             key = (item_type, abs_page_name)
00141             new_name = self.renames.get(key)
00142             if new_name is None:
00143                 # we don't have an entry in rename map - apply the same magic
00144                 # to the page name as 1.5 did (" " -> "_") and try again:
00145                 abs_magic_name = abs_page_name.replace(u' ', u'_')
00146                 key = (item_type, abs_magic_name)
00147                 new_name = self.renames.get(key)
00148                 if new_name is None:
00149                     # we didn't find it under the magic name either -
00150                     # that means we do not rename it!
00151                     new_name = page_name
00152             if new_name != page_name and abs_page_name != page_name:
00153                 # we have to fix the (absolute) new_name to be a relative name (as it was before)
00154                 new_name = wikiutil.RelPageName(current_page, new_name)
00155         elif item_type == 'FILE':
00156             key = (item_type, abs_page_name, file_name)
00157             new_name = self.renames.get(key)
00158             if new_name is None:
00159                 # we don't have an entry in rename map - apply the same magic
00160                 # to the page name as 1.5 did (" " -> "_") and try again:
00161                 abs_magic_name = abs_page_name.replace(u' ', u'_')
00162                 key = (item_type, abs_magic_name, file_name)
00163                 new_name = self.renames.get(key)
00164                 if new_name is None:
00165                     # we didn't find it under the magic name either -
00166                     # that means we do not rename it!
00167                     new_name = file_name
00168         return new_name
00169 
00170     def _replace_target(self, target):
00171         target_and_anchor = rsplit(target, '#', 1)
00172         if len(target_and_anchor) > 1:
00173             target, anchor = target_and_anchor
00174             target = self._replace(('PAGE', target))
00175             return '%s#%s' % (target, anchor)
00176         else:
00177             target = self._replace(('PAGE', target))
00178             return target
00179 
00180     # markup conversion
00181 
00182     def _macro_repl(self, word):
00183         # we use [[...]] for links now, macros will be <<...>>
00184         macro_rule = ur"""
00185             \[\[
00186             (?P<macro_name>\w+)
00187             (\((?P<macro_args>.*?)\))?
00188             \]\]
00189         """
00190         word = unicode(word) # XXX why is word not unicode before???
00191         m = re.match(macro_rule, word, re.X|re.U)
00192         macro_name = m.group('macro_name')
00193         macro_args = m.group('macro_args')
00194         if macro_name == 'ImageLink':
00195             fixed, kw = explore_args(macro_args)
00196             #print "macro_args=%r" % macro_args
00197             #print "fixed=%r, kw=%r" % (fixed, kw)
00198             image, target = (fixed + ['', ''])[:2]
00199             if image is None:
00200                 image = ''
00201             if target is None:
00202                 target = ''
00203             if '://' not in image:
00204                 # if it is not a URL, it is meant as attachment
00205                 image = u'attachment:%s' % image
00206             if not target:
00207                 target = image
00208             elif target.startswith('inline:'):
00209                 target = 'attachment:' + target[7:] # we don't support inline:
00210             elif target.startswith('wiki:'):
00211                 target = target[5:] # drop wiki:
00212             image_attrs = []
00213             alt = kw.get('alt') or ''
00214             width = kw.get('width')
00215             if width is not None:
00216                 image_attrs.append(u"width=%s" % width)
00217             height = kw.get('height')
00218             if height is not None:
00219                 image_attrs.append(u"height=%s" % height)
00220             image_attrs = u", ".join(image_attrs)
00221             if image_attrs:
00222                 image_attrs = u'|' + image_attrs
00223             if alt or image_attrs:
00224                 alt = u'|' + alt
00225             result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
00226         else:
00227             if macro_args:
00228                 macro_args = u"(%s)" % macro_args
00229             else:
00230                 macro_args = u''
00231             result = u"<<%s%s>>" % (macro_name, macro_args)
00232         # XXX later check whether some to be renamed pagename is used as macro param
00233         return result
00234 
00235     def _word_repl(self, word, text=None):
00236         """Handle WikiNames."""
00237         if not text:
00238             if wikiutil.isStrictWikiname(word):
00239                 return word
00240             else:
00241                 return '[[%s]]' % word
00242         else: # internal use:
00243             return '[[%s|%s]]' % (word, text)
00244 
00245     def _wikiname_bracket_repl(self, text):
00246         """Handle special-char wikinames with link text, like:
00247            ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]
00248         """
00249         word = text[1:-1] # strip brackets
00250         first_char = word[0]
00251         if first_char in QUOTE_CHARS:
00252             # split on closing quote
00253             target, linktext = word[1:].split(first_char, 1)
00254         else: # not quoted
00255             # split on whitespace
00256             target, linktext = word.split(None, 1)
00257         if target:
00258             target = self._replace(('PAGE', target))
00259             linktext = linktext.strip()
00260             if linktext and linktext != target:
00261                 return '[[%s|%s]]' % (target, linktext)
00262             else:
00263                 return '[[%s]]' % target
00264         else:
00265             return text
00266 
00267     def _interwiki_repl(self, word):
00268         """Handle InterWiki links."""
00269         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
00270         if wikitag_bad:
00271             return word
00272         else:
00273             return self.interwiki("wiki:" + word)
00274 
00275     def interwiki(self, target_and_text, **kw):
00276         scheme, rest = target_and_text.split(':', 1)
00277         wikiname, pagename, text = wikiutil160a.split_wiki(rest)
00278 
00279         #if (pagename.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
00280         #    Page(self.request, pagename).exists()): # fancy link to local page [wiki:LocalPage text]
00281         #    # XXX OtherWiki:FooPage markup -> checks for local FooPage -sense???
00282         #    pagename = wikiutil.url_unquote(pagename)
00283         #    pagename = self._replace_target(pagename)
00284         #    return '[[%s%s]]' % (pagename, text)
00285 
00286         if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
00287             orig_pagename = pagename
00288             pagename = wikiutil.url_unquote(pagename)
00289             pagename = self._replace_target(pagename)
00290             camelcase = wikiutil.isStrictWikiname(pagename)
00291             if camelcase and (not text or text == orig_pagename):
00292                 return pagename # optimize special case
00293             else:
00294                 if text:
00295                     text = '|' + text
00296                 return '[[%s%s]]' % (pagename, text)
00297 
00298         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, wikiname+':')
00299         if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
00300             pagename = wikiutil.url_unquote(pagename)
00301             pagename = self._replace_target(pagename)
00302             wikitail = pagename
00303         else: # good
00304             wikitail = wikiutil.url_unquote(pagename)
00305 
00306         # link to self?
00307         if wikiutil.isPicture(wikitail):
00308             return '{{%s:%s%s}}' % (wikitag, wikitail, text)
00309         else:
00310             if ' ' not in wikitail and not text:
00311                 return '%s:%s' % (wikitag, wikitail)
00312             else:
00313                 if text:
00314                     text = '|' + text
00315                 return '[[%s:%s%s]]' % (wikitag, wikitail, text)
00316 
00317     def attachment(self, target_and_text, **kw):
00318         """ This gets called on attachment URLs """
00319         _ = self._
00320         scheme, fname, text = wikiutil160a.split_wiki(target_and_text)
00321 
00322         pagename, fname = AttachFile.absoluteName(fname, self.pagename)
00323         from_this_page = pagename == self.pagename
00324         fname = self._replace(('FILE', pagename, fname))
00325         #fname = wikiutil.url_unquote(fname)
00326         #fname = self._replace(('FILE', pagename, fname))
00327         pagename = self._replace(('PAGE', pagename))
00328         if from_this_page:
00329             name = fname
00330         else:
00331             name = "%s/%s" % (pagename, fname)
00332 
00333         fn_txt = name
00334         if text:
00335             fn_txt += '|' + text
00336 
00337         if scheme == 'drawing':
00338             return "{{drawing:%s}}" % fn_txt
00339 
00340         # check for image, and possibly return IMG tag (images are always inlined)
00341         if not kw.get('pretty_url', 0) and wikiutil.isPicture(fname):
00342             return "{{attachment:%s}}" % fn_txt
00343 
00344         # inline the attachment
00345         if scheme == 'inline':
00346             return '{{attachment:%s}}' % fn_txt
00347 
00348         return '[[attachment:%s]]' % fn_txt
00349 
00350     def _url_repl(self, word):
00351         """Handle literal URLs including inline images."""
00352         scheme = word.split(":", 1)[0]
00353 
00354         if scheme == 'wiki':
00355             return self.interwiki(word)
00356         if scheme in self.attachment_schemas:
00357             return '%s' % self.attachment(word)
00358 
00359         if wikiutil.isPicture(word): # magic will go away in 1.6!
00360             return '{{%s}}' % word # new markup for inline images
00361         else:
00362             return word
00363 
00364 
00365     def _url_bracket_repl(self, word):
00366         """Handle bracketed URLs."""
00367         word = word[1:-1] # strip brackets
00368 
00369         # Local extended link? [:page name:link text] XXX DEPRECATED
00370         if word[0] == ':':
00371             words = word[1:].split(':', 1)
00372             link, text = (words + ['', ''])[:2]
00373             if link.strip() == text.strip():
00374                 text = ''
00375             link = self._replace_target(link)
00376             if text:
00377                 text = '|' + text
00378             return '[[%s%s]]' % (link, text)
00379 
00380         scheme_and_rest = word.split(":", 1)
00381         if len(scheme_and_rest) == 1: # no scheme
00382             # Traditional split on space
00383             words = word.split(None, 1)
00384             if words[0].startswith('#'): # anchor link
00385                 link, text = (words + ['', ''])[:2]
00386                 if link.strip() == text.strip():
00387                     text = ''
00388                 if text:
00389                     text = '|' + text
00390                 return '[[%s%s]]' % (link, text)
00391         else:
00392             scheme = scheme_and_rest[0]
00393             if scheme == "wiki":
00394                 return self.interwiki(word, pretty_url=1)
00395             if scheme in self.attachment_schemas:
00396                 m = self.attachment(word)
00397                 if scheme == 'attachment':
00398                     # with url_bracket markup, 1.6.0a parser does not embed pictures, but link!
00399                     return '[[%s]]' % m[2:-2]
00400                 else:
00401                     # drawing and inline
00402                     return m
00403 
00404             words = word.split(None, 1)
00405             if len(words) == 1:
00406                 words = words * 2
00407 
00408         target, text = words
00409         if wikiutil.isPicture(text) and re.match(self.url_rule, text):
00410             return '[[%s|{{%s}}]]' % (target, text)
00411         else:
00412             if target == text:
00413                 return '[[%s]]' % target
00414             else:
00415                 return '[[%s|%s]]' % (target, text)
00416 
    def _parser_repl(self, word):
        """ Keep the matched markup as it is and remember that we are in a
            preformatted section now (convert() then only scans for the
            closing }}} marker).
        """
        self.in_pre = 'no_parser'
        return word
00420 
00421     def _pre_repl(self, word):
00422         w = word.strip()
00423         if w == '{{{' and not self.in_pre:
00424             self.in_pre = 'no_parser'
00425         elif w == '}}}' and self.in_pre:
00426             self.in_pre = None
00427         return word
00428 
00429     def scan(self, scan_re, line):
00430         """ Scans one line - append text before match, invoke replace() with match, and add text after match.  """
00431         result = []
00432         lastpos = 0
00433 
00434         for match in scan_re.finditer(line):
00435             # Add text before the match
00436             if lastpos < match.start():
00437                 result.append(line[lastpos:match.start()])
00438             # Replace match with markup
00439             result.append(self.replace(match))
00440             lastpos = match.end()
00441 
00442         # Add remainder of the line
00443         result.append(line[lastpos:])
00444         return u''.join(result)
00445 
00446 
00447     def replace(self, match):
00448         """ Replace match using type name """
00449         result = []
00450         for _type, hit in match.groupdict().items():
00451             if hit is not None and not _type in ["hmarker", ]:
00452                 # Get replace method and replace hit
00453                 replace = getattr(self, '_' + _type + '_repl')
00454                 # print _type, hit
00455                 result.append(replace(hit))
00456                 return ''.join(result)
00457         else:
00458             # We should never get here
00459             import pprint
00460             raise Exception("Can't handle match %r\n%s\n%s" % (
00461                 match,
00462                 pprint.pformat(match.groupdict()),
00463                 pprint.pformat(match.groups()),
00464             ))
00465 
00466         return ""
00467 
00468     def convert(self, request):
00469         """ For each line, scan through looking for magic
00470             strings, outputting verbatim any intervening text.
00471         """
00472         self.request = request
00473         # prepare regex patterns
00474         rules = self.formatting_rules.replace('\n', '|')
00475         if self.request.cfg.bang_meta:
00476             rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
00477                 'word_rule': self.word_rule,
00478                 'rules': rules,
00479             }
00480         pre_rules = r'''(?P<pre>\}\}\})'''
00481         pre_scan_re = re.compile(pre_rules, re.UNICODE)
00482         scan_re = re.compile(rules, re.UNICODE)
00483         eol_re = re.compile(r'\r?\n', re.UNICODE)
00484 
00485         rawtext = self.raw
00486 
00487         # remove last item because it's guaranteed to be empty
00488         self.lines = eol_re.split(rawtext)[:-1]
00489         self.in_processing_instructions = True
00490 
00491         # Main loop
00492         for line in self.lines:
00493             # ignore processing instructions
00494             if self.in_processing_instructions:
00495                 found = False
00496                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
00497                            "#pragma", "#form", "#acl", "#language"):
00498                     if line.lower().startswith(pi):
00499                         self.request.write(line + '\r\n')
00500                         found = True
00501                         break
00502                 if not found:
00503                     self.in_processing_instructions = False
00504                 else:
00505                     continue # do not parse this line
00506             if not line.strip():
00507                 self.request.write(line + '\r\n')
00508             else:
00509                 # Scan line, format and write
00510                 scanning_re = self.in_pre and pre_scan_re or scan_re
00511                 formatted_line = self.scan(scanning_re, line)
00512                 self.request.write(formatted_line + '\r\n')
00513