Back to index

moin  1.9.0~rc2
_conv160_wiki.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - convert content in 1.5.8 wiki markup to 1.6.0 style
00004                by using a modified 1.5.8 parser as translator.
00005 
00006     Assuming we have this "renames" map:
00007     -------------------------------------------------------
00008     'PAGE', 'some_page'        -> 'some page'
00009     'FILE', 'with%20blank.txt' -> 'with blank.txt'
00010 
00011     Markup transformations needed:
00012     -------------------------------------------------------
00013     ["some_page"]           -> [[some page]] # renamed
00014     [:some_page:some text]  -> [[some page|some text]]
00015     [:page:text]            -> [[page|text]]
00016                                (with a page not being renamed)
00017 
00018     attachment:with%20blank.txt -> [[attachment:with blank.txt]]
00019     attachment:some_page/with%20blank.txt -> [[attachment:some page/with blank.txt]]
00020     The attachment processing should also urllib.unquote the filename (or at
00021     least replace %20 by space) and put it into "quotes" if it contains spaces.
00022 
00023     @copyright: 2007 MoinMoin:JohannesBerg,
00024                 2007 MoinMoin:ThomasWaldmann
00025     @license: GNU GPL, see COPYING for details.
00026 """
00027 
00028 import re
00029 
00030 from MoinMoin import i18n
00031 i18n.wikiLanguages = lambda: {}
00032 
00033 from MoinMoin import config, wikiutil, macro
00034 from MoinMoin.action import AttachFile
00035 from MoinMoin.Page import Page
00036 from MoinMoin.support.python_compatibility import rsplit
00037 
00038 from text_moin158_wiki import Parser
00039 
def convert_wiki(request, pagename, intext, renames):
    """ Convert page content written in 1.5.8 wiki markup to 1.6.0 markup.

    @param request: request object; its redirectedOutput() is used to capture
                    what the converter writes
    @param pagename: name of the page the content belongs to
    @param intext: the page content to convert
    @param renames: rename map handed on to the Converter
    @return: the converted content
    """
    eol = '\r\n'
    # the converter works on complete lines, so make sure the last one is
    # terminated - and remember whether we had to add that terminator
    had_eol = intext.endswith(eol)
    if not had_eol:
        intext = intext + eol
    converter = Converter(request, pagename, intext, renames)
    result = request.redirectedOutput(converter.convert, request)
    if not had_eol and result.endswith(eol):
        # drop the line end we added ourselves
        result = result[:-2]
    return result
00051 
00052 
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ Explore an ImageLink argument string for positional and keyword parameters.

    The string is split at commas and every item is stripped. An item
    containing '=' is a keyword parameter only if its (lowercased) key is
    one of width, height, alt. Otherwise it is kept as positional parameter
    if it contains '://' and no keyword parameter was seen yet (a URL that
    has a '=' in it) - any other 'key=value' item is dropped.

    @param args: the raw argument string (may be empty or None)
    @return: tuple (pp, kw) - list of positional parameters and dict of
             keyword parameters; the first two positional parameters are
             swapped if STONEAGE_IMAGELINK is set
    """
    if args:
        args = [arg.strip() for arg in args.split(',')]
    else:
        args = []

    kw = {} # keyword args
    pp = [] # positional parameters

    kwAllowed = ('width', 'height', 'alt')

    for arg in args:
        if '=' not in arg:
            pp.append(arg)
            continue
        key, value = arg.split('=', 1)
        key_lower = key.lower()
        # avoid that urls with "=" are interpreted as keyword
        if key_lower in kwAllowed:
            # Convert to str only after the whitelist check: on Python 2,
            # str() of a unicode key with non-ASCII characters raises
            # UnicodeEncodeError, whitelisted keys are plain ASCII.
            kw[str(key_lower)] = value
        elif not kw and '://' in arg:
            # assuming that this is the image
            pp.append(arg)

    if STONEAGE_IMAGELINK and len(pp) >= 2:
        pp[0], pp[1] = pp[1], pp[0]

    return pp, kw
00091 
00092 
00093 class Converter(Parser):
00094     def __init__(self, request, pagename, raw, renames):
00095         self.pagename = pagename
00096         self.raw = raw
00097         self.renames = renames
00098         self.request = request
00099         self._ = None
00100         self.in_pre = 0
00101 
00102         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
00103 
    # no change
    def return_word(self, word):
        """ Return the matched markup unchanged. """
        return word
    # replace() looks up handlers by the name '_<groupname>_repl'; all the
    # names below are bound to return_word, so the matched text of these
    # groups is passed through as it is.
    _emph_repl = return_word
    _emph_ibb_repl = return_word
    _emph_ibi_repl = return_word
    _emph_ib_or_bi_repl = return_word
    _u_repl = return_word
    _strike_repl = return_word
    _sup_repl = return_word
    _sub_repl = return_word
    _small_repl = return_word
    _big_repl = return_word
    _tt_repl = return_word
    _tt_bt_repl = return_word
    _remark_repl = return_word
    _table_repl = return_word
    _tableZ_repl = return_word
    _rule_repl = return_word
    _smiley_repl = return_word
    _smileyA_repl = return_word
    _ent_repl = return_word
    _ent_numeric_repl = return_word
    _ent_symbolic_repl = return_word
    _heading_repl = return_word
    _email_repl = return_word
    _notword_repl = return_word
    _indent_repl = return_word
    _li_none_repl = return_word
    _li_repl = return_word
    _ol_repl = return_word
    _dl_repl = return_word
    _comment_repl = return_word
00137 
00138     # translate pagenames using pagename translation map
00139 
00140     def _replace(self, key):
00141         """ replace a item_name if it is in the renames dict
00142             key is either a 2-tuple ('PAGE', pagename)
00143             or a 3-tuple ('FILE', pagename, filename)
00144         """
00145         current_page = self.pagename
00146         item_type, page_name, file_name = (key + (None, ))[:3]
00147         abs_page_name = wikiutil.AbsPageName(current_page, page_name)
00148         if item_type == 'PAGE':
00149             key = (item_type, abs_page_name)
00150             new_name = self.renames.get(key)
00151             if new_name is None:
00152                 # we don't have an entry in rename map - apply the same magic
00153                 # to the page name as 1.5 did (" " -> "_") and try again:
00154                 abs_magic_name = abs_page_name.replace(u' ', u'_')
00155                 key = (item_type, abs_magic_name)
00156                 new_name = self.renames.get(key)
00157                 if new_name is None:
00158                     # we didn't find it under the magic name either -
00159                     # that means we do not rename it!
00160                     new_name = page_name
00161             if new_name != page_name and abs_page_name != page_name:
00162                 # we have to fix the (absolute) new_name to be a relative name (as it was before)
00163                 new_name = wikiutil.RelPageName(current_page, new_name)
00164         elif item_type == 'FILE':
00165             key = (item_type, abs_page_name, file_name)
00166             new_name = self.renames.get(key)
00167             if new_name is None:
00168                 # we don't have an entry in rename map - apply the same magic
00169                 # to the page name as 1.5 did (" " -> "_") and try again:
00170                 abs_magic_name = abs_page_name.replace(u' ', u'_')
00171                 key = (item_type, abs_magic_name, file_name)
00172                 new_name = self.renames.get(key)
00173                 if new_name is None:
00174                     # we didn't find it under the magic name either -
00175                     # that means we do not rename it!
00176                     new_name = file_name
00177         return new_name
00178 
00179     def _replace_target(self, target):
00180         target_and_anchor = rsplit(target, '#', 1)
00181         if len(target_and_anchor) > 1:
00182             target, anchor = target_and_anchor
00183             target = self._replace(('PAGE', target))
00184             return '%s#%s' % (target, anchor)
00185         else:
00186             target = self._replace(('PAGE', target))
00187             return target
00188 
00189     # markup conversion
00190 
00191     def _macro_repl(self, word):
00192         # we use [[...]] for links now, macros will be <<...>>
00193         macro_rule = ur"""
00194             \[\[
00195             (?P<macro_name>\w+)
00196             (\((?P<macro_args>.*?)\))?
00197             \]\]
00198         """
00199         word = unicode(word) # XXX why is word not unicode before???
00200         m = re.match(macro_rule, word, re.X|re.U)
00201         macro_name = m.group('macro_name')
00202         macro_args = m.group('macro_args')
00203         if macro_name == 'ImageLink':
00204             fixed, kw = explore_args(macro_args)
00205             #print "macro_args=%r" % macro_args
00206             #print "fixed=%r, kw=%r" % (fixed, kw)
00207             image, target = (fixed + ['', ''])[:2]
00208             if image is None:
00209                 image = ''
00210             if target is None:
00211                 target = ''
00212             if '://' not in image:
00213                 # if it is not a URL, it is meant as attachment
00214                 image = u'attachment:%s' % image
00215             if not target:
00216                 target = image
00217             elif target.startswith('inline:'):
00218                 target = 'attachment:' + target[7:] # we don't support inline:
00219             elif target.startswith('wiki:'):
00220                 target = target[5:] # drop wiki:
00221             image_attrs = []
00222             alt = kw.get('alt') or ''
00223             width = kw.get('width')
00224             if width is not None:
00225                 image_attrs.append(u"width=%s" % width)
00226             height = kw.get('height')
00227             if height is not None:
00228                 image_attrs.append(u"height=%s" % height)
00229             image_attrs = u", ".join(image_attrs)
00230             if image_attrs:
00231                 image_attrs = u'|' + image_attrs
00232             if alt or image_attrs:
00233                 alt = u'|' + alt
00234             result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
00235         else:
00236             if macro_args:
00237                 macro_args = u"(%s)" % macro_args
00238             else:
00239                 macro_args = u''
00240             result = u"<<%s%s>>" % (macro_name, macro_args)
00241         # XXX later check whether some to be renamed pagename is used as macro param
00242         return result
00243 
00244     def _word_repl(self, word, text=None):
00245         """Handle WikiNames."""
00246         if not text:
00247             return word
00248         else: # internal use:
00249             return '[[%s|%s]]' % (word, text)
00250 
00251     def _wikiname_bracket_repl(self, word):
00252         """Handle special-char wikinames."""
00253         pagename = word[2:-2]
00254         if pagename:
00255             pagename = self._replace(('PAGE', pagename))
00256             return '[[%s]]' % pagename
00257         else:
00258             return word
00259 
00260     def _interwiki_repl(self, word):
00261         """Handle InterWiki links."""
00262         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
00263         if wikitag_bad:
00264             return word
00265         else:
00266             wikiname, pagename = word.split(':', 1)
00267             pagename = wikiutil.url_unquote(pagename) # maybe someone has used %20 for blanks in pagename
00268             camelcase = wikiutil.isStrictWikiname(pagename)
00269             if wikiname in ('Self', self.request.cfg.interwikiname):
00270                 pagename = self._replace(('PAGE', pagename))
00271                 if camelcase:
00272                     return '%s' % pagename # optimize special case
00273                 else:
00274                     return '[[%s]]' % pagename # optimize special case
00275             else:
00276                 if ' ' in pagename: # we could get a ' '  by urlunquoting
00277                     return '[[%s:%s]]' % (wikiname, pagename)
00278                 else:
00279                     return '%s:%s' % (wikiname, pagename)
00280 
00281     def interwiki(self, url_and_text):
00282         if len(url_and_text) == 1:
00283             url = url_and_text[0]
00284             text = ''
00285         else:
00286             url, text = url_and_text
00287             text = '|' + text
00288 
00289         # keep track of whether this is a self-reference, so links
00290         # are always shown even the page doesn't exist.
00291         scheme, url = url.split(':', 1)
00292         wikiname, pagename = wikiutil.split_wiki(url)
00293         if (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
00294             Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
00295             pagename = wikiutil.url_unquote(url)
00296             pagename = self._replace_target(pagename)
00297             return '[[%s%s]]' % (pagename, text)
00298         if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
00299             pagename = wikiutil.url_unquote(pagename)
00300             pagename = self._replace_target(pagename)
00301             return '[[%s%s]]' % (pagename, text)
00302 
00303         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
00304         if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
00305             pagename = wikiutil.url_unquote(pagename)
00306             pagename = self._replace_target(pagename)
00307             wikitail = pagename
00308         else: # good
00309             wikitail = wikiutil.url_unquote(wikitail)
00310 
00311         # link to self?
00312         if wikiutil.isPicture(wikitail):
00313             return '{{%s:%s%s}}' % (wikitag, wikitail, text)
00314         else:
00315             if ' ' not in wikitail and not text:
00316                 return '%s:%s' % (wikitag, wikitail)
00317             else:
00318                 return '[[%s:%s%s]]' % (wikitag, wikitail, text)
00319 
00320     def attachment(self, url_and_text):
00321         """ This gets called on attachment URLs. """
00322         if len(url_and_text) == 1:
00323             url = url_and_text[0]
00324             text = ''
00325         else:
00326             url, text = url_and_text
00327             text = '|' + text
00328 
00329         scheme, fname = url.split(":", 1)
00330         #scheme, fname, text = wikiutil.split_wiki(target_and_text)
00331 
00332         pagename, fname = AttachFile.absoluteName(fname, self.pagename)
00333         from_this_page = pagename == self.pagename
00334         fname = self._replace(('FILE', pagename, fname))
00335         fname = wikiutil.url_unquote(fname)
00336         fname = self._replace(('FILE', pagename, fname))
00337         pagename = self._replace(('PAGE', pagename))
00338         if from_this_page:
00339             name = fname
00340         else:
00341             name = "%s/%s" % (pagename, fname)
00342 
00343         if scheme == 'drawing':
00344             return "{{drawing:%s%s}}" % (name, text)
00345 
00346         # check for image URL, and possibly return IMG tag
00347         # (images are always inlined, just like for other URLs)
00348         if wikiutil.isPicture(name):
00349             return "{{attachment:%s%s}}" % (name, text)
00350 
00351         # inline the attachment
00352         if scheme == 'inline':
00353             return '{{attachment:%s%s}}' % (name, text)
00354         else: # 'attachment'
00355             return '[[attachment:%s%s]]' % (name, text)
00356 
00357     def _url_repl(self, word):
00358         """Handle literal URLs including inline images."""
00359         scheme = word.split(":", 1)[0]
00360 
00361         if scheme == 'wiki':
00362             return self.interwiki([word])
00363         if scheme in self.attachment_schemas:
00364             return '%s' % self.attachment([word])
00365 
00366         if wikiutil.isPicture(word): # magic will go away in 1.6!
00367             return '{{%s}}' % word # new markup for inline images
00368         else:
00369             return word
00370 
00371     def _url_bracket_repl(self, word):
00372         """Handle bracketed URLs."""
00373         word = word[1:-1] # strip brackets
00374 
00375         # Local extended link?
00376         if word[0] == ':':
00377             words = word[1:].split(':', 1)
00378             link, text = (words + ['', ''])[:2]
00379             if link.strip() == text.strip():
00380                 text = ''
00381             link = self._replace_target(link)
00382             if text:
00383                 text = '|' + text
00384             return '[[%s%s]]' % (link, text)
00385 
00386         # Traditional split on space
00387         words = word.split(None, 1)
00388         if words[0][0] == '#':
00389             # anchor link
00390             link, text = (words + ['', ''])[:2]
00391             if link.strip() == text.strip():
00392                 text = ''
00393             #link = self._replace_target(link)
00394             if text:
00395                 text = '|' + text
00396             return '[[%s%s]]' % (link, text)
00397 
00398         scheme = words[0].split(":", 1)[0]
00399         if scheme == "wiki":
00400             return self.interwiki(words)
00401             #scheme, wikiname, pagename, text = self.interwiki(word)
00402             #print "%r %r %r %r" % (scheme, wikiname, pagename, text)
00403             #if wikiname in ('Self', self.request.cfg.interwikiname, ''):
00404             #    if text:
00405             #        text = '|' + text
00406             #    return '[[%s%s]]' % (pagename, text)
00407             #else:
00408             #    if text:
00409             #        text = '|' + text
00410             #    return "[[%s:%s%s]]" % (wikiname, pagename, text)
00411         if scheme in self.attachment_schemas:
00412             m = self.attachment(words)
00413             if m.startswith('{{') and m.endswith('}}'):
00414                 # with url_bracket markup, 1.5.8 parser does not embed, but link!
00415                 m = '[[%s]]' % m[2:-2]
00416             return m
00417 
00418         target, desc = (words + ['', ''])[:2]
00419         if wikiutil.isPicture(desc) and re.match(self.url_rule, desc):
00420             #return '[[%s|{{%s|%s}}]]' % (words[0], words[1], words[0])
00421             return '[[%s|{{%s}}]]' % (target, desc)
00422         else:
00423             if desc:
00424                 desc = '|' + desc
00425             return '[[%s%s]]' % (target, desc)
00426 
00427     def _pre_repl(self, word):
00428         w = word.strip()
00429         if w == '{{{' and not self.in_pre:
00430             self.in_pre = True
00431         elif w == '}}}' and self.in_pre:
00432             self.in_pre = False
00433         return word
00434 
    def _processor_repl(self, word):
        """ Switch in_pre on and return the matched markup unchanged. """
        # from now on convert() scans the lines only for the closing }}}
        self.in_pre = True
        return word
00438 
00439     def scan(self, scan_re, line):
00440         """ Scans one line - append text before match, invoke replace() with match, and add text after match.  """
00441         result = []
00442         lastpos = 0
00443 
00444         for match in scan_re.finditer(line):
00445             # Add text before the match
00446             if lastpos < match.start():
00447                 result.append(line[lastpos:match.start()])
00448             # Replace match with markup
00449             result.append(self.replace(match))
00450             lastpos = match.end()
00451 
00452         # Add remainder of the line
00453         result.append(line[lastpos:])
00454         return u''.join(result)
00455 
00456 
00457     def replace(self, match):
00458         """ Replace match using type name """
00459         result = []
00460         for _type, hit in match.groupdict().items():
00461             if hit is not None and not _type in ["hmarker", ]:
00462                 # Get replace method and replace hit
00463                 replace = getattr(self, '_' + _type + '_repl')
00464                 # print _type, hit
00465                 result.append(replace(hit))
00466                 return ''.join(result)
00467         else:
00468             # We should never get here
00469             import pprint
00470             raise Exception("Can't handle match %r\n%s\n%s" % (
00471                 match,
00472                 pprint.pformat(match.groupdict()),
00473                 pprint.pformat(match.groups()),
00474             ))
00475 
00476         return ""
00477 
00478     def convert(self, request):
00479         """ For each line, scan through looking for magic
00480             strings, outputting verbatim any intervening text.
00481         """
00482         self.request = request
00483         # prepare regex patterns
00484         rules = self.formatting_rules.replace('\n', '|')
00485         if self.request.cfg.bang_meta:
00486             rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
00487                 'word_rule': self.word_rule,
00488                 'rules': rules,
00489             }
00490         pre_rules = r'''(?P<pre>\}\}\})'''
00491         pre_scan_re = re.compile(pre_rules, re.UNICODE)
00492         scan_re = re.compile(rules, re.UNICODE)
00493         eol_re = re.compile(r'\r?\n', re.UNICODE)
00494 
00495         rawtext = self.raw
00496 
00497         # remove last item because it's guaranteed to be empty
00498         self.lines = eol_re.split(rawtext)[:-1]
00499         self.in_processing_instructions = True
00500 
00501         # Main loop
00502         for line in self.lines:
00503             # ignore processing instructions
00504             if self.in_processing_instructions:
00505                 found = False
00506                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
00507                            "#pragma", "#form", "#acl", "#language"):
00508                     if line.lower().startswith(pi):
00509                         self.request.write(line + '\r\n')
00510                         found = True
00511                         break
00512                 if not found:
00513                     self.in_processing_instructions = False
00514                 else:
00515                     continue # do not parse this line
00516             if not line.strip():
00517                 self.request.write(line + '\r\n')
00518             else:
00519                 # Scan line, format and write
00520                 scanning_re = self.in_pre and pre_scan_re or scan_re
00521                 formatted_line = self.scan(scanning_re, line)
00522                 self.request.write(formatted_line + '\r\n')
00523