Back to index

moin  1.9.0~rc2
wikiutil.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Wiki Utility Functions
00004 
00005     @copyright: 2000-2004 Juergen Hermann <jh@web.de>,
00006                 2004 by Florian Festi,
00007                 2006 by Mikko Virkkil,
00008                 2005-2008 MoinMoin:ThomasWaldmann,
00009                 2007 MoinMoin:ReimarBauer
00010     @license: GNU GPL, see COPYING for details.
00011 """
00012 
00013 import cgi
00014 import codecs
00015 import os
00016 import re
00017 import time
00018 import urllib
00019 
00020 from MoinMoin import log
00021 logging = log.getLogger(__name__)
00022 
00023 from MoinMoin import config
00024 from MoinMoin.util import pysupport, lock
00025 from MoinMoin.support.python_compatibility import rsplit
00026 from inspect import getargspec, isfunction, isclass, ismethod
00027 
00028 from MoinMoin import web # needed so that next line works:
00029 import werkzeug
00030 
00031 # Exceptions
class InvalidFileNameError(Exception):
    """ Raised when we find an invalid (wrongly quoted) file name """
00035 
# constants for page names
# prefix of a relative page name that refers to the parent level
# (stripped once per level by AbsPageName, emitted by RelPageName)
PARENT_PREFIX = "../"
PARENT_PREFIX_LEN = len(PARENT_PREFIX)
# prefix of a relative page name that refers to a sub page of the context page
CHILD_PREFIX = "/"
CHILD_PREFIX_LEN = len(CHILD_PREFIX)
00041 
00042 #############################################################################
00043 ### Getting data from user/Sending data to user
00044 #############################################################################
00045 
def decodeUnknownInput(text):
    """ Decode input of unknown encoding, like text attachments

    Strict decoding is tried with utf-8 first (its special format makes it
    fail for non-utf-8 data), then with config.charset (if that is neither
    utf-8 nor iso-8859-1). If both fail, iso-8859-1 with 'replace' is used,
    so no UnicodeError is raised, but the result may be junk.

    WARNING: Use this function only for data that you view, not for data
    that you save in the wiki.

    @param text: the text to decode, string
    @rtype: unicode
    @return: decoded text (maybe wrong)
    """
    if isinstance(text, unicode):
        # already unicode, nothing to decode
        return text

    strict_encodings = ['utf-8']
    if config.charset not in ['utf-8', 'iso-8859-1']:
        strict_encodings.append(config.charset)

    for encoding in strict_encodings:
        try:
            return unicode(text, encoding)
        except UnicodeError:
            pass

    # last resort
    return unicode(text, 'iso-8859-1', 'replace')
00074 
00075 
def decodeUserInput(s, charsets=[config.charset]):
    """
    Decode input from the user, trying the given charsets in order.

    @param s: the string to decode
    @param charsets: list of charsets to assume the string is in
    @rtype: unicode
    @return: s decoded using the first charset that works
    @raise UnicodeError: if s can not be decoded using any of the charsets
    """
    for encoding in charsets:
        try:
            decoded = s.decode(encoding)
        except UnicodeError:
            # try the next charset
            continue
        return decoded
    raise UnicodeError('The string %r cannot be decoded.' % s)
00091 
00092 
def url_quote(s, safe='/', want_unicode=None):
    """ Quote s for usage in a URL, see werkzeug.url_quote.

    We use a different safe param default value and always use config.charset.

    @param s: the string to quote
    @param safe: characters that shall not get quoted
    @param want_unicode: DEPRECATED, do not use. The value is ignored, an
                         error is logged if it is given.
    @return: the result of werkzeug.url_quote
    """
    if want_unicode is not None:
        # "logging" is this module's logger object, "log" is just the
        # MoinMoin.log module it was created with.
        logging.error("call with deprecated want_unicode param, please fix caller")
    return werkzeug.url_quote(s, charset=config.charset, safe=safe)
00100 
def url_quote_plus(s, safe='/', want_unicode=None):
    """ Quote s for usage in a URL, see werkzeug.url_quote_plus.

    We use a different safe param default value and always use config.charset.

    @param s: the string to quote
    @param safe: characters that shall not get quoted
    @param want_unicode: DEPRECATED, do not use. The value is ignored, an
                         error is logged if it is given.
    @return: the result of werkzeug.url_quote_plus
    """
    if want_unicode is not None:
        # "logging" is this module's logger object, "log" is just the
        # MoinMoin.log module it was created with.
        logging.error("call with deprecated want_unicode param, please fix caller")
    return werkzeug.url_quote_plus(s, charset=config.charset, safe=safe)
00108 
def url_unquote(s, want_unicode=None):
    """ Unquote a URL-quoted string, see werkzeug.url_unquote.

    @param s: the quoted string; unicode input is encoded using
              config.charset before it is unquoted
    @param want_unicode: DEPRECATED, do not use. The value is ignored, an
                         error is logged if it is given.
    @return: the result of werkzeug.url_unquote, called with
             charset=config.charset and errors='fallback:iso-8859-1'
    """
    if want_unicode is not None:
        # "logging" is this module's logger object, "log" is just the
        # MoinMoin.log module it was created with.
        logging.error("call with deprecated want_unicode param, please fix caller")
    if isinstance(s, unicode):
        s = s.encode(config.charset)
    return werkzeug.url_unquote(s, charset=config.charset, errors='fallback:iso-8859-1')
00118 
00119 
def parseQueryString(qstr, want_unicode=None):
    """ Parse a query string, see werkzeug.url_decode.

        Please note: this returns a MultiDict, you might need to use dict() on
                     the result if your code expects a "normal" dict.

        @param qstr: the query string to parse
        @param want_unicode: DEPRECATED, do not use. The value is ignored, an
                             error is logged if it is given.
        @return: the result of werkzeug.url_decode (keys are not decoded,
                 empty values are not included)
    """
    if want_unicode is not None:
        # "logging" is this module's logger object, "log" is just the
        # MoinMoin.log module it was created with.
        logging.error("call with deprecated want_unicode param, please fix caller")
    return werkzeug.url_decode(qstr, charset=config.charset, errors='fallback:iso-8859-1',
                               decode_keys=False, include_empty=False)
00132 
def makeQueryString(qstr=None, want_unicode=None, **kw):
    """ Make a querystring from arguments.

    kw arguments override values in qstr. The dict given as qstr is not
    modified, the keyword arguments are merged into a copy of it.

    If a string is passed in, it's returned verbatim and keyword parameters are ignored.

    See also: werkzeug.url_encode

    @param qstr: dict to format as query string, using either ascii or unicode
    @param want_unicode: DEPRECATED, do not use. The value is ignored, an
                         error is logged if it is given.
    @param kw: same as dict when using keywords, using ascii or unicode
    @rtype: string
    @return: query string ready to use in a url
    @raise ValueError: if qstr is neither None, nor a string, nor a dict
    """
    if want_unicode is not None:
        # "logging" is this module's logger object, "log" is just the
        # MoinMoin.log module it was created with.
        logging.error("call with deprecated want_unicode param, please fix caller")
    if qstr is None:
        qstr = {}
    elif isinstance(qstr, (str, unicode)):
        return qstr
    if not isinstance(qstr, dict):
        raise ValueError("Unsupported argument type, should be dict.")
    # merge into a copy, so we do not change the dict of the caller
    qstr = qstr.copy()
    qstr.update(kw)
    return werkzeug.url_encode(qstr, charset=config.charset, encode_keys=True)
00160 
00161 
def quoteWikinameURL(pagename, charset=config.charset):
    """ Return a url encoding of a page name

    Uses werkzeug.url_quote with safe='/', so slashes in page names are
    kept as they are.

    @param pagename: the original pagename (unicode)
    @param charset: url text encoding, 'utf-8' recommended. Other charset
                    might not be able to encode the page name and raise
                    UnicodeError. (default config.charset ('utf-8')).
    @rtype: string
    @return: the quoted page name, all unsafe characters encoded
    """
    # Note: urllib.quote and werkzeug.url_quote have different defaults
    # for safe=..., thus we give it explicitly.
    quoted = werkzeug.url_quote(pagename, safe='/', charset=charset)
    return quoted
00177 
00178 
# module level alias, so werkzeug.escape can be used as wikiutil.escape
escape = werkzeug.escape
00180 
00181 
def clean_input(text, max_len=201):
    """ Clean input by translating it with config.clean_input_translation_map
        (intended to replace CR, LF, TAB by whitespace and to delete control
        chars - the map itself is defined in config).

        Empty input and input longer than max_len gives an empty result.

        @param text: unicode text to clean
        @param max_len: maximum accepted length of text
        @rtype: unicode
        @return: cleaned text
    """
    # we only have input fields with max 200 chars, but spammers send us more
    size = len(text)
    if 0 < size <= max_len:
        return text.translate(config.clean_input_translation_map)
    return u''
00197 
00198 
def make_breakable(text, maxlen):
    """ make a text breakable by inserting spaces into nonbreakable parts

    The text is split at blanks, every part that is longer than maxlen is
    cut into pieces of at most maxlen characters and everything is joined
    with blanks again.

    @param text: the text to make breakable
    @param maxlen: maximum length of a part without a blank, must be >= 1
    @return: the text with blanks inserted
    @raise ValueError: if maxlen is less than 1 (cutting could never finish)
    """
    if maxlen < 1:
        raise ValueError("maxlen must be >= 1, got %r" % (maxlen, ))
    pieces = []
    for part in text.split(" "):
        if len(part) > maxlen:
            pieces.extend([part[pos:pos + maxlen]
                           for pos in range(0, len(part), maxlen)])
        else:
            pieces.append(part)
    return " ".join(pieces)
00212 
00213 ########################################################################
00214 ### Storage
00215 ########################################################################
00216 
# Precompiled patterns for file name [un]quoting
# UNSAFE matches a run of characters that are not ASCII letters, digits or "_"
UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
# QUOTED matches a run of hex digits in parentheses, e.g. "(2f)",
# group 1 is the hex digits without the parentheses
QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
00220 
00221 
def quoteWikinameFS(wikiname, charset=config.charset):
    """ Return file system representation of a Unicode WikiName.

    The name is encoded using charset, then every run of bytes matched by
    UNSAFE is replaced by "(" + two lowercase hex digits per byte + ")".

    Warning: will raise UnicodeError if wikiname can not be encoded using
    charset. The default value of config.charset, 'utf-8' can encode any
    character.

    @param wikiname: Unicode string possibly containing non-ascii characters
    @param charset: charset to encode string
    @rtype: string
    @return: quoted name, safe for any file system
    """
    def _hexquote(match):
        # one parenthesized group per run of unsafe bytes
        return '(%s)' % ''.join(['%02x' % ord(byte) for byte in match.group()])

    return UNSAFE.sub(_hexquote, wikiname.encode(charset))
00251 
00252 
def unquoteWikiname(filename, charsets=[config.charset]):
    """ Return Unicode WikiName from quoted file name.

    Every "(hexdigits)" part matched by QUOTED is replaced by the bytes the
    hex digits stand for, then the result is decoded using charsets.

    We raise an InvalidFileNameError if we find an invalid name, so the
    wiki could alarm the admin or suggest the user to rename a page.
    Invalid file names should never happen in normal use, but are rather
    cheap to find.

    This function should be used only to unquote file names, not page
    names we receive from the user. These are handled in request by
    urllib.unquote, decodePagename and normalizePagename.

    Todo: search clients of unquoteWikiname and check for exceptions.

    @param filename: string using charset and possibly quoted parts
    @param charsets: list of charsets used by string
    @rtype: Unicode String
    @return: WikiName
    @raise InvalidFileNameError: if a quoted part has an odd number of
                                 hex digits
    """
    # Temporary fix: from some places we get called with Unicode strings
    if isinstance(filename, type(u'')):
        filename = filename.encode(config.charset)

    def _unhex(match):
        digits = match.group(1)
        # two hex digits per byte, so an odd count is invalid
        if len(digits) % 2 != 0:
            raise InvalidFileNameError(filename)
        try:
            return ''.join([chr(int(digits[pos:pos+2], 16))
                            for pos in range(0, len(digits), 2)])
        except ValueError:
            # byte not in hex, e.g 'xy'
            raise InvalidFileNameError(filename)

    wikiname = QUOTED.sub(_unhex, filename)

    # FIXME: parentheses left at this stage are not treated as an error,
    # because they can be both errors (like an open "(" without a closing
    # ")") or unquoted valid characters in the file name.

    return decodeUserInput(wikiname, charsets)
00313 
00314 # time scaling
def timestamp2version(ts):
    """ Convert UNIX timestamp (may be float or int) to our version
        (long) int.
        We don't want to use floats, so we just scale by 1e6 to get
        an integer in usecs.

        @param ts: UNIX timestamp (float or int)
        @rtype: long
        @return: ts scaled by 1000000 and converted to long (long() drops
                 any remaining fraction)
    """
    return long(ts*1000000L) # has to be long for py 2.2.x
00322 
def version2timestamp(v):
    """ Convert version number (usecs) to UNIX timestamp (float).
        This must ONLY be used for display purposes.

        @param v: version number, as created by timestamp2version
        @rtype: float
        @return: v divided by 1000000.0
    """
    usecs_per_sec = 1000000.0
    return v / usecs_per_sec
00328 
00329 
# This is the list of meta attribute names to be treated as integers:
# MetaDict converts their values to int when reading the meta file and
# back to str when writing it.
# IMPORTANT: do not use any meta attribute names with "-" (or any other chars
# invalid in python attribute names), use e.g. _ instead.
INTEGER_METAS = ['current', 'revision', # for page storage (moin 2.0)
                 'data_format_revision', # for data_dir format spec (used by mig scripts)
                ]
00336 
class MetaDict(dict):
    """ Store meta information as a dict that is backed by a meta file.

    The file has one "key: value" line per item. Setting an item re-reads
    the file, updates the value and writes the complete file again (while
    holding the write lock), reading an item only uses the data in memory.
    """
    def __init__(self, metafilename, cache_directory):
        """ create a MetaDict from metafilename

        @param metafilename: the name of the meta file to read / write
        @param cache_directory: the '__metalock__' lock directory is
                                located below this directory
        @raise EnvironmentError: if the read lock could not be acquired
        """
        dict.__init__(self)
        self.metafilename = metafilename
        self.dirty = False
        lock_dir = os.path.join(cache_directory, '__metalock__')
        self.rlock = lock.ReadLock(lock_dir, 60.0)
        self.wlock = lock.WriteLock(lock_dir, 60.0)

        if not self.rlock.acquire(3.0):
            raise EnvironmentError("Could not lock in MetaDict")
        try:
            self._get_meta()
        finally:
            self.rlock.release()

    def _get_meta(self):
        """ read the meta file into this dict.

            Does uncached, direct disk access. If the file can not be read,
            this is handled like an empty file. Keys that are in the dict,
            but not in the file, are kept. Blank lines are skipped.

            @raise ValueError: if a non-blank line has no ':' or if the
                               value of an integer meta is not an integer
        """
        try:
            metafile = codecs.open(self.metafilename, "r", "utf-8")
            try:
                meta = metafile.read() # this is much faster than the file's line-by-line iterator
            finally:
                # close the file even if reading / decoding failed
                metafile.close()
        except IOError:
            meta = u''
        for line in meta.splitlines():
            if not line.strip():
                # a blank line has no key / value we could store
                continue
            key, value = line.split(':', 1)
            value = value.strip()
            if key in INTEGER_METAS:
                value = int(value)
            dict.__setitem__(self, key, value)

    def _put_meta(self):
        """ write all items of this dict into the meta file.

            Does uncached, direct disk access, the file is overwritten with
            one "key: value" line per item. Resets the dirty flag.
        """
        meta = []
        for key, value in self.items():
            if key in INTEGER_METAS:
                value = str(value)
            meta.append("%s: %s" % (key, value))
        meta = '\r\n'.join(meta)

        metafile = codecs.open(self.metafilename, "w", "utf-8")
        try:
            metafile.write(meta)
        finally:
            # close the file even if writing failed
            metafile.close()
        self.dirty = False

    def sync(self, mtime_usecs=None):
        """ No-Op except for that parameter

            @param mtime_usecs: if given, it is stored (as a string) as
                                'mtime' item
        """
        if mtime_usecs is not None:
            self['mtime'] = str(mtime_usecs)
        # otherwise no-op

    def __getitem__(self, key):
        """ We don't care for cache coherency here. """
        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        """ Sets a dictionary entry and writes it to the meta file.

            The meta file only gets written if the value is different from
            the value read from the file.

            @raise EnvironmentError: if the write lock could not be acquired
        """
        if not self.wlock.acquire(5.0):
            raise EnvironmentError("Could not lock in MetaDict")
        try:
            self._get_meta() # refresh cache
            try:
                oldvalue = dict.__getitem__(self, key)
            except KeyError:
                oldvalue = None
            if value != oldvalue:
                dict.__setitem__(self, key, value)
                self._put_meta() # sync cache
        finally:
            self.wlock.release()
00419 
00420 
00421 # Quoting of wiki names, file names, etc. (in the wiki markup) -----------------------------------
00422 
# quote character of the old wiki markup quoting
# don't ever change this - DEPRECATED, only needed for the 1.5 -> 1.6 migration conversion
QUOTE_CHARS = u'"'
00425 
00426 
00427 #############################################################################
00428 ### InterWiki
00429 #############################################################################
# name of the wiki page whose raw body load_wikimap() adds to the interwiki map
INTERWIKI_PAGE = "InterWikiMap"
00431 
def generate_file_list(request):
    """ Generate the list of all existing intermap files and store it as
        request.cfg.shared_intermap_files. For internal use.

        @param request: the request object
    """
    cfg = request.cfg

    # order is important here, the local intermap file takes
    # precedence over the shared one, and is thus read AFTER
    # the shared one
    shared = cfg.shared_intermap
    if isinstance(shared, list):
        # work on a copy, we must not modify the configured list
        candidates = list(shared)
    else:
        candidates = [shared]
    candidates.append(os.path.join(cfg.data_dir, "intermap.txt"))

    existing = []
    for filename in candidates:
        if filename and os.path.isfile(filename):
            existing.append(filename)
    cfg.shared_intermap_files = existing
00446 
00447 
def get_max_mtime(file_list, page):
    """ Return the highest modification time of the files in file_list and
        the page page.

        @param file_list: list of file names
        @param page: a page object, its mtime is only used if it exists
        @return: highest mtime (UNIX timestamp) or 0, if there are no files
                 and the page does not exist
    """
    mtimes = []
    for filename in file_list:
        mtimes.append(os.stat(filename).st_mtime)
    # per the original author: exists() is cached and thus cheaper than mtime_usecs()
    if page.exists():
        mtimes.append(version2timestamp(page.mtime_usecs()))
    if not mtimes:
        return 0 # no files / pages there
    return max(mtimes)
00459 
def load_wikimap(request):
    """ load interwiki map (once, and only on demand)

    The map is built from the intermap files (request.cfg.shared_intermap_files)
    and from the InterWikiMap page and is cached in request.cfg.cache. The
    cached map is used without further checks for 1 minute, after that
    it is only reloaded if one of the files or the page is newer than the
    cached map.

    @param request: the request object
    @rtype: dict
    @return: the interwiki map (wikitag -> url prefix)
    """
    from MoinMoin.Page import Page

    now = int(time.time())
    if getattr(request.cfg, "shared_intermap_files", None) is None:
        generate_file_list(request)

    try:
        # if nothing is cached yet, these attribute accesses raise an
        # AttributeError and we (re)load the map in the except block
        _interwiki_list = request.cfg.cache.interwiki_list
        old_mtime = request.cfg.cache.interwiki_mtime
        if request.cfg.cache.interwiki_ts + (1*60) < now: # 1 minute caching time
            max_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
            if max_mtime > old_mtime:
                raise AttributeError # refresh cache
            else:
                request.cfg.cache.interwiki_ts = now
    except AttributeError:
        _interwiki_list = {}
        lines = []

        for filename in request.cfg.shared_intermap_files:
            f = codecs.open(filename, "r", config.charset)
            try:
                lines.extend(f.readlines())
            finally:
                # close the file even if reading / decoding failed
                f.close()

        # add the contents of the InterWikiMap page
        lines += Page(request, INTERWIKI_PAGE).get_raw_body().splitlines()

        for line in lines:
            if not line or line[0] == '#':
                continue
            try:
                # the appended field makes sure that a "wikitag urlprefix"
                # line has 3 fields, lines with less than 2 fields of their
                # own raise a ValueError and get skipped
                line = "%s %s/InterWiki" % (line, request.script_root)
                wikitag, urlprefix, dummy = line.split(None, 2)
            except ValueError:
                pass
            else:
                _interwiki_list[wikitag] = urlprefix

        del lines

        # add own wiki as "Self" and by its configured name
        _interwiki_list['Self'] = request.script_root + '/'
        if request.cfg.interwikiname:
            _interwiki_list[request.cfg.interwikiname] = request.script_root + '/'

        # save for later
        request.cfg.cache.interwiki_list = _interwiki_list
        request.cfg.cache.interwiki_ts = now
        request.cfg.cache.interwiki_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))

    return _interwiki_list
00513 
def split_wiki(wikiurl):
    """
    Split a wiki url at the first ":" or, if there is no ":", at the
    first "/". If there is neither, the wiki tag is 'Self'.

    *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
    Use split_interwiki(), see below.

    @param wikiurl: the url to split
    @rtype: tuple
    @return: (tag, tail)
    """
    for separator in (":", "/"):
        pieces = wikiurl.split(separator, 1)
        if len(pieces) == 2:
            return pieces[0], pieces[1]
    return 'Self', wikiurl
00534 
def split_interwiki(wikiurl):
    """ Split an interwiki name at the first ":" into wikiname and
    pagename, e.g:

    'MoinMoin:FrontPage' -> "MoinMoin", "FrontPage"
    'FrontPage' -> "Self", "FrontPage"
    'MoinMoin:Page with blanks' -> "MoinMoin", "Page with blanks"
    'MoinMoin:' -> "MoinMoin", ""

    can also be used for:

    'attachment:filename with blanks.txt' -> "attachment", "filename with blanks.txt"

    @param wikiurl: the url to split
    @rtype: tuple
    @return: (wikiname, pagename)
    """
    pieces = wikiurl.split(":", 1)
    if len(pieces) < 2:
        # no ":" there, so it is a page of our own wiki
        return 'Self', wikiurl
    wikiname, pagename = pieces
    return wikiname, pagename
00556 
def resolve_wiki(request, wikiurl):
    """
    Resolve an interwiki link.

    *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
    Use resolve_interwiki(), see below.

    @param request: the request object
    @param wikiurl: the InterWiki:PageName link
    @rtype: tuple
    @return: (wikitag, wikiurl, wikitail, err) - for a wiki tag that is not
             in the interwiki map, err is True and url / tail point to the
             "/InterWiki" page below request.script_root
    """
    interwiki_map = load_wikimap(request)
    wikiname, pagename = split_wiki(wikiurl)

    if wikiname not in interwiki_map:
        return (wikiname, request.script_root, "/InterWiki", True)
    return (wikiname, interwiki_map[wikiname], pagename, False)
00578 
def resolve_interwiki(request, wikiname, pagename):
    """ Resolve an interwiki reference (wikiname:pagename).

    @param request: the request object
    @param wikiname: interwiki wiki name
    @param pagename: interwiki page name
    @rtype: tuple
    @return: (wikitag, wikiurl, wikitail, err) - for a wiki name that is not
             in the interwiki map, err is True and url / tail point to the
             "/InterWiki" page below request.script_root
    """
    interwiki_map = load_wikimap(request)
    if wikiname not in interwiki_map:
        return (wikiname, request.script_root, "/InterWiki", True)
    return (wikiname, interwiki_map[wikiname], pagename, False)
00593 
def join_wiki(wikiurl, wikitail):
    """
    Add a (url_quoted) page name to an interwiki url.

    Note: We can't know what kind of URL quoting a remote wiki expects.
          We just quote the page name using url_quote().

    @param wikiurl: wiki url, maybe including a $PAGE placeholder
    @param wikitail: page name
    @rtype: string
    @return: generated URL of the page in the other wiki: wikiurl with
             every $PAGE replaced by the quoted page name or, if there is
             no $PAGE, with the quoted page name appended
    """
    placeholder = '$PAGE'
    quoted_tail = url_quote(wikitail)
    if placeholder not in wikiurl:
        return wikiurl + quoted_tail
    return wikiurl.replace(placeholder, quoted_tail)
00611 
00612 
00613 #############################################################################
00614 ### Page types (based on page names)
00615 #############################################################################
00616 
def isSystemPage(request, pagename):
    """ Is this a system page?

    A page is a system page, if its name is in i18n.system_pages or if
    it is a template page.

    @param request: the request object
    @param pagename: the page name
    @rtype: bool
    @return: true if page is a system page
    """
    from MoinMoin import i18n
    if pagename in i18n.system_pages:
        return True
    return isTemplatePage(request, pagename)
00627 
00628 
def isTemplatePage(request, pagename):
    """ Is this a template page?

    @param request: the request object (its cfg.cache.page_template_regexact
                    is searched in the page name)
    @param pagename: the page name
    @rtype: bool
    @return: true if page is a template page
    """
    match = request.cfg.cache.page_template_regexact.search(pagename)
    return match is not None
00637 
00638 
def isGroupPage(pagename, cfg):
    """ Is this a name of a group page?

    @param pagename: the page name
    @param cfg: the wiki config (its cache.page_group_regexact is searched
                in the page name)
    @rtype: bool
    @return: true if page is a group page
    """
    match = cfg.cache.page_group_regexact.search(pagename)
    return match is not None
00647 
00648 
def filterCategoryPages(request, pagelist):
    """ Return category pages in pagelist

    WARNING: DO NOT USE THIS TO FILTER THE FULL PAGE LIST! Use
    getPageList with a filter function.

    If you pass a list with a single pagename, either that is returned
    or an empty list, thus you can use this function like a `isCategoryPage`
    one.

    @param request: the request object (its cfg.cache.page_category_regexact
                    is searched in every page name)
    @param pagelist: a list of pages
    @rtype: list
    @return: only the category pages of pagelist
    """
    is_category = request.cfg.cache.page_category_regexact.search
    categories = []
    for name in pagelist:
        if is_category(name):
            categories.append(name)
    return categories
00665 
00666 
def getLocalizedPage(request, pagename): # was: getSysPage
    """ Get a system page according to user settings and available translations.

    The translated page (its name is request.getText(pagename)) is used, if
    it exists, otherwise we fall back to the page called <pagename>.

    If the wanted page is the currently rendered page (request.page), we
    reuse that page object instead of creating a new one - this is the case
    for some pages used very often, like FrontPage, RecentChanges etc.

    @param request: the request object
    @param pagename: the name of the page
    @rtype: Page object
    @return: the page object of that system page, using a translated page,
             if it exists
    """
    from MoinMoin.Page import Page
    translated_name = request.getText(pagename)
    result = None
    if translated_name != pagename:
        if request.page and translated_name == request.page.page_name:
            # do not create new object for current page
            candidate = request.page
        else:
            candidate = Page(request, translated_name)
        if candidate.exists():
            result = candidate

    if result:
        return result

    # we failed getting a translated version of <pagename>,
    # so we fall back to english
    if request.page and pagename == request.page.page_name:
        # do not create new object for current page
        return request.page
    return Page(request, pagename)
00704 
00705 
def getFrontPage(request):
    """ Convenience function to get the localized front page

    @param request: current request
    @rtype: Page object
    @return: localized page_front_page, if there is a translation
    """
    front_page_name = request.cfg.page_front_page
    return getLocalizedPage(request, front_page_name)
00714 
00715 
def getHomePage(request, username=None):
    """
    Get a user's homepage, or return None for anon users and
    those who have not created a homepage.

    DEPRECATED - try to use getInterwikiHomePage (see below)

    @param request: the request object
    @param username: the user's name (default: the name of the current
                     user, if it is a valid user)
    @rtype: Page
    @return: user's homepage object - or None
    """
    from MoinMoin.Page import Page
    if username is None and request.user.valid:
        # default to current user
        username = request.user.name

    if not username:
        # unknown user
        return None

    homepage = Page(request, username)
    if not homepage.exists():
        return None
    return homepage
00741 
00742 
def getInterwikiHomePage(request, username=None):
    """
    Get a user's homepage.

    cfg.user_homewiki influences behaviour of this:
    'Self' does mean we store user homepage in THIS wiki.
    When set to our own interwikiname, it behaves like with 'Self'.

    'SomeOtherWiki' means we store user homepages in another wiki.

    @param request: the request object
    @param username: the user's name (default: the name of the current
                     user, if it is a valid user)
    @rtype: tuple (or None for anon users)
    @return: (wikiname, pagename)
    """
    if username is None and request.user.valid:
        # default to current user
        username = request.user.name

    if username:
        wikiname = request.cfg.user_homewiki
        if wikiname == request.cfg.interwikiname:
            wikiname = u'Self'
        return wikiname, username

    return None # anon user
00769 
00770 
def AbsPageName(context, pagename):
    """
    Return the absolute pagename for a (possibly) relative pagename.

    Every leading PARENT_PREFIX of pagename removes the last component of
    context (as long as there is context left), a leading CHILD_PREFIX
    makes pagename a sub page of context. Other page names are returned
    unchanged.

    @param context: name of the page where "pagename" appears on
    @param pagename: the (possibly relative) page name
    @rtype: string
    @return: the absolute page name
    """
    if pagename.startswith(PARENT_PREFIX):
        parents = context
        rest = pagename
        while parents and rest.startswith(PARENT_PREFIX):
            # go up one level: drop the last component of the context
            parents = '/'.join(parents.split('/')[:-1])
            rest = rest[PARENT_PREFIX_LEN:]
        return '/'.join([part for part in (parents, rest) if part])

    if pagename.startswith(CHILD_PREFIX):
        subpage = pagename[CHILD_PREFIX_LEN:]
        if context:
            return context + '/' + subpage
        return subpage

    return pagename
00791 
def RelPageName(context, pagename):
    """
    Return the relative pagename for some context.

    @param context: name of the page where "pagename" appears on
    @param pagename: the absolute page name
    @rtype: string
    @return: the relative page name
    """
    if context == '':
        # special case, context is some "virtual root" page with name == ''
        # every page is a subpage of this virtual root
        return CHILD_PREFIX + pagename

    if pagename.startswith(context + CHILD_PREFIX):
        # simple child
        return pagename[len(context):]

    # some kind of sister/aunt
    context_parts = context.split('/')   # A, B, C, D, E
    page_parts = pagename.split('/')     # A, B, C, F
    # count the common parents: A, B, C
    shared = 0
    limit = min(len(context_parts), len(page_parts))
    while shared < limit and context_parts[shared] == page_parts[shared]:
        shared += 1
    # go up once for every remaining context part (D, E), then down to F
    levels_up = len(context_parts) - shared
    return PARENT_PREFIX * levels_up + '/'.join(page_parts[shared:])
00823 
00824 
def pagelinkmarkup(pagename, text=None):
    """ Return wiki markup that links to page "pagename".

    If the page name matches the parser's word rule and no differing link
    text is requested, the bare page name is returned; otherwise a
    double-bracket link is built, with the text appended after a "|" when
    it differs from the page name.
    """
    from MoinMoin.parser.text_moin_wiki import Parser
    is_word = re.match(Parser.word_rule + "$", pagename, re.U|re.X)
    if is_word and (text is None or text == pagename):
        return pagename

    if text is None or text == pagename:
        label = ''
    else:
        label = '|%s' % text
    return u'[[%s%s]]' % (pagename, label)
00837 
00838 #############################################################################
00839 ### mimetype support
00840 #############################################################################
00841 import mimetypes
00842 
# extension -> mimetype entries registered with the mimetypes module below
MIMETYPES_MORE = {
    # OpenOffice 2.x & other open document stuff
    '.odt': 'application/vnd.oasis.opendocument.text',
    '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
    '.odp': 'application/vnd.oasis.opendocument.presentation',
    '.odg': 'application/vnd.oasis.opendocument.graphics',
    '.odc': 'application/vnd.oasis.opendocument.chart',
    '.odf': 'application/vnd.oasis.opendocument.formula',
    '.odb': 'application/vnd.oasis.opendocument.database',
    '.odi': 'application/vnd.oasis.opendocument.image',
    '.odm': 'application/vnd.oasis.opendocument.text-master',
    '.ott': 'application/vnd.oasis.opendocument.text-template',
    '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
    '.otp': 'application/vnd.oasis.opendocument.presentation-template',
    '.otg': 'application/vnd.oasis.opendocument.graphics-template',
    # some systems (like Mac OS X) don't have some of these:
    '.patch': 'text/x-diff',
    '.diff': 'text/x-diff',
    '.py': 'text/x-python',
    '.cfg': 'text/plain',
    '.conf': 'text/plain',
    '.irc': 'text/plain',
    '.md5': 'text/plain',
    '.csv': 'text/csv',
    '.flv': 'video/x-flv',
    '.wmv': 'video/x-ms-wmv',
    '.swf': 'application/x-shockwave-flash',
}
# register every extra entry as a strict mapping
for ext, mimetype in MIMETYPES_MORE.items():
    mimetypes.add_type(mimetype, ext, True)

# (major, minor) pairs that are rewritten by MimeType.sanitize()
MIMETYPES_sanitize_mapping = {
    # this stuff is text, but got application/* for unknown reasons
    ('application', 'docbook+xml'): ('text', 'docbook'),
    ('application', 'x-latex'): ('text', 'latex'),
    ('application', 'x-tex'): ('text', 'tex'),
    ('application', 'javascript'): ('text', 'javascript'),
}

# inverse mapping of above, used by MimeType.spoil()
MIMETYPES_spoil_mapping = {}
for _key, _value in MIMETYPES_sanitize_mapping.items():
    MIMETYPES_spoil_mapping.update({_value: _key})
00884 
00885 
class MimeType(object):
    """ represents a mimetype like text/plain """

    def __init__(self, mimestr=None, filename=None):
        """ Build the mimetype from a content-type like string or, if no
            such string is given, from a file name.

            @param mimestr: string like "text/plain; charset=utf-8" or an
                            abbreviated format name (see parse_format)
            @param filename: file name used for guessing the mimetype,
                             only looked at when mimestr is empty
        """
        self.major = self.minor = None # sanitized mime type and subtype
        self.params = {} # parameters like "charset" or others
        self.charset = None # this stays None until we know for sure!
        self.raw_mimestr = mimestr

        if mimestr:
            self.parse_mimetype(mimestr)
        elif filename:
            self.parse_filename(filename)

    def parse_filename(self, filename):
        """ guess the mimetype from a file name, falling back to
            application/octet-stream if mimetypes can't guess it
        """
        mtype, encoding = mimetypes.guess_type(filename)
        if mtype is None:
            mtype = 'application/octet-stream'
        self.parse_mimetype(mtype)

    @staticmethod
    def _parse_params(parameters):
        """ turn a list of "key=value" strings into a dict.

            Keys are lower-cased, surrounding double quotes are removed from
            values. Only the first "=" separates key and value, so values
            may contain "=" themselves. Entries without "=" (e.g. the empty
            string left over from a trailing ";") are ignored.

            @param parameters: list of parameter strings
            @rtype: dict
            @return: parameter name to value mapping
        """
        params = {}
        for param in parameters:
            if '=' not in param:
                continue # empty or malformed parameter, nothing to store
            key, value = param.split('=', 1)
            key = key.strip().lower()
            value = value.strip()
            if len(value) >= 2 and value[0] == '"' and value[-1] == '"': # remove quotes
                value = value[1:-1]
            params[key] = value
        return params

    def parse_mimetype(self, mimestr):
        """ take a string like used in content-type and parse it into components,
            alternatively it also can process some abbreviated string like "wiki"
        """
        parameters = [p.strip() for p in mimestr.split(";")]
        mimetype, parameters = parameters[0], parameters[1:]
        mimetype = mimetype.split('/')
        if len(mimetype) >= 2:
            major, minor = mimetype[:2] # we just ignore more than 2 parts
        else:
            major, minor = self.parse_format(mimetype[0])
        self.major = major.lower()
        self.minor = minor.lower()
        self.params.update(self._parse_params(parameters))
        if 'charset' in self.params:
            self.charset = self.params['charset'].lower()
        self.sanitize()

    def parse_format(self, format):
        """ maps from what we currently use on-page in a #format xxx processing
            instruction to a sanitized mimetype major, minor tuple.
            can also be used later for easier entry by the user, so he can just
            type "wiki" instead of "text/moin-wiki".

            Names listed in config.parser_text_mimetype map to text/NAME,
            "wiki" and "irc" have fixed mappings, anything else becomes
            text/x-NAME.
        """
        format = format.lower()
        if format in config.parser_text_mimetype:
            mimetype = 'text', format
        else:
            mapping = {
                'wiki': ('text', 'moin-wiki'),
                'irc': ('text', 'irssi'),
            }
            try:
                mimetype = mapping[format]
            except KeyError:
                mimetype = 'text', 'x-%s' % format
        return mimetype

    def sanitize(self):
        """ convert to some representation that makes sense - this is not necessarily
            conformant to /etc/mime.types or IANA listing, but if something is
            readable text, we will return some text/* mimetype, not application/*,
            because we need text/plain as fallback and not application/octet-stream.
        """
        self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))

    def spoil(self):
        """ this returns something conformant to /etc/mime.type or IANA as a string,
            kind of inverse operation of sanitize(), but doesn't change self
        """
        major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
        return self.content_type(major, minor)

    def content_type(self, major=None, minor=None, charset=None, params=None):
        """ return a string suitable for Content-Type header

            Arguments that are not given default to the values stored on
            this object. For text/* a charset parameter is always emitted
            (falling back to config.charset). Neither self.params nor a
            params dict given by the caller is modified.
        """
        major = major or self.major
        minor = minor or self.minor
        # work on a copy, adding the charset must not leak into self.params
        params = dict(params or self.params or {})
        if major == 'text':
            charset = charset or self.charset or params.get('charset', config.charset)
            params['charset'] = charset
        mimestr = "%s/%s" % (major, minor)
        parts = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
        parts.insert(0, mimestr)
        return "; ".join(parts)

    def mime_type(self):
        """ return a string major/minor only, no params """
        return "%s/%s" % (self.major, self.minor)

    def module_name(self):
        """ convert this mimetype to a string useable as python module name,
            we yield the exact module name first and then proceed to shorter
            module names (useful for falling back to them, if the more special
            module is not found) - e.g. first "text_python", next "text".
            Before the one-fragment name, the raw string this object was
            created from is yielded as is (this is None if no mimestr was
            given to the constructor).
            Finally, we yield "application_octet_stream" as the most general
            mimetype we have.
            Hint: the fallback handler module for text/* should be implemented
                  in module "text" (not "text_plain")
        """
        mimetype = self.mime_type()
        modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
        fragments = modname.split('_')
        for length in range(len(fragments), 1, -1):
            yield "_".join(fragments[:length])
        yield self.raw_mimestr
        yield fragments[0]
        yield "application_octet_stream"
01000 
01001 
01002 #############################################################################
01003 ### Plugins
01004 #############################################################################
01005 
class PluginError(Exception):
    """ Common base class of all plugin related errors """

class PluginMissingError(PluginError):
    """ Raised when a plugin can not be found """

class PluginAttributeError(PluginError):
    """ Raised when a plugin does not contain the requested attribute """
01014 
01015 
def importPlugin(cfg, kind, name, function="execute"):
    """ Import wiki or builtin plugin

    Returns the attribute called "function" from the plugin module "name".
    If "function" is None, the whole module object is returned.

    The wiki plugin is tried first, so wiki plugins always override builtin
    plugins; only when the wiki plugin is missing, the builtin one is
    imported. If you want a specific plugin, use either importWikiPlugin or
    importBuiltinPlugin directly.

    kind may be one of 'action', 'formatter', 'macro', 'parser' or any other
    directory that exist in MoinMoin or data/plugin.

    Errors raised by the builtin import (see importBuiltinPlugin and
    importNameFromPlugin) are not handled here.

    @param cfg: wiki config instance
    @param kind: what kind of module we want to import
    @param name: the name of the module
    @param function: the function name
    @rtype: any object
    @return: "function" of module "name" of kind "kind"
    """
    try:
        plugin = importWikiPlugin(cfg, kind, name, function)
    except PluginMissingError:
        # no such wiki plugin, fall back to the builtin one
        plugin = importBuiltinPlugin(kind, name, function)
    return plugin
01043 
01044 
def importWikiPlugin(cfg, kind, name, function="execute"):
    """ Import plugin from the wiki data directory

    Looks up the package containing plugin "name" via wikiPlugins() and
    raises PluginMissingError if there is no entry for it.

    See importPlugin docstring.
    """
    package = wikiPlugins(kind, cfg).get(name, None)
    if package is None:
        raise PluginMissingError()
    return importNameFromPlugin('%s.%s' % (package, name), function)
01056 
01057 
def importBuiltinPlugin(kind, name, function="execute"):
    """ Import builtin plugin from MoinMoin package

    Raises PluginMissingError if "name" is not listed by builtinPlugins()
    for this kind.

    See importPlugin docstring.
    """
    if name in builtinPlugins(kind):
        return importNameFromPlugin('MoinMoin.%s.%s' % (kind, name), function)
    raise PluginMissingError()
01067 
01068 
def importNameFromPlugin(moduleName, name):
    """ Return the attribute "name" from module "moduleName",
        raise PluginAttributeError if name does not exist.

        If name is None, return the "moduleName" module object itself.
    """
    if name is not None:
        # with a non-empty fromlist, __import__ returns the module itself
        module = __import__(moduleName, globals(), {}, [name])
        try:
            return getattr(module, name)
        except AttributeError:
            raise PluginAttributeError

    # with an empty fromlist, __import__ returns the toplevel package
    # (see __import__ docs!), so walk down to the requested module
    obj = __import__(moduleName, globals(), {}, [])
    for component in moduleName.split('.')[1:]:
        obj = getattr(obj, component)
    return obj
01092 
01093 
def builtinPlugins(kind):
    """ Gets a list of modules in MoinMoin.'kind'

    The list is whatever pysupport.importName finds under the name
    "modules" in the package "MoinMoin." + kind.

    @param kind: what kind of modules we look for
    @rtype: list
    @return: module names
    """
    return pysupport.importName("MoinMoin." + kind, "modules")
01103 
01104 
def wikiPlugins(kind, cfg):
    """
    Gets a dict containing the names of all plugins of @kind
    as the key and the containing module name as the value.

    The result is cached per kind in cfg._site_plugin_lists. When a plugin
    name shows up in more than one module of cfg._plugin_modules, the
    first module wins.

    @param kind: what kind of modules we look for
    @param cfg: wiki config instance
    @rtype: dict
    @return: plugin name to containing module name mapping
    """
    cache = cfg._site_plugin_lists
    # short-cut if we've loaded the dict already
    # (or already failed to load it)
    if kind in cache:
        return cache[kind]

    found = {}
    for modname in cfg._plugin_modules:
        try:
            module = pysupport.importName(modname, kind)
            packagepath = os.path.dirname(module.__file__)
            for plugin in pysupport.getPluginModules(packagepath):
                # setdefault keeps the entry of an earlier module
                found.setdefault(plugin, '%s.%s' % (modname, kind))
        except AttributeError:
            pass
    cache[kind] = found
    return found
01133 
01134 
def getPlugins(kind, cfg):
    """ Gets a list of plugin names of kind

    Builtin plugin names come first, followed by the wiki plugin names
    that are not builtin.

    @param kind: what kind of modules we look for
    @param cfg: wiki config instance
    @rtype: list
    @return: module names
    """
    # work on a copy, so we don't destroy the list of builtin plugins
    names = builtinPlugins(kind)[:]

    # add the extension plugins, skipping duplicates
    for candidate in wikiPlugins(kind, cfg):
        if candidate in names:
            continue
        names.append(candidate)

    return names
01151 
01152 
def searchAndImportPlugin(cfg, type, name, what=None):
    """ Import the best matching plugin for a mimetype or format name.

    "name" is turned into a MimeType and the module names it yields are
    tried in order, the first one that can be imported wins.

    @param cfg: wiki config instance
    @param type: kind of plugin, e.g. "parser" or "formatter"
    @param name: mimetype string or format name
    @param what: name of the object to import from the plugin module,
                 defaults to "Parser" or "Formatter" depending on type
    @return: the imported object
    @raise PluginMissingError: if none of the module names can be imported
    """
    type2classname = {
        "parser": "Parser",
        "formatter": "Formatter",
    }
    if what is None:
        what = type2classname[type]

    for candidate in MimeType(name).module_name():
        try:
            return importPlugin(cfg, type, candidate, what)
        except PluginMissingError:
            continue
    raise PluginMissingError("Plugin not found! (%r %r %r)" % (type, name, what))
01170 
01171 
01172 #############################################################################
01173 ### Parsers
01174 #############################################################################
01175 
def getParserForExtension(cfg, extension):
    """
    Returns the Parser class of the parser fit to handle a file
    with the given extension. The extension should be in the same
    format as os.path.splitext returns it (i.e. with the dot).
    If no parser lists the extension, the parser declaring '*' as its
    extensions is returned, or None if there is no such parser.
    The dict of extensions is cached in the config object.

    @param cfg: the Config instance for the wiki in question
    @param extension: the filename extension including the dot
    @rtype: class, None
    @returns: the parser class or None
    """
    if not hasattr(cfg.cache, 'EXT_TO_PARSER'):
        # first call for this config: build the extension -> parser map
        by_extension = {}
        fallback = None
        for plugin_name in getPlugins('parser', cfg):
            try:
                Parser = importPlugin(cfg, 'parser', plugin_name, 'Parser')
            except PluginMissingError:
                continue
            if not hasattr(Parser, 'extensions'):
                continue
            exts = Parser.extensions
            if isinstance(exts, list):
                for ext in Parser.extensions:
                    by_extension[ext] = Parser
            elif str(exts) == '*':
                # wildcard parser, used for all unlisted extensions
                fallback = Parser
        cfg.cache.EXT_TO_PARSER = by_extension
        cfg.cache.EXT_TO_PARSER_DEFAULT = fallback

    return cfg.cache.EXT_TO_PARSER.get(extension, cfg.cache.EXT_TO_PARSER_DEFAULT)
01207 
01208 
01209 #############################################################################
01210 ### Parameter parsing
01211 #############################################################################
01212 
class BracketError(Exception):
    """ Base class of the bracket matching errors """

class BracketUnexpectedCloseError(BracketError):
    """ A closing bracket was found where none (or another one) was expected """
    def __init__(self, bracket):
        BracketError.__init__(self, "Unexpected closing bracket %s" % bracket)
        # the offending bracket character
        self.bracket = bracket

class BracketMissingCloseError(BracketError):
    """ A bracket was opened, but never closed """
    def __init__(self, bracket):
        BracketError.__init__(self, "Missing closing bracket %s" % bracket)
        # the closing bracket character that is missing
        self.bracket = bracket
01225 
class ParserPrefix:
    """
    Trivial container-class holding a single character for
    the possible prefixes for parse_quoted_separated_ext
    and implementing rich equal comparison.

    Two instances are equal if (and only if) they hold the same prefix;
    "!=" and hash() are kept consistent with that.
    """
    def __init__(self, prefix):
        self.prefix = prefix

    def __eq__(self, other):
        return isinstance(other, ParserPrefix) and other.prefix == self.prefix

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out
        return not self.__eq__(other)

    def __hash__(self):
        # equal instances must have equal hashes
        return hash(self.prefix)

    def __repr__(self):
        return '<ParserPrefix(%s)>' % self.prefix.encode('utf-8')
01240 
def parse_quoted_separated_ext(args, separator=None, name_value_separator=None,
                               brackets=None, seplimit=0, multikey=False,
                               prefixes=None, quotes='"'):
    """
    Parses the given string according to the other parameters.

    Items can be quoted with any character from the quotes parameter
    and each quote can be escaped by doubling it, the separator and
    name_value_separator can both be quoted, when name_value_separator
    is set then the name can also be quoted.

    Values that are not given are returned as None, while the
    empty string as a value can be achieved by quoting it.

    If a name or value does not start with a quote, then the quote
    looses its special meaning for that name or value, unless it
    starts with one of the given prefixes (the parameter is unicode
    containing all allowed prefixes.) The prefixes will be returned
    as ParserPrefix() instances in the first element of the tuple
    for that particular argument.

    If multiple separators follow each other, this is treated as
    having None arguments inbetween, that is also true for when
    space is used as separators (when separator is None), filter
    them out afterwards.

    The function can also do bracketing, i.e. parse expressions
    that contain things like
        "(a (a b))" to [['(', 'a', ['(', 'a', 'b']]],
    in this case, as in this example, the returned list will
    contain sub-lists and the brackets parameter must be a list
    of opening and closing brackets, e.g.
        brackets = ['()', '<>']
    Each sub-list's first item is the opening bracket used for
    grouping.
    Nesting will be observed between the different types of
    brackets given. If bracketing doesn't match, a BracketError
    instance is raised with a 'bracket' property indicating the
    type of missing or unexpected bracket, the instance will be
    either of the class BracketMissingCloseError or of the class
    BracketUnexpectedCloseError.

    If multikey is True (along with setting name_value_separator),
    then the returned tuples for (key, value) pairs can also have
    multiple keys, e.g.
        "a=b=c" -> ('a', 'b', 'c')

    @param args: arguments to parse, must be unicode (TypeError otherwise)
    @param separator: the argument separator, defaults to None, meaning any
        space separates arguments
    @param name_value_separator: single character separating name and
        value, defaults to None, name=value keywords not parsed if
        evaluates to False
    @param brackets: a list of two-character strings giving
        opening and closing brackets
    @param seplimit: if non-zero, separators are only honoured until this
        many items were completed, the rest of the string ends up in the
        last item
    @param multikey: multiple keys allowed for a single value
    @param prefixes: characters that are recognized as prefix when they
        start an item, defaults to None (no prefixes)
    @param quotes: characters usable for quoting, defaults to '"'
    @rtype: list
    @returns: list of unicode strings and tuples containing
        unicode strings, or lists containing the same for
        bracketing support
    """
    idx = 0
    assert name_value_separator is None or name_value_separator != separator
    assert name_value_separator is None or len(name_value_separator) == 1
    if not isinstance(args, unicode):
        raise TypeError('args must be unicode')
    length = len(args)
    result = []         # result list
    cur = [None]        # current item
    quoted = None       # we're inside quotes, indicates quote character used
    skipquote = 0       # next quote is a quoted quote
    noquote = False     # no quotes expected because word didn't start with one
    seplimit_reached = False # number of separators exhausted
    separator_count = 0 # number of separators encountered
    SPACE = [' ', '\t', ]
    if separator is None:
        nextitemsep = SPACE[:]      # used for skipping trailing space
        separators = SPACE
    else:
        nextitemsep = [separator]   # used for skipping trailing space
        separators = [separator]
    if name_value_separator:
        nextitemsep.append(name_value_separator)

    # bracketing support
    opening = []
    closing = []
    bracketstack = []
    matchingbracket = {}
    if brackets:
        for o, c in brackets:
            assert not o in opening
            opening.append(o)
            assert not c in closing
            closing.append(c)
            matchingbracket[o] = c

    def additem(result, cur, separator_count, nextitemsep):
        # Store the finished item "cur" in result (a plain value if it has
        # one element, a tuple if it has more, nothing if it is empty) and
        # return the reset per-item state for the next item.
        if len(cur) == 1:
            result.extend(cur)
        elif cur:
            result.append(tuple(cur))
        cur = [None]
        noquote = False
        separator_count += 1
        seplimit_reached = False
        if seplimit and separator_count >= seplimit:
            seplimit_reached = True
            nextitemsep = [n for n in nextitemsep if n in separators]

        return cur, noquote, separator_count, seplimit_reached, nextitemsep

    while idx < length:
        char = args[idx]
        lookahead = None
        if idx + 1 < length:
            lookahead = args[idx+1]
        if skipquote:
            skipquote -= 1
        if not separator is None and not quoted and char in SPACE:
            spaces = ''
            # accumulate all space
            while char in SPACE and idx < length - 1:
                spaces += char
                idx += 1
                char = args[idx]
            # remove space if args end with it
            if char in SPACE and idx == length - 1:
                break
            # remove space at end of argument
            # (idx is not advanced, the separator is handled next round)
            if char in nextitemsep:
                continue
            idx -= 1
            if len(cur) and cur[-1]:
                cur[-1] = cur[-1] + spaces
        elif not quoted and char == name_value_separator:
            if multikey or len(cur) == 1:
                cur.append(None)
            else:
                # we already have a name and only one is allowed:
                # the separator is part of the value
                if cur[-1] is None:
                    cur[-1] = ''
                cur[-1] += name_value_separator
            noquote = False
        elif not quoted and not seplimit_reached and char in separators:
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
        elif not quoted and not noquote and char in quotes:
            if len(cur) and cur[-1] is None:
                del cur[-1]
            cur.append(u'')
            quoted = char
        elif char == quoted and not skipquote:
            if lookahead == quoted:
                skipquote = 2 # will be decremented right away
            else:
                quoted = None
        elif not quoted and char in opening:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            # remember the enclosing list, collect into a fresh sub-list
            bracketstack.append((matchingbracket[char], result))
            result = [char]
        elif not quoted and char in closing:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            cur = []
            if not bracketstack:
                raise BracketUnexpectedCloseError(char)
            expected, oldresult = bracketstack[-1]
            if not expected == char:
                raise BracketUnexpectedCloseError(char)
            del bracketstack[-1]
            oldresult.append(result)
            result = oldresult
        elif not quoted and prefixes and char in prefixes and cur == [None]:
            cur = [ParserPrefix(char)]
            cur.append(None)
        else:
            if len(cur):
                if cur[-1] is None:
                    cur[-1] = char
                else:
                    cur[-1] += char
            else:
                cur.append(char)
            noquote = True

        idx += 1

    if bracketstack:
        raise BracketMissingCloseError(bracketstack[-1][0])

    if quoted:
        # unterminated quote: give the opening quote character back
        if len(cur):
            if cur[-1] is None:
                cur[-1] = quoted
            else:
                cur[-1] = quoted + cur[-1]
        else:
            cur.append(quoted)

    additem(result, cur, separator_count, nextitemsep)

    return result
01453 
def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
    """
    Convenience wrapper around parse_quoted_separated_ext.

    With name_value set, '=' is used as name/value separator and the
    parsed items are sorted into three containers: the positional items
    before the first keyword item, a dict of the keyword items and the
    non-keyword items that follow a keyword item.

    @param args: arguments to parse
    @param separator: the argument separator
    @param name_value: whether to parse name=value keywords
    @param seplimit: passed on to parse_quoted_separated_ext
    @returns: (positional, keywords, trailing) if name_value is set,
              else just the list of items
    """
    leading = []
    if name_value:
        nv_separator = '='
        keywords = {}
        trailing = []
    else:
        nv_separator = None

    items = parse_quoted_separated_ext(args, separator=separator,
                                       name_value_separator=nv_separator,
                                       seplimit=seplimit)
    target = leading
    for item in items:
        if not isinstance(item, tuple):
            target.append(item)
            continue
        key, value = item
        if key is None:
            key = u''
        keywords[key] = value
        # everything non-keyword after a keyword goes to trailing
        target = trailing

    if not name_value:
        return leading
    return leading, keywords, trailing
01480 
def get_bool(request, arg, name=None, default=None):
    """
    Convert a unicode string, as returned from parse_quoted_separated or
    given as macro parameter, to a boolean.
    Valid input (in any case) is 'true'/'false', 'yes'/'no' and '1'/'0',
    or None for the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default value if arg is None
    @rtype: boolean or None
    @returns: the boolean value of the string according to above rules
              (or default value)
    """
    _ = request.getText
    assert default is None or isinstance(default, bool)
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')

    lowered = arg.lower()
    if lowered in (u'1', u'true', u'yes'):
        return True
    if lowered in (u'0', u'false', u'no'):
        return False

    # the lower-cased value is what gets reported
    if name:
        message = _('Argument "%s" must be a boolean value, not "%s"') % (
            name, lowered)
    else:
        message = _('Argument must be a boolean value, not "%s"') % lowered
    raise ValueError(message)
01515 
01516 
def get_int(request, arg, name=None, default=None):
    """
    Convert a unicode string containing the decimal representation of a
    number, as returned from parse_quoted_separated or given as macro
    parameter, to an integer.
    None is a valid input and yields the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default value if arg is None
    @rtype: int or None
    @returns: the integer value of the string (or default value)
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long))
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')

    try:
        return int(arg)
    except ValueError:
        if not name:
            raise ValueError(
                _('Argument must be an integer value, not "%s"') % arg)
        raise ValueError(
            _('Argument "%s" must be an integer value, not "%s"') % (
                name, arg))
01547 
01548 
def get_float(request, arg, name=None, default=None):
    """
    Convert a unicode string, as returned from parse_quoted_separated or
    given as macro parameter, to a float.
    None is a valid input and yields the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default return value if arg is None
    @rtype: float or None
    @returns: the float value of the string (or default value)
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long, float))
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')

    try:
        return float(arg)
    except ValueError:
        if not name:
            raise ValueError(
                _('Argument must be a floating point value, not "%s"') % arg)
        raise ValueError(
            _('Argument "%s" must be a floating point value, not "%s"') % (
                name, arg))
01578 
01579 
def get_complex(request, arg, name=None, default=None):
    """
    For use with values returned from parse_quoted_separated or given
    as macro parameters, return a complex from a unicode string.
    None is a valid input and yields the default value.

    The imaginary unit may be written as 'i' or 'I' instead of 'j';
    it is rewritten to 'j' before the value is handed to complex().

    @param request: A request instance (its getText is used for messages)
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default return value if arg is None
    @rtype: complex or None
    @returns: the complex value of the string (or default value)
    @raise TypeError: if arg is neither None nor unicode
    @raise ValueError: if arg cannot be converted to a complex number
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long, float, complex))
    if arg is None:
        return default
    elif not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    try:
        # allow writing 'i' instead of 'j'; keep `arg` untouched so that
        # error messages quote what the user actually wrote
        normalized = arg.replace('i', 'j').replace('I', 'j')
        return complex(normalized)
    except ValueError:
        if name:
            raise ValueError(
                _('Argument "%s" must be a complex value, not "%s"') % (
                    name, arg))
        else:
            raise ValueError(
                _('Argument must be a complex value, not "%s"') % arg)
01611 
01612 
def get_unicode(request, arg, name=None, default=None):
    """
    Pass a unicode macro argument through, checking its type.

    Intended for values returned from parse_quoted_separated or given
    as macro parameters. None means "not given" and yields the default
    value; a unicode string is returned unchanged.

    @param request: A request instance (not used here)
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument (not used here)
    @param default: default return value if arg is None;
    @rtype: unicode or None
    @returns: the unicode string (or default value)
    """
    assert default is None or isinstance(default, unicode)
    if arg is None:
        return default
    if isinstance(arg, unicode):
        return arg
    raise TypeError('Argument must be None or unicode')
01633 
01634 
def get_choice(request, arg, name=None, choices=[None]):
    """
    For use with values returned from parse_quoted_separated or given
    as macro parameters, return a unicode string that must be in the
    choices given. None is a valid input and yields first of the valid
    choices.

    The choices may contain None (used as "nothing chosen" marker);
    such entries are left out of the error message listing the
    permitted values.

    @param request: A request instance (its getText is used for messages)
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param choices: the possible choices (tuple or list; never modified)
    @rtype: unicode or None
    @returns: the unicode string (or default value)
    @raise TypeError: if arg is neither None nor unicode
    @raise ValueError: if arg is not one of the choices
    """
    assert isinstance(choices, (tuple, list))
    if arg is None:
        return choices[0]
    elif not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    elif not arg in choices:
        _ = request.getText
        # None cannot be joined into a string (it would raise TypeError
        # and hide the real problem), so only list the real choices
        allowed = '", "'.join([choice for choice in choices
                               if choice is not None])
        if name:
            raise ValueError(
                _('Argument "%s" must be one of "%s", not "%s"') % (
                    name, allowed, arg))
        else:
            raise ValueError(
                _('Argument must be one of "%s", not "%s"') % (
                    allowed, arg))

    return arg
01666 
01667 
class IEFArgument:
    """
    Common base for custom argument parsers understood by
    invoke_extension_function.

    Subclasses have to override both parse_argument() and
    get_default(); the versions here only raise NotImplementedError.
    """
    def __init__(self):
        # nothing to set up; exists so subclasses can call it explicitly
        pass

    def parse_argument(self, s):
        """
        Convert the argument text s (a string) into the value that
        is handed to the extension function.
        """
        raise NotImplementedError()

    def get_default(self):
        """
        Give the value to use when the argument was not supplied.
        """
        raise NotImplementedError()
01688 
01689 
class UnitArgument(IEFArgument):
    """
    Argument class for invoke_extension_function that forces
    having any of the specified units given for a value.

    Note that the default for the permitted units is ['mm'].

    Use, for example, "UnitArgument('7mm', float, ['%', 'mm'])".

    Parsed values are (value, unit) tuples, e.g. (7.0, 'mm').

    If the defaultunit parameter is given, any argument that
    can be converted into the given argtype is assumed to have
    the default unit. NOTE: This doesn't work with a choice
    (tuple or list) argtype.
    """
    def __init__(self, default, argtype, units=['mm'], defaultunit=None):
        """
        Initialise a UnitArgument giving the default,
        argument type and the permitted units.

        @param default: default value as a string including its unit
                        (it is run through parse_argument), or None
        @param argtype: callable converting the numeric part, e.g. float
        @param units: the permitted unit suffixes (copied, not modified)
        @param defaultunit: unit assumed for values given without a unit;
                            must be one of units (or None)
        """
        IEFArgument.__init__(self)
        # Try the longest suffix first so that e.g. "mm" is not mistaken
        # for "m". The sort is stable, so units of equal length keep the
        # order in which they were given.
        self._units = list(units)
        self._units.sort(key=len, reverse=True)
        self._type = argtype
        self._defaultunit = defaultunit
        assert defaultunit is None or defaultunit in units
        if default is not None:
            self._default = self.parse_argument(default)
        else:
            self._default = None

    def parse_argument(self, s):
        """
        Split s into value and unit.

        @param s: the argument string, e.g. "7mm"
        @rtype: tuple
        @return: (converted value, unit)
        @raise ValueError: if s ends in none of the permitted units and
                           cannot be taken as a value in the default unit
        """
        for unit in self._units:
            if s.endswith(unit):
                ret = (self._type(s[:len(s) - len(unit)]), unit)
                return ret
        if self._defaultunit is not None:
            try:
                return (self._type(s), self._defaultunit)
            except ValueError:
                # not a bare value either - report it as unit problem below
                pass
        units = ', '.join(self._units)
        ## XXX: how can we translate this?
        raise ValueError("Invalid unit in value %s (allowed units: %s)" % (s, units))

    def get_default(self):
        """
        Return the parsed default given to the constructor (or None).
        """
        return self._default
01736 
01737 
class required_arg:
    """
    Marker for mandatory arguments: use an instance wrapping the wanted
    type as the default value of a parameter of a function passed to
    invoke_extension_function() to get a generic check that the
    argument was actually supplied.
    """
    def __init__(self, argtype):
        """
        Initialise a required_arg
        @param argtype: the type the argument should have: one of the
                        plain types bool, int, long, float, complex or
                        unicode, or an IEFArgument instance, or a
                        tuple/list of choices
        """
        plain_types = (bool, int, long, float, complex, unicode)
        is_plain = argtype in plain_types
        if not is_plain and not isinstance(argtype,
                                           (IEFArgument, tuple, list)):
            raise TypeError("argtype must be a valid type")
        self.argtype = argtype
01753 
01754 
def invoke_extension_function(request, function, args, fixed_args=[]):
    """
    Parses arguments for an extension call and calls the extension
    function with the arguments.

    If the macro function has a default value that is a bool,
    int, long, float or unicode object, then the given value
    is converted to the type of that default value before passing
    it to the macro function. That way, macros need not call the
    wikiutil.get_* functions for any arguments that have a default.

    Two parameter names of the called function are special: a parameter
    named '_kwargs' receives a dict with all named arguments that match
    no other parameter, and a parameter named '_trailing_args' receives
    a list of the surplus arguments given without a name. If the
    function lacks these parameters, such arguments raise a ValueError.

    @param request: the request object
    @param function: the function to invoke
    @param args: unicode string with arguments (or evaluating to False)
    @param fixed_args: fixed arguments to pass as the first arguments
    @returns: the return value from the function called
    @raise TypeError: if function is not a function, method or class
    @raise ValueError: for surplus, unknown, missing required or
                       unconvertible arguments
    """

    def _convert_arg(request, value, default, name=None):
        """
        Using the get_* functions, convert argument to the type of the default
        if that is any of bool, int, long, float or unicode; if the default
        is the type itself then convert to that type (keeps None) or if the
        default is a list require one of the list items.

        In other cases return the value itself.
        """
        # if extending this, extend required_arg as well!
        # note: bool has to be tested before int (bool is an int subclass)
        if isinstance(default, bool):
            return get_bool(request, value, name, default)
        elif isinstance(default, (int, long)):
            return get_int(request, value, name, default)
        elif isinstance(default, float):
            return get_float(request, value, name, default)
        elif isinstance(default, complex):
            return get_complex(request, value, name, default)
        elif isinstance(default, unicode):
            return get_unicode(request, value, name, default)
        elif isinstance(default, (tuple, list)):
            return get_choice(request, value, name, default)
        elif default is bool:
            return get_bool(request, value, name)
        elif default is int or default is long:
            return get_int(request, value, name)
        elif default is float:
            return get_float(request, value, name)
        elif default is complex:
            return get_complex(request, value, name)
        elif isinstance(default, IEFArgument):
            # defaults handled later
            if value is None:
                return None
            return default.parse_argument(value)
        elif isinstance(default, required_arg):
            if isinstance(default.argtype, (tuple, list)):
                # treat choice specially and return None if no choice
                # is given in the value
                choices = [None] + list(default.argtype)
                return get_choice(request, value, name, choices)
            else:
                return _convert_arg(request, value, default.argtype, name)
        return value

    assert isinstance(fixed_args, (list, tuple))

    _ = request.getText

    kwargs = {}
    kwargs_to_pass = {}
    trailing_args = []

    if args:
        assert isinstance(args, unicode)

        positional, keyword, trailing = parse_quoted_separated(args)

        for kw in keyword:
            try:
                kwargs[str(kw)] = keyword[kw]
            except UnicodeEncodeError:
                # a non-ASCII name cannot be a python parameter name,
                # so it can only end up in _kwargs
                kwargs_to_pass[kw] = keyword[kw]

        trailing_args.extend(trailing)

    else:
        positional = []

    if isfunction(function) or ismethod(function):
        argnames, varargs, varkw, defaultlist = getargspec(function)
    elif isclass(function):
        (argnames, varargs,
         varkw, defaultlist) = getargspec(function.__init__.im_func)
    else:
        raise TypeError('function must be a function, method or class')

    # self is implicit!
    if ismethod(function) or isclass(function):
        argnames = argnames[1:]

    fixed_argc = len(fixed_args)
    argnames = argnames[fixed_argc:]
    argc = len(argnames)
    if not defaultlist:
        defaultlist = []

    # if the fixed parameters have defaults too...
    # The defaults always belong to the LAST parameters, so keep only as
    # many trailing defaults as there are parameters left. (Cutting off
    # fixed_argc entries from the front would drop too many defaults
    # whenever some of the fixed parameters have no default.)
    if argc < len(defaultlist):
        defaultlist = defaultlist[len(defaultlist) - argc:]
    defstart = argc - len(defaultlist)

    defaults = {}
    # reverse to be able to pop() things off
    positional.reverse()
    allow_kwargs = False
    allow_trailing = False
    # convert all arguments to keyword arguments,
    # fill all arguments that weren't given with None
    for idx in range(argc):
        argname = argnames[idx]
        if argname == '_kwargs':
            allow_kwargs = True
            continue
        if argname == '_trailing_args':
            allow_trailing = True
            continue
        if positional:
            kwargs[argname] = positional.pop()
        if not argname in kwargs:
            kwargs[argname] = None
        if idx >= defstart:
            defaults[argname] = defaultlist[idx - defstart]

    if positional:
        if not allow_trailing:
            raise ValueError(_('Too many arguments'))
        # NOTE(review): positional was reversed above, so the left-over
        # positional arguments are appended in reverse order here -
        # confirm whether that is intended
        trailing_args.extend(positional)

    if trailing_args:
        if not allow_trailing:
            raise ValueError(_('Cannot have arguments without name following'
                               ' named arguments'))
        kwargs['_trailing_args'] = trailing_args

    # type-convert all keyword arguments to the type
    # that the default value indicates
    # (iterate over a copy of the keys, entries get deleted in the loop)
    for argname in list(kwargs.keys()):
        if argname in defaults:
            # the value of 'argname' from kwargs will be put into the
            # macro's 'argname' argument, so convert that giving the
            # name to the converter so the user is told which argument
            # went wrong (if it does)
            kwargs[argname] = _convert_arg(request, kwargs[argname],
                                           defaults[argname], argname)
            if kwargs[argname] is None:
                if isinstance(defaults[argname], required_arg):
                    raise ValueError(_('Argument "%s" is required') % argname)
                if isinstance(defaults[argname], IEFArgument):
                    kwargs[argname] = defaults[argname].get_default()

        if not argname in argnames:
            # move argname into _kwargs parameter
            kwargs_to_pass[argname] = kwargs[argname]
            del kwargs[argname]

    if kwargs_to_pass:
        kwargs['_kwargs'] = kwargs_to_pass
        if not allow_kwargs:
            raise ValueError(_(u'No argument named "%s"') % (
                kwargs_to_pass.keys()[0]))

    return function(*fixed_args, **kwargs)
01926 
01927 
01928 def parseAttributes(request, attrstring, endtoken=None, extension=None):
01929     """
01930     Parse a list of attributes and return a dict plus a possible
01931     error message.
01932     If extension is passed, it has to be a callable that returns
01933     a tuple (found_flag, msg). found_flag is whether it did find and process
01934     something, msg is '' when all was OK or any other string to return an error
01935     message.
01936 
01937     @param request: the request object
01938     @param attrstring: string containing the attributes to be parsed
01939     @param endtoken: token terminating parsing
01940     @param extension: extension function -
01941                       gets called with the current token, the parser and the dict
01942     @rtype: dict, msg
01943     @return: a dict plus a possible error message
01944     """
01945     import shlex, StringIO
01946 
01947     _ = request.getText
01948 
01949     parser = shlex.shlex(StringIO.StringIO(attrstring))
01950     parser.commenters = ''
01951     msg = None
01952     attrs = {}
01953 
01954     while not msg:
01955         try:
01956             key = parser.get_token()
01957         except ValueError, err:
01958             msg = str(err)
01959             break
01960         if not key:
01961             break
01962         if endtoken and key == endtoken:
01963             break
01964 
01965         # call extension function with the current token, the parser, and the dict
01966         if extension:
01967             found_flag, msg = extension(key, parser, attrs)
01968             #logging.debug("%r = extension(%r, parser, %r)" % (msg, key, attrs))
01969             if found_flag:
01970                 continue
01971             elif msg:
01972                 break
01973             #else (we found nothing, but also didn't have an error msg) we just continue below:
01974 
01975         try:
01976             eq = parser.get_token()
01977         except ValueError, err:
01978             msg = str(err)
01979             break
01980         if eq != "=":
01981             msg = _('Expected "=" to follow "%(token)s"') % {'token': key}
01982             break
01983 
01984         try:
01985             val = parser.get_token()
01986         except ValueError, err:
01987             msg = str(err)
01988             break
01989         if not val:
01990             msg = _('Expected a value for key "%(token)s"') % {'token': key}
01991             break
01992 
01993         key = escape(key) # make sure nobody cheats
01994 
01995         # safely escape and quote value
01996         if val[0] in ["'", '"']:
01997             val = escape(val)
01998         else:
01999             val = '"%s"' % escape(val, 1)
02000 
02001         attrs[key.lower()] = val
02002 
02003     return attrs, msg or ''
02004 
02005 
class ParameterParser:
    """ MoinMoin macro parameter parser

        Parses a given parameter string, separates the individual parameters
        and detects their type.

        Possible parameter types are:

        Name      | short  | example
        ----------------------------
         Integer  | i      | -374
         Float    | f      | 234.234 23.345E-23
         String   | s      | 'Stri\'ng'
         Boolean  | b      | 0 1 True false
         Name     |        | case_sensitive | converted to string

        So say you want to parse three things, name, age and if the
        person is male or not:

        The pattern will be: %(name)s%(age)i%(male)b

        As a result, the returned dict will put the first value into
        name, second into age etc. If some argument is missing, it will
        get None as its value. This also means that all the identifiers
        in the pattern will exist in the dict, they will just have the
        value None if they were not specified by the caller.

        So if we call it with the parameters as follows:
            ("John Smith", 18)
        this will result in the following dict:
            {"name": "John Smith", "age": 18, "male": None}

        Another way of calling would be:
            ("John Smith", male=True)
        this will result in the following dict:
            {"name": "John Smith", "age": None, "male": True}
    """

    def __init__(self, pattern):
        """
        Compile the regex for a single parameter and analyse the pattern.

        @param pattern: the parameter pattern, e.g. "%(name)s%(age)i"
        @raise ValueError: if an unnamed parameter follows a named one
        """
        name = "(?P<%s>[a-zA-Z_][a-zA-Z0-9_]*)"
        int_re = r"(?P<int>-?\d+)"
        bool_re = r"(?P<bool>(([10])|([Tt]rue)|([Ff]alse)))"
        float_re = r"(?P<float>-?\d+\.\d+([eE][+-]?\d+)?)"
        string_re = (r"(?P<string>('([^']|(\'))*?')|" +
                                r'("([^"]|(\"))*?"))')
        name_re = name % "name"
        name_param_re = name % "name_param"

        # one parameter: optional "name =", then a value, then ',' or end.
        # float has to be tried before int, otherwise "1.5" would never
        # be recognised as float
        param_re = r"\s*(\s*%s\s*=\s*)?(%s|%s|%s|%s|%s)\s*(,|$)" % (
                   name_re, float_re, int_re, bool_re, string_re, name_param_re)
        self.param_re = re.compile(param_re, re.U)
        self._parse_pattern(pattern)

    def _parse_pattern(self, pattern):
        """
        Analyse the pattern and fill param_list (the type letters of each
        parameter, by position), param_dict (parameter name -> position)
        and optional (positions at which a '|' was found).
        """
        param_re = r"(%(?P<name>\(.*?\))?(?P<type>[ibfs]{1,3}))|\|"
        i = 0
        # TODO: Optionals aren't checked.
        self.optional = []
        named = False
        self.param_list = []
        self.param_dict = {}

        for match in re.finditer(param_re, pattern):
            if match.group() == "|":
                self.optional.append(i)
                continue
            self.param_list.append(match.group('type'))
            if match.group('name'):
                named = True
                # strip the surrounding parentheses from "(name)"
                self.param_dict[match.group('name')[1:-1]] = i
            elif named:
                raise ValueError("Named parameter expected")
            i += 1

    def __str__(self):
        return "%s, %s, optional:%s" % (self.param_list, self.param_dict,
                                        self.optional)

    def parse_parameters(self, params):
        """
        Parse a parameter string according to the pattern.

        @param params: the parameter string, e.g. u"'John Smith', 18"
        @rtype: tuple
        @return: (fixed_count, parameter_dict); parameter_dict has an
                 entry for each named parameter of the pattern (None if
                 not given) plus entries keyed by position for the first
                 fixed_count parameters
        @raise ValueError: for malformed, unknown, duplicate, surplus or
                           wrongly typed parameters
        """
        # Default list/dict entries to None
        parameter_list = [None] * len(self.param_list)
        parameter_dict = dict.fromkeys(self.param_dict)
        check_list = [0] * len(self.param_list)

        i = 0
        start = 0
        fixed_count = 0
        named = False

        while start < len(params):
            match = self.param_re.match(params[start:])
            if not match:
                raise ValueError("malformed parameters")
            start += match.end()
            if match.group("int"):
                pvalue = int(match.group("int"))
                ptype = 'i'
            elif match.group("bool"):
                pvalue = match.group("bool") in ("1", "True", "true")
                ptype = 'b'
            elif match.group("float"):
                pvalue = float(match.group("float"))
                ptype = 'f'
            elif match.group("string"):
                # cut off the surrounding quotes
                pvalue = match.group("string")[1:-1]
                ptype = 's'
            elif match.group("name_param"):
                pvalue = match.group("name_param")
                ptype = 'n'
            else:
                raise ValueError("Parameter parser code does not fit param_re regex")

            name = match.group("name")
            if name:
                if name not in self.param_dict:
                    # TODO we should think on inheritance of parameters
                    raise ValueError("unknown parameter name '%s'" % name)
                nr = self.param_dict[name]
                if check_list[nr]:
                    raise ValueError("parameter '%s' specified twice" % name)
                else:
                    check_list[nr] = 1
                pvalue = self._check_type(pvalue, ptype, self.param_list[nr])
                parameter_dict[name] = pvalue
                parameter_list[nr] = pvalue
                named = True
            elif named:
                raise ValueError("only named parameters allowed after first named parameter")
            else:
                nr = i
                if nr >= len(self.param_list):
                    # more positional values than the pattern has
                    # parameters: report it like the other parse errors
                    # instead of failing with an IndexError
                    raise ValueError("too many parameters")
                if nr not in self.param_dict.values():
                    fixed_count = nr + 1
                parameter_list[nr] = self._check_type(pvalue, ptype, self.param_list[nr])

            # Let's populate and map our dictionary to what's been found
            for name in self.param_dict:
                tmp = self.param_dict[name]
                parameter_dict[name] = parameter_list[tmp]

            i += 1

        for i in range(fixed_count):
            parameter_dict[i] = parameter_list[i]

        return fixed_count, parameter_dict

    def _check_type(self, pvalue, ptype, format):
        """
        Check that a value of the detected type ptype is acceptable for
        the type letters in format, converting it if necessary.

        @param pvalue: the parsed value
        @param ptype: detected type: 'i', 'b', 'f', 's' or 'n' (name)
        @param format: the type letters allowed by the pattern, e.g. 'is'
        @return: the (possibly converted) value
        @raise ValueError: if the value does not fit the format
        """
        if ptype == 'n' and 's' in format: # n as s
            return pvalue

        if ptype in format:
            return pvalue # x -> x

        if ptype == 'i':
            if 'f' in format:
                return float(pvalue) # i -> f
            elif 'b' in format:
                return pvalue != 0 # i -> b
        elif ptype == 's':
            if 'b' in format:
                if pvalue.lower() == 'false':
                    return False # s-> b
                elif pvalue.lower() == 'true':
                    return True # s-> b
                else:
                    raise ValueError('%r does not match format %r' % (pvalue, format))

        if 's' in format: # * -> s
            return str(pvalue)

        raise ValueError('%r does not match format %r' % (pvalue, format))
02178 
02179 
02180 #############################################################################
02181 ### Misc
02182 #############################################################################
def normalize_pagename(name, cfg):
    """ Normalize page name

    Keeps page names free of invisible characters and funny whitespace
    that could confuse users or be used to abuse the wiki, or that
    simply make no sense.

    Group pages are restricted even more, so they can be used inside
    acl lines.

    @param name: page name, unicode
    @param cfg: wiki config, handed to isGroupPage
    @rtype: unicode
    @return: decoded and sanitized page name
    """
    # Get rid of invalid characters first
    cleaned = config.page_invalid_chars_regex.sub(u'', name)

    # Treat each path component on its own
    components = []
    for component in cleaned.split(u'/'):
        # Drop components that are empty or whitespace only
        if not component or component.isspace():
            continue

        # Group page names keep only alphanumerics and white space
        if isGroupPage(component, cfg):
            kept = [char for char in component
                    if char.isalnum() or char.isspace()]
            component = u''.join(kept)

        # Collapse any run of white space (split() without arguments
        # splits on all unicode spaces) into one plain space
        words = component.split()
        components.append(u' '.join(words))

    # Put the components back together
    return u'/'.join(components)
02223 
def taintfilename(basename):
    """
    Defuse a filename that is meant to be a plain name: every
    occurrence of the parent directory marker, of a path separator
    and of ':', '<' or '>' is replaced by an underscore, so that no
    path components remain that could compromise our system.

    @param basename: (possibly unsafe) filename
    @rtype: string
    @return: (safer) filename
    """
    # os.pardir has to come first, while its characters are still intact
    unsafe_parts = (os.pardir, ':', '/', '\\', '<', '>')
    safe_name = basename
    for part in unsafe_parts:
        safe_name = safe_name.replace(part, '_')
    return safe_name
02237 
02238 
def drawing2fname(drawing):
    """
    Return the file name for a drawing: names without one of the known
    drawing extensions get '.tdraw' appended.

    Note: this (re)sets config.drawing_extensions on every call.

    @param drawing: name of the drawing, with or without extension
    @rtype: string
    @return: drawing name including an extension
    """
    config.drawing_extensions = ['.tdraw', '.adraw',
                                 '.svg',
                                 '.png', '.jpg', '.jpeg', '.gif',
                                ]
    ext = os.path.splitext(drawing)[1]
    # note: do not just check for empty extension or stuff like drawing:foo.bar
    # will fail, instead of being expanded to foo.bar.tdraw
    if ext in config.drawing_extensions:
        return drawing
    # for backwards compatibility, twikidraw is the default:
    return drawing + '.tdraw'
02251 
02252 
def mapURL(request, url):
    """
    Rewrite a URL according to 'cfg.url_mappings', a mapping from URL
    prefix to the text that replaces that prefix. The first configured
    prefix found at the start of the URL is used.

    @param request: the request object (gives access to cfg)
    @param url: a URL
    @rtype: string
    @return: mapped URL
    """
    mappings = request.cfg.url_mappings
    # nothing configured: nothing to map
    if not mappings:
        return url

    for prefix in mappings:
        if not url.startswith(prefix):
            continue
        # swap the prefix for its replacement, keep the rest
        rest = url[len(prefix):]
        return mappings[prefix] + rest

    # no prefix matched
    return url
02271 
02272 
def getUnicodeIndexGroup(name):
    """
    Return a group letter for `name`, which must be a non-empty unicode
    string.

    Hangul Syllables (U+AC00 - U+D7AF) are grouped in blocks of 588
    consecutive code points, the group letter is the first syllable of
    the block. For everything else the group letter is the upper-cased
    first character.

    @param name: a string
    @rtype: string
    @return: group letter
    @raise IndexError: if name is empty
    """
    c = name[0]
    if u'\uAC00' <= c <= u'\uD7AF': # Hangul Syllables
        # explicit floor division: round down to the start of the block
        # regardless of which division semantics are in effect
        offset = ord(c) - 0xac00
        return unichr(0xac00 + (offset // 588) * 588)
    else:
        return c.upper() # we put lower and upper case words into the same index group
02287 
02288 
02289 def isStrictWikiname(name, word_re=re.compile(ur"^(?:[%(u)s][%(l)s]+){2,}$" % {'u': config.chars_upper, 'l': config.chars_lower})):
02290     """
02291     Check whether this is NOT an extended name.
02292 
02293     @param name: the wikiname in question
02294     @rtype: bool
02295     @return: true if name matches the word_re
02296     """
02297     return word_re.match(name)
02298 
02299 
def is_URL(arg, schemas=config.url_schemas):
    """ Tell whether arg is a URL, i.e. starts with one of the given
        schemas followed by ':'.

        Note: there are not that many requirements for generic URLs, basically
        the only mandatory requirement is the ':' between schema and rest.
        Schema itself could be anything, also the rest (but we only support some
        schemas, as given in config.url_schemas, so it is a bit less ambiguous).

        @param arg: the string to check
        @param schemas: the schemas to accept
        @rtype: bool
        @return: True if arg starts with a known schema plus ':'
    """
    # cheap test first: without a colon it can't be a URL
    if ':' in arg:
        for schema in schemas:
            prefix = schema + ':'
            if arg.startswith(prefix):
                return True
    return False
02314 
02315 
def isPicture(url):
    """
    Tell whether a url points to a picture, judging by the (lower-cased)
    text after its last dot, which has to be listed in
    config.browser_supported_images.

    @param url: the url in question
    @rtype: bool
    @return: true if url points to a picture
    """
    dot = url.rfind(".")
    # no dot at all, or nothing in front of the dot: no extension
    if dot < 1:
        return False
    extension = url[dot + 1:].lower()
    return extension in config.browser_supported_images
02326 
02327 
def link_tag(request, params, text=None, formatter=None, on=None, **kw):
    """ Build a link below the script root.

    TODO: cleanup css_class

    If neither a formatter is given nor request.html_formatter is
    usable, the tag is put together by hand and a warning is logged.

    @param request: the request object
    @param params: parameter string appended to the URL after the scriptname/
    @param text: text / inner part of the <a>...</a> link - does NOT get
                 escaped, so you can give HTML here and it will be used verbatim
                 (defaults to params)
    @param formatter: the formatter object to use
    @param on: opening/closing tag only
    @keyword css_class: css class for the link
    @keyword attrs: additional attrs (HTMLified string) (removed in 1.5.3)
    @rtype: string
    @return: formatted link tag
    """
    if formatter is None:
        formatter = request.html_formatter
    # css_class is handed over separately, so take it out of kw
    css_class = kw.pop('css_class', None)
    anchor_id = kw.get('id', None)
    anchor_name = kw.get('name', None)
    if text is None:
        text = params # default

    if formatter:
        url = "%s/%s" % (request.script_root, params)
        # formatter.url will escape the url part
        if on is None:
            opening = formatter.url(1, url, css_class, **kw)
            inner = formatter.rawHTML(text)
            closing = formatter.url(0)
            return opening + inner + closing
        return formatter.url(on, url, css_class, **kw)

    # this shouldn't be used any more:
    if on is not None and not on:
        tag = '</a>'
    else:
        attr_parts = []
        if css_class:
            attr_parts.append(' class="%s"' % css_class)
        if anchor_id:
            attr_parts.append(' id="%s"' % anchor_id)
        if anchor_name:
            attr_parts.append(' name="%s"' % anchor_name)
        attrs = ''.join(attr_parts)
        tag = '<a%s href="%s/%s">' % (attrs, request.script_root, params)
        if not on:
            tag = "%s%s</a>" % (tag, text)
    logging.warning("wikiutil.link_tag called without formatter and without request.html_formatter. tag=%r" % (tag, ))
    return tag
02379 
def containsConflictMarker(text):
    """ Tell whether text contains an edit conflict marker.

    @param text: the text to search
    @rtype: bool
    @return: true if the conflict marker occurs anywhere in text
    """
    conflict_marker = "/!\\ '''Edit conflict"
    return conflict_marker in text
02383 
def pagediff(request, pagename1, rev1, pagename2, rev2, **kw):
    """
    Compute the "diff" between the contents of two page revisions.

    The lines of both revisions are fetched via Page(...).getlines() and
    handed to diff_text.diff together with any keyword arguments.

    @param request: the request object
    @param pagename1: name of first page
    @param rev1: revision of first page
    @param pagename2: name of second page
    @param rev2: revision of second page
    @keyword ignorews: if 1: ignore pure-whitespace changes (passed on to
                       diff_text.diff like every other keyword)
    @rtype: list
    @return: lines of diff output
    """
    from MoinMoin.Page import Page
    from MoinMoin.util import diff_text

    first_lines = Page(request, pagename1, rev=rev1).getlines()
    second_lines = Page(request, pagename2, rev=rev2).getlines()
    return diff_text.diff(first_lines, second_lines, **kw)
02403 
def anchor_name_from_text(text):
    '''
    Derive an anchor name from the given text.

    The text is encoded as utf-7 and url-quoted (keeping ':'), then '%' is
    turned into '.' and '+' into '_'. If the result does not begin with a
    letter, an 'A' is put in front of it. The results are valid HTML IDs
    matching: [A-Za-z][A-Za-z0-9:_.-]*

    Note: this transformation has a special feature: a text that already is
          a valid ID/name is returned without modification (identity
          transformation).

    @param text: the text to make an anchor name from
    @rtype: string
    @return: the anchor name
    '''
    encoded = text.encode('utf-7')
    anchor = urllib.quote_plus(encoded, safe=':')
    anchor = anchor.replace('%', '.')
    anchor = anchor.replace('+', '_')
    if anchor[:1].isalpha():
        return anchor
    # empty result or a non-letter first character: prefix a letter
    return 'A%s' % anchor
02417 
def split_anchor(pagename):
    """
    Separate a pagename with an (optional) anchor into the real pagename and
    the anchor part. The split happens at the LAST # only. Without any # in
    pagename, the anchor part is an empty string.

    Note: a pagename that contains a # as part of the name (not as anchor
          separator) can be made to work by appending a # at the end:
          "C##" returns ("C#", "")
          "Problem #1#" returns ("Problem #1", "")

    TODO: We shouldn't deal with composite pagename#anchor strings, but keep
          it separate.
          Current approach: [[pagename#anchor|label|attr=val,&qarg=qval]]
          Future approach:  [[pagename|label|attr=val,&qarg=qval,#anchor]]
          The future approach will avoid problems when there is a # in the
          pagename part (and no anchor). Also, we need to append #anchor
          at the END of the generated URL (AFTER the query string).

    @param pagename: pagename, optionally followed by #anchor
    @return: 2 items: pagename and anchor (as returned by rsplit when a #
             was found, else the tuple (pagename, ""))
    """
    pieces = rsplit(pagename, '#', 1)
    if len(pieces) != 2:
        # no # at all, so there is no anchor
        return pagename, ""
    return pieces
02443 
02444 ########################################################################
02445 ### Tickets - used by RenamePage and DeletePage
02446 ########################################################################
02447 
def createTicket(request, tm=None, action=None):
    """ Create a ticket using a configured secret

        The ticket is the string "<tm>.<pagename>.<action>.<hexdigest>",
        where hexdigest comes from a sha1 hash object that was created with
        the secret request.cfg.secrets['wikiutil/tickets'] and then updated
        with "<tm>.<pagename>.<action>".

        @param request: the request object
        @param tm: unix timestamp as hex string (optional, uses current time
                   if not given)
        @param action: action name (optional, uses current action if not given)
                       Note: if you create a ticket for a form that calls another
                             action than the current one, you MUST specify the
                             action you call when posting the form.
        @rtype: string
        @return: the ticket
    """

    from MoinMoin.support.python_compatibility import hash_new
    if tm is None:
        # %x wants an integer, time.time() gives a float - convert explicitly
        tm = "%010x" % int(time.time())

    # make the ticket specific to the page and action:
    # (best effort: any failure to determine them falls back to 'None', but
    # we do not swallow KeyboardInterrupt / SystemExit like a bare except)
    try:
        pagename = quoteWikinameURL(request.page.page_name)
    except Exception:
        pagename = 'None'

    if action is None:
        try:
            action = request.action
        except Exception:
            action = 'None'

    secret = request.cfg.secrets['wikiutil/tickets']
    digest = hash_new('sha1', secret)

    ticket = "%s.%s.%s" % (tm, pagename, action)
    digest.update(ticket)

    return "%s.%s" % (ticket, digest.hexdigest())
02481 
02482 
def checkTicket(request, ticket):
    """ Check validity of a previously created ticket

        A ticket is accepted if its first dot-separated field is a hex
        timestamp that is not older than 10 hours and if it is equal to the
        ticket createTicket computes for this request and that timestamp.

        @param request: the request object
        @param ticket: the ticket string to check (anything that is not a
                       well-formed ticket string is rejected)
        @rtype: bool
        @return: True if the ticket is valid, False otherwise
    """
    try:
        timestamp_str = ticket.split('.')[0]
        timestamp = int(timestamp_str, 16)
    except (ValueError, AttributeError):
        # ValueError: empty ticket or timestamp field is not a hex number
        # AttributeError: ticket is no string at all (e.g. None)
        logging.debug("checkTicket: invalid or empty ticket %r" % (ticket, ))
        return False
    now = time.time()
    if timestamp < now - 10 * 3600:
        # we don't accept tickets older than 10h
        logging.debug("checkTicket: too old ticket, timestamp %r" % timestamp)
        return False
    # recompute the ticket for the claimed timestamp and compare
    ourticket = createTicket(request, timestamp_str)
    valid = ticket == ourticket
    logging.debug("checkTicket: returning %r, got %r, expected %r" % (valid, ticket, ourticket))
    return valid
02500 
02501 
def renderText(request, Parser, text):
    """ executes raw wiki markup with all page elements

        The request output is redirected into a buffer while the parser
        formats the text, and the redirection is always undone afterwards -
        also if the parser or formatter raises an exception.

        @param request: the request object
        @param Parser: parser class; gets instantiated as Parser(text, request)
        @param text: the raw markup to render
        @return: everything that was written to the buffer while formatting
    """
    import StringIO
    out = StringIO.StringIO()
    request.redirect(out)
    try:
        wikiizer = Parser(text, request)
        wikiizer.format(request.formatter, inhibit_p=True)
        result = out.getvalue()
    finally:
        # never leave the request redirected into our private buffer
        request.redirect()
    return result
02513 
def get_processing_instructions(body):
    """ Extract the processing instructions / acl / etc. at the beginning of a page's body.

        Hint: if you have a Page object p, you already have the result of this function in
              p.meta and (even better) parsed/processed stuff in p.pi.

        Every leading line starting with '#' is taken as a PI. Parsing ends at
        the first line that does not start with '#' or at a line that only
        consists of a single '#' (empty, invalid PI). The line that ended the
        parsing stays in the returned body, which is always an unmodified
        tail of the body given.

        Comment lines ("##...") give the verb '#' and the comment text as
        args, no matter whether there is a blank after the ## or not.

        @param body: the page body (string)
        @rtype: tuple
        @return: a list of (pi, restofline) tuples (pi lowercased, restofline
                 stripped) and a string with the rest of the body.
    """
    pi = []
    while body.startswith('#'):
        # extract first line (the last line may come without a newline)
        eol = body.find('\n')
        if eol == -1:
            line, rest = body, ''
        else:
            line, rest = body[:eol], body[eol + 1:]

        # end parsing on empty (invalid) PI; body was not touched yet, so it
        # is returned as is (without adding a newline it never had)
        if line == "#":
            break
        body = rest

        if line[1] == '#': # two hash marks are a comment
            comment = line[2:]
            if not comment.startswith(' '):
                # we don't require a blank after the ##, so we put one there
                line = '## %s' % comment

        verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
        pi.append((verb.lower(), args.strip()))

    return pi, body
02546 
02547 
class Version(tuple):
    """
    A version like 1.2.3-4.5alpha6, kept as the 4-tuple
    (major, minor, release, additional), with named access to the components
    and version comparison.
    1: major version (digits only)
    2: minor version (digits only)
    3: (maintenance) release version (digits only)
    4.5alpha6: optional additional version specification (str)

    A Version instance is created either from the components, like:
        Version(1,2,3,'4.5alpha6')
    or from the composite version string, like:
        Version(version="1.2.3-4.5alpha6").

    Version subclasses tuple, so comparisons to tuples should work.
    All the comparison logic is the one of the tuple base class.
    """
    VERSION_RE = re.compile(
        r"""(?P<major>\d+)
            \.
            (?P<minor>\d+)
            \.
            (?P<release>\d+)
            (-
             (?P<additional>.+)
            )?""",
            re.VERBOSE)

    @classmethod
    def parse_version(cls, version):
        """ Split a composite version string into its components.

        The string is matched from its beginning against VERSION_RE; the
        pattern is not anchored at the end of the string.

        @param version: version string like "1.2.3" or "1.2.3-4.5alpha6"
        @rtype: tuple
        @return: (major, minor, release, additional) - three ints and a str
                 (empty if there is no additional part)
        @raise ValueError: if version does not match VERSION_RE
        """
        found = cls.VERSION_RE.match(version)
        if found is None:
            raise ValueError("Unexpected version string format: %r" % version)
        major, minor, release = [int(found.group(key))
                                 for key in ('major', 'minor', 'release')]
        additional = str(found.group('additional') or '')
        return major, minor, release, additional

    def __new__(cls, major=0, minor=0, release=0, additional='', version=None):
        """ Create the tuple; a (non-empty) version string takes precedence
            over the separately given components.
        """
        if version:
            components = cls.parse_version(version)
        else:
            components = (major, minor, release, additional)
        return tuple.__new__(cls, components)

    # properties for easy access of version components
    @property
    def major(self):
        return self[0]

    @property
    def minor(self):
        return self[1]

    @property
    def release(self):
        return self[2]

    @property
    def additional(self):
        return self[3]

    def __str__(self):
        """ Give "major.minor.release", plus "-additional" if there is one. """
        numeric = "%d.%d.%d" % (self.major, self.minor, self.release)
        if not self.additional:
            return numeric
        return numeric + ("-%s" % self.additional)
02600