Back to index

moin  1.9.0~rc2
Page.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Page class
00004 
00005     Page is used for read-only access to a wiki page. For r/w access see PageEditor.
00006     A Page object is used to access a wiki page (in general) as well as to access
00007     some specific revision of a wiki page.
00008 
00009     The RootPage is some virtual page located at / and is mainly used to do namespace
00010     operations like getting the page list.
00011 
00012     Currently, this is all a big mixture between high-level page code, intermediate
00013     data/underlay layering code, caching code and low-level filesystem storage code.
00014     To see the filesystem storage layout we use, best is to look into data/pages/
00015     (underlay dir uses the same format).
00016 
00017     TODO:
00018     * Cleanly separate the code into packages for:
00019       * Page (or rather: Item)
00020       * Layering
00021       * Cache
00022       * Storage
00023     * ACLs should be handled on a low layer, raising an Exception when access
00024       is denied, so we won't have security issues just because someone forgot to check
00025       user.may.read(secretpage).
00026     * The distinction between an item and an item revision should be clearer.
00027     * Items can be anything, not just wiki pages, but also files of any mimetype.
00028       The mimetype hierarchy should be modelled by a MimeTypeItem class hierarchy.
00029 
00030     @copyright: 2000-2004 by Juergen Hermann <jh@web.de>,
00031                 2005-2008 by MoinMoin:ThomasWaldmann,
00032                 2006 by MoinMoin:FlorianFesti,
00033                 2007 by MoinMoin:ReimarBauer
00034     @license: GNU GPL, see COPYING for details.
00035 """
00036 
00037 import os, re, codecs
00038 
00039 from MoinMoin import log
00040 logging = log.getLogger(__name__)
00041 
00042 from MoinMoin import config, caching, user, util, wikiutil
00043 from MoinMoin.logfile import eventlog
00044 from MoinMoin.util import filesys
00045 
def is_cache_exception(e):
    """ Tell whether an exception is the 'CacheNeedsUpdate' signal.

    @param e: exception instance (only its `args` attribute is inspected)
    @rtype: bool
    @return: true, if e carries exactly one argument and that argument
             equals the string 'CacheNeedsUpdate'
    """
    exc_args = e.args
    return len(exc_args) == 1 and exc_args[0] == 'CacheNeedsUpdate'
00049 
00050 
class ItemCache:
    """ Cache some page item related data, as meta data or pagelist

        We only cache this to RAM in request.cfg (this is the only kind of
        server object we have), because it might be too big for pickling it
        in and out.

        The cache is a two level dict: cache[name][key] -> data.
    """
    def __init__(self, name):
        """ Initialize ItemCache object.
            @param name: name of the object, used for display in logging and
                         influences behaviour of refresh().
        """
        self.name = name
        self.cache = {}
        self.log_pos = None # TODO: initialize this to EOF pos of log
                            # to avoid reading in the whole log on first request
        self.requests = 0 # number of getItem() calls
        self.hits = 0 # number of getItem() calls answered from the cache
        self.loglevel = logging.NOTSET

    def putItem(self, request, name, key, data):
        """ Remembers some data for item name under a key.
            @param request: currently unused
            @param name: name of the item (page), unicode
            @param key: used as secondary access key after name
            @param data: the data item that should be remembered
        """
        self.cache.setdefault(name, {})[key] = data

    def getItem(self, request, name, key):
        """ Returns some item stored for item name under key.

            The cache is refreshed from the edit-log before the lookup and
            the hit/request counters are updated.

            @param request: the request object
            @param name: name of the item (page), unicode
            @param key: used as secondary access key after name
            @return: the data or None, if there is no such name or key.
        """
        self.refresh(request)
        try:
            data = self.cache[name][key]
        except KeyError:
            data = None
            hit_str = 'miss'
        else:
            self.hits += 1
            hit_str = 'hit'
        self.requests += 1
        # pass the values as logging arguments, so the message is only
        # formatted if a handler really wants it
        logging.log(self.loglevel, "%s cache %s (h/r %2.1f%%) for %r %r",
                    self.name,
                    hit_str,
                    float(self.hits * 100) / self.requests,
                    name,
                    key)
        return data

    def refresh(self, request):
        """ Refresh the cache - if anything has changed in the wiki, we see it
            in the edit-log and either delete cached data for the changed items
            (for 'meta') or the complete cache ('pagelists').
            @param request: the request object
        """
        from MoinMoin.logfile import editlog
        elog = editlog.EditLog(request)
        new_pos, items = elog.news(self.log_pos)
        if items:
            self._invalidate(items)
        self.log_pos = new_pos # important to do this at the end -
                               # avoids threading race conditions

    def _invalidate(self, items):
        """ Drop the cached data that became stale because items changed.

            @param items: the changed items as reported by the edit-log
                          (presumably page names, as they are used as first
                          level cache keys - verify against EditLog.news)
        """
        if self.name == 'meta':
            for item in items:
                logging.log(self.loglevel, "cache: removing %r", item)
                # an item that was never cached is fine, everything else
                # should not be hidden by a catch-all except
                self.cache.pop(item, None)
        elif self.name == 'pagelists':
            logging.log(self.loglevel, "cache: clearing pagelist cache")
            self.cache = {}
00129 
00130 
00131 class Page(object):
00132     """ Page - Manage an (immutable) page associated with a WikiName.
00133         To change a page's content, use the PageEditor class.
00134     """
00135     def __init__(self, request, page_name, **kw):
00136         """ Create page object.
00137 
00138         Note that this is a 'lean' operation, since the text for the page
00139         is loaded on demand. Thus, things like `Page(name).link_to()` are
00140         efficient.
00141 
00142         @param page_name: WikiName of the page
00143         @keyword rev: number of older revision
00144         @keyword formatter: formatter instance or mimetype str,
00145                             None or no kw arg will use default formatter
00146         @keyword include_self: if 1, include current user (default: 0)
00147         """
00148         self.request = request
00149         self.cfg = request.cfg
00150         self.page_name = page_name
00151         self.rev = kw.get('rev', 0) # revision of this page
00152         self.include_self = kw.get('include_self', 0)
00153 
00154         formatter = kw.get('formatter', None)
00155         if isinstance(formatter, (str, unicode)): # mimetype given
00156             mimetype = str(formatter)
00157             self.formatter = None
00158             self.output_mimetype = mimetype
00159             self.default_formatter = mimetype == "text/html"
00160         elif formatter is not None: # formatter instance given
00161             self.formatter = formatter
00162             self.default_formatter = 0
00163             self.output_mimetype = "text/todo" # TODO where do we get this value from?
00164         else:
00165             self.formatter = None
00166             self.default_formatter = 1
00167             self.output_mimetype = "text/html"
00168 
00169         self.output_charset = config.charset # correct for wiki pages
00170 
00171         self._text_filename_force = None
00172         self.hilite_re = None
00173 
00174         self.__body = None # unicode page body == metadata + data
00175         self.__body_modified = 0 # was __body modified in RAM so it differs from disk?
00176         self.__meta = None # list of raw tuples of page metadata (currently: the # stuff at top of the page)
00177         self.__pi = None # dict of preprocessed page metadata (processing instructions)
00178         self.__data = None # unicode page data = body - metadata
00179 
00180         self.reset()
00181 
00182     def reset(self):
00183         """ Reset page state """
00184         page_name = self.page_name
00185         # page_name quoted for file system usage, needs to be reset to
00186         # None when pagename changes
00187 
00188         qpagename = wikiutil.quoteWikinameFS(page_name)
00189         self.page_name_fs = qpagename
00190 
00191         # the normal and the underlay path used for this page
00192         normalpath = os.path.join(self.cfg.data_dir, "pages", qpagename)
00193         if not self.cfg.data_underlay_dir is None:
00194             underlaypath = os.path.join(self.cfg.data_underlay_dir, "pages", qpagename)
00195         else:
00196             underlaypath = None
00197 
00198         # TUNING - remember some essential values
00199 
00200         # does the page come from normal page storage (0) or from
00201         # underlay dir (1) (can be used as index into following list)
00202         self._underlay = None
00203 
00204         # path to normal / underlay page dir
00205         self._pagepath = [normalpath, underlaypath]
00206 
00207     # now we define some properties to lazy load some attributes on first access:
00208 
00209     def get_body(self):
00210         if self.__body is None:
00211             # try to open file
00212             try:
00213                 f = codecs.open(self._text_filename(), 'rb', config.charset)
00214             except IOError, er:
00215                 import errno
00216                 if er.errno == errno.ENOENT:
00217                     # just doesn't exist, return empty text (note that we
00218                     # never store empty pages, so this is detectable and also
00219                     # safe when passed to a function expecting a string)
00220                     return ""
00221                 else:
00222                     raise
00223 
00224             # read file content and make sure it is closed properly
00225             try:
00226                 text = f.read()
00227                 text = self.decodeTextMimeType(text)
00228                 self.__body = text
00229             finally:
00230                 f.close()
00231         return self.__body
00232 
00233     def set_body(self, newbody):
00234         self.__body = newbody
00235         self.__meta = None
00236         self.__data = None
00237     body = property(fget=get_body, fset=set_body) # complete page text
00238 
00239     def get_meta(self):
00240         if self.__meta is None:
00241             self.__meta, self.__data = wikiutil.get_processing_instructions(self.body)
00242         return self.__meta
00243     meta = property(fget=get_meta) # processing instructions, ACLs (upper part of page text)
00244 
00245     def get_data(self):
00246         if self.__data is None:
00247             self.__meta, self.__data = wikiutil.get_processing_instructions(self.body)
00248         return self.__data
00249     data = property(fget=get_data) # content (lower part of page text)
00250 
00251     def get_pi(self):
00252         if self.__pi is None:
00253             self.__pi = self.parse_processing_instructions()
00254         return self.__pi
00255     pi = property(fget=get_pi) # processed meta stuff
00256 
00257     def getlines(self):
00258         """ Return a list of all lines in body.
00259 
00260         @rtype: list
00261         @return: list of strs body_lines
00262         """
00263         return self.body.split('\n')
00264 
00265     def get_raw_body(self):
00266         """ Load the raw markup from the page file.
00267 
00268         @rtype: unicode
00269         @return: raw page contents of this page, unicode
00270         """
00271         return self.body
00272 
00273     def get_raw_body_str(self):
00274         """ Returns the raw markup from the page file, as a string.
00275 
00276         @rtype: str
00277         @return: raw page contents of this page, utf-8-encoded
00278         """
00279         return self.body.encode("utf-8")
00280 
00281     def set_raw_body(self, body, modified=0):
00282         """ Set the raw body text (prevents loading from disk).
00283 
00284         TODO: this should not be a public function, as Page is immutable.
00285 
00286         @param body: raw body text
00287         @param modified: 1 means that we internally modified the raw text and
00288             that it is not in sync with the page file on disk.  This is
00289             used e.g. by PageEditor when previewing the page.
00290         """
00291         self.body = body
00292         self.__body_modified = modified
00293 
00294     def get_current_from_pagedir(self, pagedir):
00295         """ Get the current revision number from an arbitrary pagedir.
00296             Does not modify page object's state, uncached, direct disk access.
00297             @param pagedir: the pagedir with the 'current' file to read
00298             @return: int currentrev
00299         """
00300         revfilename = os.path.join(pagedir, 'current')
00301         try:
00302             revfile = file(revfilename)
00303             revstr = revfile.read().strip()
00304             revfile.close()
00305             rev = int(revstr)
00306         except:
00307             rev = 99999999 # XXX do some better error handling
00308         return rev
00309 
00310     def get_rev_dir(self, pagedir, rev=0):
00311         """ Get a revision of a page from an arbitrary pagedir.
00312 
00313         Does not modify page object's state, uncached, direct disk access.
00314 
00315         @param pagedir: the path to the page storage area
00316         @param rev: int revision to get (default is 0 and means the current
00317                     revision (in this case, the real revint is returned)
00318         @return: (str path to file of the revision,
00319                   int realrevint,
00320                   bool exists)
00321         """
00322         if rev == 0:
00323             rev = self.get_current_from_pagedir(pagedir)
00324 
00325         revstr = '%08d' % rev
00326         pagefile = os.path.join(pagedir, 'revisions', revstr)
00327         if rev != 99999999:
00328             exists = os.path.exists(pagefile)
00329             if exists:
00330                 self._setRealPageName(pagedir)
00331         else:
00332             exists = False
00333         return pagefile, rev, exists
00334 
00335     def _setRealPageName(self, pagedir):
00336         """ Set page_name to the real case of page name
00337 
00338         On case insensitive file system, "pagename" exists even if the
00339         real page name is "PageName" or "PAGENAME". This leads to
00340         confusion in urls, links and logs.
00341         See MoinMoinBugs/MacHfsPlusCaseInsensitive
00342 
00343         Correct the case of the page name. Elements created from the
00344         page name in reset() are not updated because it's too messy, and
00345         this fix seems to be enough for now.
00346 
00347         Problems to fix later:
00348 
00349          - ["helponnavigation"] link to HelpOnNavigation but not
00350            considered as backlink.
00351 
00352         @param pagedir: the storage path to the page directory
00353         """
00354         if self._text_filename_force is None:
00355             # we only do this for normal pages, but not for the MissingPage,
00356             # because the code below is wrong in that case
00357             realPath = util.filesys.realPathCase(pagedir)
00358             if realPath is not None:
00359                 realPath = wikiutil.unquoteWikiname(realPath)
00360                 self.page_name = realPath[-len(self.page_name):]
00361 
00362     def get_rev(self, use_underlay=-1, rev=0):
00363         """ Get information about a revision.
00364 
00365         filename, number, and (existance test) of this page and revision.
00366 
00367         @param use_underlay: -1 == auto, 0 == normal, 1 == underlay
00368         @param rev: int revision to get (default is 0 and means the current
00369                     revision (in this case, the real revint is returned)
00370         @return: (str path to current revision of page,
00371                   int realrevint,
00372                   bool exists)
00373         """
00374         def layername(underlay):
00375             if underlay == -1:
00376                 return 'layer_auto'
00377             elif underlay == 0:
00378                 return 'layer_normal'
00379             else: # 1
00380                 return 'layer_underlay'
00381 
00382         request = self.request
00383         cache_name = self.page_name
00384         cache_key = layername(use_underlay)
00385         if self._text_filename_force is None:
00386             cache_data = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
00387             if cache_data and (rev == 0 or rev == cache_data[1]):
00388                 # we got the correct rev data from the cache
00389                 #logging.debug("got data from cache: %r %r %r" % cache_data)
00390                 return cache_data
00391 
00392         # Figure out if we should use underlay or not, if needed.
00393         if use_underlay == -1:
00394             underlay, pagedir = self.getPageStatus(check_create=0)
00395         else:
00396             underlay, pagedir = use_underlay, self._pagepath[use_underlay]
00397 
00398         # Find current revision, if automatic selection is requested.
00399         if rev == 0:
00400             realrev = self.get_current_from_pagedir(pagedir)
00401         else:
00402             realrev = rev
00403 
00404         data = self.get_rev_dir(pagedir, realrev)
00405         if rev == 0 and self._text_filename_force is None:
00406             # we only save the current rev to the cache
00407             request.cfg.cache.meta.putItem(request, cache_name, cache_key, data)
00408 
00409         return data
00410 
00411     def current_rev(self):
00412         """ Return number of current revision.
00413 
00414         This is the same as get_rev()[1].
00415 
00416         @return: int revision
00417         """
00418         pagefile, rev, exists = self.get_rev()
00419         return rev
00420 
00421     def get_real_rev(self):
00422         """ Returns the real revision number of this page.
00423             A rev==0 is translated to the current revision.
00424 
00425         @returns: revision number > 0
00426         @rtype: int
00427         """
00428         if self.rev == 0:
00429             return self.current_rev()
00430         return self.rev
00431 
00432     def getPageBasePath(self, use_underlay=-1):
00433         """ Get full path to a page-specific storage area. `args` can
00434             contain additional path components that are added to the base path.
00435 
00436         @param use_underlay: force using a specific pagedir, default -1
00437                                 -1 = automatically choose page dir
00438                                 1 = use underlay page dir
00439                                 0 = use standard page dir
00440         @rtype: string
00441         @return: int underlay,
00442                  str the full path to the storage area
00443         """
00444         standardpath, underlaypath = self._pagepath
00445         if underlaypath is None:
00446             use_underlay = 0
00447 
00448         if use_underlay == -1: # automatic
00449             if self._underlay is None:
00450                 underlay, path = 0, standardpath
00451                 pagefile, rev, exists = self.get_rev(use_underlay=0)
00452                 if not exists:
00453                     pagefile, rev, exists = self.get_rev(use_underlay=1)
00454                     if exists:
00455                         underlay, path = 1, underlaypath
00456                 self._underlay = underlay
00457             else:
00458                 underlay = self._underlay
00459                 path = self._pagepath[underlay]
00460         else: # normal or underlay
00461             underlay, path = use_underlay, self._pagepath[use_underlay]
00462 
00463         return underlay, path
00464 
00465     def getPageStatus(self, *args, **kw):
00466         """ Get full path to a page-specific storage area. `args` can
00467             contain additional path components that are added to the base path.
00468 
00469         @param args: additional path components
00470         @keyword use_underlay: force using a specific pagedir, default '-1'
00471                                 -1 = automatically choose page dir
00472                                 1 = use underlay page dir
00473                                 0 = use standard page dir
00474         @keyword check_create: if true, ensures that the path requested really exists
00475                                (if it doesn't, create all directories automatically).
00476                                (default true)
00477         @keyword isfile: is the last component in args a filename? (default is false)
00478         @rtype: string
00479         @return: (int underlay (1 if using underlay, 0 otherwise),
00480                   str the full path to the storage area )
00481         """
00482         check_create = kw.get('check_create', 1)
00483         isfile = kw.get('isfile', 0)
00484         use_underlay = kw.get('use_underlay', -1)
00485         underlay, path = self.getPageBasePath(use_underlay)
00486         fullpath = os.path.join(*((path, ) + args))
00487         if check_create:
00488             if isfile:
00489                 dirname, filename = os.path.split(fullpath)
00490             else:
00491                 dirname = fullpath
00492             try:
00493                 os.makedirs(dirname)
00494             except OSError, err:
00495                 if not os.path.exists(dirname):
00496                     raise err
00497         return underlay, fullpath
00498 
00499     def getPagePath(self, *args, **kw):
00500         """ Return path to the page storage area. """
00501         return self.getPageStatus(*args, **kw)[1]
00502 
00503     def _text_filename(self, **kw):
00504         """ The name of the page file, possibly of an older page.
00505 
00506         @keyword rev: page revision, overriding self.rev
00507         @rtype: string
00508         @return: complete filename (including path) to this page
00509         """
00510         if self._text_filename_force is not None:
00511             return self._text_filename_force
00512         rev = kw.get('rev', 0)
00513         if not rev and self.rev:
00514             rev = self.rev
00515         fname, rev, exists = self.get_rev(-1, rev)
00516         return fname
00517 
00518     def editlog_entry(self):
00519         """ Return the edit-log entry for this Page object (can be an old revision).
00520         """
00521         request = self.request
00522         use_cache = self.rev == 0 # use the cache for current rev
00523         if use_cache:
00524             cache_name, cache_key = self.page_name, 'lastlog'
00525             entry = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
00526         else:
00527             entry = None
00528         if entry is None:
00529             from MoinMoin.logfile import editlog
00530             wanted_rev = "%08d" % self.get_real_rev()
00531             edit_log = editlog.EditLog(request, rootpagename=self.page_name)
00532             for entry in edit_log.reverse():
00533                 if entry.rev == wanted_rev:
00534                     break
00535             else:
00536                 entry = () # don't use None
00537             if use_cache:
00538                 request.cfg.cache.meta.putItem(request, cache_name, cache_key, entry)
00539         return entry
00540 
00541     def edit_info(self):
00542         """ Return timestamp/editor info for this Page object (can be an old revision).
00543 
00544             Note: if you ask about a deleted revision, it will report timestamp and editor
00545                   for the delete action (in the edit-log, this is just a SAVE).
00546 
00547         This is used by MoinMoin/xmlrpc/__init__.py.
00548 
00549         @rtype: dict
00550         @return: timestamp and editor information
00551         """
00552         line = self.editlog_entry()
00553         if line:
00554             editordata = line.getInterwikiEditorData(self.request)
00555             if editordata[0] == 'interwiki':
00556                 editor = "%s:%s" % editordata[1]
00557             else:
00558                 editor = editordata[1] # ip or email or anon
00559             result = {
00560                 'timestamp': line.ed_time_usecs,
00561                 'editor': editor,
00562             }
00563             del line
00564         else:
00565             result = {}
00566         return result
00567 
00568     def last_edit(self, request):
00569         # XXX usage of last_edit is DEPRECATED - use edit_info()
00570         if not self.exists(): # XXX doesn't make much sense, but still kept
00571             return None       # XXX here until we remove last_edit()
00572         return self.edit_info()
00573 
00574     def lastEditInfo(self, request=None):
00575         """ Return the last edit info.
00576 
00577             Note: if you ask about a deleted revision, it will report timestamp and editor
00578                   for the delete action (in the edit-log, this is just a SAVE).
00579 
00580         @param request: the request object (DEPRECATED, unused)
00581         @rtype: dict
00582         @return: timestamp and editor information
00583         """
00584         log = self.editlog_entry()
00585         if log:
00586             request = self.request
00587             editor = log.getEditor(request)
00588             time = wikiutil.version2timestamp(log.ed_time_usecs)
00589             time = request.user.getFormattedDateTime(time) # Use user time format
00590             result = {'editor': editor, 'time': time}
00591             del log
00592         else:
00593             result = {}
00594         return result
00595 
00596     def isWritable(self):
00597         """ Can this page be changed?
00598 
00599         @rtype: bool
00600         @return: true, if this page is writable or does not exist
00601         """
00602         return os.access(self._text_filename(), os.W_OK) or not self.exists()
00603 
00604     def isUnderlayPage(self, includeDeleted=True):
00605         """ Does this page live in the underlay dir?
00606 
00607         Return true even if the data dir has a copy of this page. To
00608         check for underlay only page, use ifUnderlayPage() and not
00609         isStandardPage()
00610 
00611         @param includeDeleted: include deleted pages
00612         @rtype: bool
00613         @return: true if page lives in the underlay dir
00614         """
00615         return self.exists(domain='underlay', includeDeleted=includeDeleted)
00616 
00617     def isStandardPage(self, includeDeleted=True):
00618         """ Does this page live in the data dir?
00619 
00620         Return true even if this is a copy of an underlay page. To check
00621         for data only page, use isStandardPage() and not isUnderlayPage().
00622 
00623         @param includeDeleted: include deleted pages
00624         @rtype: bool
00625         @return: true if page lives in the data dir
00626         """
00627         return self.exists(domain='standard', includeDeleted=includeDeleted)
00628 
00629     def exists(self, rev=0, domain=None, includeDeleted=False):
00630         """ Does this page exist?
00631 
00632         This is the lower level method for checking page existence. Use
00633         the higher level methods isUnderlayPage and isStandardPage for
00634         cleaner code.
00635 
00636         @param rev: revision to look for. Default: check current
00637         @param domain: where to look for the page. Default: look in all,
00638                        available values: 'underlay', 'standard'
00639         @param includeDeleted: ignore page state, just check its pagedir
00640         @rtype: bool
00641         @return: true, if page exists
00642         """
00643         # Edge cases
00644         if domain == 'underlay' and not self.request.cfg.data_underlay_dir:
00645             return False
00646 
00647         if includeDeleted:
00648             # Look for page directory, ignore page state
00649             if domain is None:
00650                 checklist = [0, 1]
00651             else:
00652                 checklist = [domain == 'underlay']
00653             for use_underlay in checklist:
00654                 pagedir = self.getPagePath(use_underlay=use_underlay, check_create=0)
00655                 if os.path.exists(pagedir):
00656                     return True
00657             return False
00658         else:
00659             # Look for non-deleted pages only, using get_rev
00660             if not rev and self.rev:
00661                 rev = self.rev
00662 
00663             if domain is None:
00664                 use_underlay = -1
00665             else:
00666                 use_underlay = domain == 'underlay'
00667             d, d, exists = self.get_rev(use_underlay, rev)
00668             return exists
00669 
00670     def size(self, rev=0):
00671         """ Get Page size.
00672 
00673         @rtype: int
00674         @return: page size, 0 for non-existent pages.
00675         """
00676         if rev == self.rev: # same revision as self
00677             if self.__body is not None:
00678                 return len(self.__body)
00679 
00680         try:
00681             return os.path.getsize(self._text_filename(rev=rev))
00682         except EnvironmentError, e:
00683             import errno
00684             if e.errno == errno.ENOENT:
00685                 return 0
00686             raise
00687 
00688     def mtime_usecs(self):
00689         """ Get modification timestamp of this page (from edit-log, can be for an old revision).
00690 
00691         @rtype: int
00692         @return: mtime of page (or 0 if page / edit-log entry does not exist)
00693         """
00694         entry = self.editlog_entry()
00695         return entry and entry.ed_time_usecs or 0
00696 
00697     def mtime_printable(self, request):
00698         """ Get printable (as per user's preferences) modification timestamp of this page.
00699 
00700         @rtype: string
00701         @return: formatted string with mtime of page
00702         """
00703         t = self.mtime_usecs()
00704         if not t:
00705             result = "0" # TODO: i18n, "Ever", "Beginning of time"...?
00706         else:
00707             result = request.user.getFormattedDateTime(
00708                 wikiutil.version2timestamp(t))
00709         return result
00710 
00711     def split_title(self, force=0):
00712         """ Return a string with the page name split by spaces, if the user wants that.
00713 
00714         @param force: if != 0, then force splitting the page_name
00715         @rtype: unicode
00716         @return: pagename of this page, splitted into space separated words
00717         """
00718         request = self.request
00719         if not force and not request.user.wikiname_add_spaces:
00720             return self.page_name
00721 
00722         # look for the end of words and the start of a new word,
00723         # and insert a space there
00724         splitted = config.split_regex.sub(r'\1 \2', self.page_name)
00725         return splitted
00726 
00727     def url(self, request, querystr=None, anchor=None, relative=False, **kw):
00728         """ Return complete URL for this page, including scriptname.
00729             The URL is NOT escaped, if you write it to HTML, use wikiutil.escape
00730             (at least if you have a querystr, to escape the & chars).
00731 
00732         @param request: the request object
00733         @param querystr: the query string to add after a "?" after the url
00734             (str or dict, see wikiutil.makeQueryString)
00735         @param anchor: if specified, make a link to this anchor
00736         @param relative: create a relative link (default: False), note that this
00737                          changed in 1.7, in 1.6, the default was True.
00738         @rtype: str
00739         @return: complete url of this page, including scriptname
00740         """
00741         assert(isinstance(anchor, (type(None), str, unicode)))
00742         # Create url, excluding scriptname
00743         url = wikiutil.quoteWikinameURL(self.page_name)
00744         if querystr:
00745             if isinstance(querystr, dict):
00746                 action = querystr.get('action', None)
00747             else:
00748                 action = None # we don't support getting the action out of a str
00749 
00750             querystr = wikiutil.makeQueryString(querystr)
00751 
00752             # make action URLs denyable by robots.txt:
00753             if action is not None and request.cfg.url_prefix_action is not None:
00754                 url = "%s/%s/%s" % (request.cfg.url_prefix_action, action, url)
00755             url = '%s?%s' % (url, querystr)
00756 
00757         if not relative:
00758             url = '%s/%s' % (request.script_root, url)
00759 
00760         # Add anchor
00761         if anchor:
00762             fmt = getattr(self, 'formatter', request.html_formatter)
00763             if fmt:
00764                 anchor = fmt.sanitize_to_id(anchor)
00765             url = "%s#%s" % (url, anchor)
00766 
00767         return url
00768 
00769     def link_to_raw(self, request, text, querystr=None, anchor=None, **kw):
00770         """ core functionality of link_to, without the magic """
00771         url = self.url(request, querystr, anchor=anchor, relative=True) # scriptName is added by link_tag
00772         # escaping is done by link_tag -> formatter.url -> ._open()
00773         link = wikiutil.link_tag(request, url, text,
00774                                  formatter=getattr(self, 'formatter', None), **kw)
00775         return link
00776 
00777     def link_to(self, request, text=None, querystr=None, anchor=None, **kw):
00778         """ Return HTML markup that links to this page.
00779 
00780         See wikiutil.link_tag() for possible keyword parameters.
00781 
00782         @param request: the request object
00783         @param text: inner text of the link - it gets automatically escaped
00784         @param querystr: the query string to add after a "?" after the url
00785         @param anchor: if specified, make a link to this anchor
00786         @keyword on: opening/closing tag only
00787         @keyword attachment_indicator: if 1, add attachment indicator after link tag
00788         @keyword css_class: css class to use
00789         @rtype: string
00790         @return: formatted link
00791         """
00792         if not text:
00793             text = self.split_title()
00794         text = wikiutil.escape(text)
00795 
00796         # Add css class for non existing page
00797         if not self.exists():
00798             kw['css_class'] = 'nonexistent'
00799 
00800         attachment_indicator = kw.get('attachment_indicator')
00801         if attachment_indicator is None:
00802             attachment_indicator = 0 # default is off
00803         else:
00804             del kw['attachment_indicator'] # avoid having this as <a> tag attribute
00805 
00806         link = self.link_to_raw(request, text, querystr, anchor, **kw)
00807 
00808         # Create a link to attachments if any exist
00809         if attachment_indicator:
00810             from MoinMoin.action import AttachFile
00811             link += AttachFile.getIndicator(request, self.page_name)
00812 
00813         return link
00814 
00815     def getSubscribers(self, request, **kw):
00816         """ Get all subscribers of this page.
00817 
00818         @param request: the request object
00819         @keyword include_self: if 1, include current user (default: 0)
00820         @keyword return_users: if 1, return user instances (default: 0)
00821         @rtype: dict
00822         @return: lists of subscribed email addresses in a dict by language key
00823         """
00824         include_self = kw.get('include_self', self.include_self)
00825         return_users = kw.get('return_users', 0)
00826 
00827         # extract categories of this page
00828         pageList = self.getCategories(request)
00829 
00830         # add current page name for list matching
00831         pageList.append(self.page_name)
00832 
00833         if self.cfg.SecurityPolicy:
00834             UserPerms = self.cfg.SecurityPolicy
00835         else:
00836             from MoinMoin.security import Default as UserPerms
00837 
00838         # get email addresses of the all wiki user which have a profile stored;
00839         # add the address only if the user has subscribed to the page and
00840         # the user is not the current editor
00841         userlist = user.getUserList(request)
00842         subscriber_list = {}
00843         for uid in userlist:
00844             if uid == request.user.id and not include_self:
00845                 continue # no self notification
00846             subscriber = user.User(request, uid)
00847 
00848             # The following tests should be ordered in order of
00849             # decreasing computation complexity, in particular
00850             # the permissions check may be expensive; see the bug
00851             # MoinMoinBugs/GetSubscribersPerformanceProblem
00852 
00853             # This is a bit wrong if return_users=1 (which implies that the caller will process
00854             # user attributes and may, for example choose to send an SMS)
00855             # So it _should_ be "not (subscriber.email and return_users)" but that breaks at the moment.
00856             if not subscriber.email:
00857                 continue # skip empty email addresses
00858 
00859             # skip people not subscribed
00860             if not subscriber.isSubscribedTo(pageList):
00861                 continue
00862 
00863             # skip people who can't read the page
00864             if not UserPerms(subscriber).read(self.page_name):
00865                 continue
00866 
00867             # add the user to the list
00868             lang = subscriber.language or request.cfg.language_default
00869             if not lang in subscriber_list:
00870                 subscriber_list[lang] = []
00871             if return_users:
00872                 subscriber_list[lang].append(subscriber)
00873             else:
00874                 subscriber_list[lang].append(subscriber.email)
00875 
00876         return subscriber_list
00877 
00878     def parse_processing_instructions(self):
00879         """ Parse page text and extract processing instructions,
00880             return a dict of PIs and the non-PI rest of the body.
00881         """
00882         from MoinMoin import i18n
00883         from MoinMoin import security
00884         request = self.request
00885         pi = {} # we collect the processing instructions here
00886 
00887         # default language from cfg
00888         pi['language'] = self.cfg.language_default or "en"
00889 
00890         body = self.body
00891         # TODO: remove this hack once we have separate metadata and can use mimetype there
00892         if body.startswith('<?xml'): # check for XML content
00893             pi['lines'] = 0
00894             pi['format'] = "xslt"
00895             pi['formatargs'] = ''
00896             pi['acl'] = security.AccessControlList(request.cfg, []) # avoid KeyError on acl check
00897             return pi
00898 
00899         meta = self.meta
00900 
00901         # default is wiki markup
00902         pi['format'] = self.cfg.default_markup or "wiki"
00903         pi['formatargs'] = ''
00904         pi['lines'] = len(meta)
00905         acl = []
00906 
00907         for verb, args in meta:
00908             if verb == "format": # markup format
00909                 format, formatargs = (args + ' ').split(' ', 1)
00910                 pi['format'] = format.lower()
00911                 pi['formatargs'] = formatargs.strip()
00912 
00913             elif verb == "acl":
00914                 acl.append(args)
00915 
00916             elif verb == "language":
00917                 # Page language. Check if args is a known moin language
00918                 if args in i18n.wikiLanguages():
00919                     pi['language'] = args
00920 
00921             elif verb == "refresh":
00922                 if self.cfg.refresh:
00923                     try:
00924                         mindelay, targetallowed = self.cfg.refresh
00925                         args = args.split()
00926                         if len(args) >= 1:
00927                             delay = max(int(args[0]), mindelay)
00928                         if len(args) >= 2:
00929                             target = args[1]
00930                         else:
00931                             target = self.page_name
00932                         if '://' in target:
00933                             if targetallowed == 'internal':
00934                                 raise ValueError
00935                             elif targetallowed == 'external':
00936                                 url = target
00937                         else:
00938                             url = Page(request, target).url(request)
00939                         pi['refresh'] = (delay, url)
00940                     except (ValueError, ):
00941                         pass
00942 
00943             elif verb == "redirect":
00944                 pi['redirect'] = args
00945 
00946             elif verb == "deprecated":
00947                 pi['deprecated'] = True
00948 
00949             elif verb == "openiduser":
00950                 if request.cfg.openid_server_enable_user:
00951                     pi['openid.user'] = args
00952 
00953             elif verb == "pragma":
00954                 try:
00955                     key, val = args.split(' ', 1)
00956                 except (ValueError, TypeError):
00957                     pass
00958                 else:
00959                     request.setPragma(key, val)
00960 
00961         pi['acl'] = security.AccessControlList(request.cfg, acl)
00962         return pi
00963 
00964     def send_raw(self, content_disposition=None, mimetype=None):
00965         """ Output the raw page data (action=raw).
00966             With no content_disposition, the browser usually just displays the
00967             data on the screen, with content_disposition='attachment', it will
00968             offer a dialogue to save it to disk (used by Save action).
00969             Supplied mimetype overrides default text/plain.
00970         """
00971         request = self.request
00972         request.mimetype = mimetype or 'text/plain'
00973         if self.exists():
00974             # use the correct last-modified value from the on-disk file
00975             # to ensure cacheability where supported. Because we are sending
00976             # RAW (file) content, the file mtime is correct as Last-Modified header.
00977             request.status_code = 200
00978             request.last_modified = os.path.getmtime(self._text_filename())
00979             text = self.encodeTextMimeType(self.body)
00980             #request.setHttpHeader("Content-Length: %d" % len(text))  # XXX WRONG! text is unicode obj, but we send utf-8!
00981             if content_disposition:
00982                 # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
00983                 # There is no solution that is compatible to IE except stripping non-ascii chars
00984                 filename_enc = "%s.txt" % self.page_name.encode(config.charset)
00985                 dispo_string = '%s; filename="%s"' % (content_disposition, filename_enc)
00986                 request.headers.add('Content-Disposition', dispo_string)
00987         else:
00988             request.status_code = 404
00989             text = u"Page %s not found." % self.page_name
00990 
00991         request.write(text)
00992 
00993     def send_page(self, **keywords):
00994         """ Output the formatted page.
00995 
00996         TODO: "kill send_page(), quick" (since 2002 :)
00997 
00998         @keyword content_only: if 1, omit http headers, page header and footer
00999         @keyword content_id: set the id of the enclosing div
01000         @keyword count_hit: if 1, add an event to the log
01001         @keyword send_special: if True, this is a special page send
01002         @keyword omit_footnotes: if True, do not send footnotes (used by include macro)
01003         """
01004         request = self.request
01005         _ = request.getText
01006         request.clock.start('send_page')
01007         emit_headers = keywords.get('emit_headers', 1)
01008         content_only = keywords.get('content_only', 0)
01009         omit_footnotes = keywords.get('omit_footnotes', 0)
01010         content_id = keywords.get('content_id', 'content')
01011         do_cache = keywords.get('do_cache', 1)
01012         send_special = keywords.get('send_special', False)
01013         print_mode = keywords.get('print_mode', 0)
01014         if print_mode:
01015             media = request.values.get('media', 'print')
01016         else:
01017             media = 'screen'
01018         self.hilite_re = (keywords.get('hilite_re') or
01019                           request.values.get('highlight'))
01020 
01021         # count hit?
01022         if keywords.get('count_hit', 0):
01023             eventlog.EventLog(request).add(request, 'VIEWPAGE', {'pagename': self.page_name})
01024 
01025         # load the text
01026         body = self.data
01027         pi = self.pi
01028 
01029         if 'redirect' in pi and not (
01030             'action' in request.values or 'redirect' in request.values or content_only):
01031             # redirect to another page
01032             # note that by including "action=show", we prevent endless looping
01033             # (see code in "request") or any cascaded redirection
01034             pagename, anchor = wikiutil.split_anchor(pi['redirect'])
01035             redirect_url = Page(request, pagename).url(request,
01036                                                        querystr={'action': 'show', 'redirect': self.page_name, },
01037                                                        anchor=anchor)
01038             request.http_redirect(redirect_url, code=301)
01039             return
01040 
01041         # if necessary, load the formatter
01042         if self.default_formatter:
01043             from MoinMoin.formatter.text_html import Formatter
01044             self.formatter = Formatter(request, store_pagelinks=1)
01045         elif not self.formatter:
01046             Formatter = wikiutil.searchAndImportPlugin(request.cfg, "formatter", self.output_mimetype)
01047             self.formatter = Formatter(request)
01048 
01049         # save formatter
01050         no_formatter = object()
01051         old_formatter = getattr(request, "formatter", no_formatter)
01052         request.formatter = self.formatter
01053 
01054         self.formatter.setPage(self)
01055         if self.hilite_re:
01056             try:
01057                 self.formatter.set_highlight_re(self.hilite_re)
01058             except re.error, err:
01059                 request.theme.add_msg(_('Invalid highlighting regular expression "%(regex)s": %(error)s') % {
01060                                           'regex': self.hilite_re,
01061                                           'error': str(err),
01062                                       }, "warning")
01063                 self.hilite_re = None
01064 
01065         if 'deprecated' in pi:
01066             # deprecated page, append last backup version to current contents
01067             # (which should be a short reason why the page is deprecated)
01068             request.theme.add_msg(_('The backed up content of this page is deprecated and will rank lower in search results!'), "warning")
01069 
01070             revisions = self.getRevList()
01071             if len(revisions) >= 2: # XXX shouldn't that be ever the case!? Looks like not.
01072                 oldpage = Page(request, self.page_name, rev=revisions[1])
01073                 body += oldpage.get_raw_body()
01074                 del oldpage
01075 
01076         lang = self.pi.get('language', request.cfg.language_default)
01077         request.setContentLanguage(lang)
01078 
01079         # start document output
01080         page_exists = self.exists()
01081         if not content_only:
01082             if emit_headers:
01083                 request.content_type = "%s; charset=%s" % (self.output_mimetype, self.output_charset)
01084                 if page_exists:
01085                     if not request.user.may.read(self.page_name):
01086                         request.status_code = 403
01087                     else:
01088                         request.status_code = 200
01089                     if not request.cacheable:
01090                         # use "nocache" headers if we're using a method that is not simply "display"
01091                         request.disableHttpCaching(level=2)
01092                     elif request.user.valid:
01093                         # use nocache headers if a user is logged in (which triggers personalisation features)
01094                         request.disableHttpCaching(level=1)
01095                     else:
01096                         # TODO: we need to know if a page generates dynamic content -
01097                         # if it does, we must not use the page file mtime as last modified value
01098                         # The following code is commented because it is incorrect for dynamic pages:
01099                         #lastmod = os.path.getmtime(self._text_filename())
01100                         #request.setHttpHeader("Last-Modified: %s" % util.timefuncs.formathttpdate(lastmod))
01101                         pass
01102                 else:
01103                     request.status_code = 404
01104 
01105             if not page_exists and self.request.isSpiderAgent:
01106                 # don't send any 404 content to bots
01107                 return
01108 
01109             request.write(self.formatter.startDocument(self.page_name))
01110 
01111             # send the page header
01112             if self.default_formatter:
01113                 if self.rev:
01114                     request.theme.add_msg("<strong>%s</strong><br>" % (
01115                         _('Revision %(rev)d as of %(date)s') % {
01116                             'rev': self.rev,
01117                             'date': self.mtime_printable(request)
01118                         }), "info")
01119 
01120                 # This redirect message is very annoying.
01121                 # Less annoying now without the warning sign.
01122                 if 'redirect' in request.values:
01123                     redir = request.values['redirect']
01124                     request.theme.add_msg('<strong>%s</strong><br>' % (
01125                         _('Redirected from page "%(page)s"') % {'page':
01126                             wikiutil.link_tag(request, wikiutil.quoteWikinameURL(redir) + "?action=show", self.formatter.text(redir))}), "info")
01127                 if 'redirect' in pi:
01128                     request.theme.add_msg('<strong>%s</strong><br>' % (
01129                         _('This page redirects to page "%(page)s"') % {'page': wikiutil.escape(pi['redirect'])}), "info")
01130 
01131                 # Page trail
01132                 trail = None
01133                 if not print_mode:
01134                     request.user.addTrail(self)
01135                     trail = request.user.getTrail()
01136 
01137                 title = self.split_title()
01138 
01139                 html_head = ''
01140                 if request.cfg.openid_server_enabled:
01141                     openid_username = self.page_name
01142                     userid = user.getUserId(request, openid_username)
01143 
01144                     if userid is None and 'openid.user' in self.pi:
01145                         openid_username = self.pi['openid.user']
01146                         userid = user.getUserId(request, openid_username)
01147 
01148                     openid_group_name = request.cfg.openid_server_restricted_users_group
01149                     if userid is not None and not openid_group_name or \
01150                             (openid_group_name in request.groups and openid_username in request.groups[openid_group_name]):
01151                         html_head = '<link rel="openid2.provider" href="%s">' % \
01152                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
01153                                                                                 querystr={'action': 'serveopenid'})), True)
01154                         html_head += '<link rel="openid.server" href="%s">' % \
01155                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
01156                                                                                 querystr={'action': 'serveopenid'})), True)
01157                         html_head += '<meta http-equiv="x-xrds-location" content="%s">' % \
01158                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
01159                                                                                 querystr={'action': 'serveopenid', 'yadis': 'ep'})), True)
01160                     elif self.page_name == request.cfg.page_front_page:
01161                         html_head = '<meta http-equiv="x-xrds-location" content="%s">' % \
01162                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
01163                                                                                 querystr={'action': 'serveopenid', 'yadis': 'idp'})), True)
01164 
01165                 request.theme.send_title(title, page=self,
01166                                     print_mode=print_mode,
01167                                     media=media, pi_refresh=pi.get('refresh'),
01168                                     allow_doubleclick=1, trail=trail,
01169                                     html_head=html_head,
01170                                     )
01171 
01172         # special pages handling, including denying access
01173         special = None
01174 
01175         if not send_special:
01176             if not page_exists and not body:
01177                 special = 'missing'
01178             elif not request.user.may.read(self.page_name):
01179                 special = 'denied'
01180 
01181             # if we have a special page, output it, unless
01182             #  - we should only output content (this is for say the pagelinks formatter)
01183             #  - we have a non-default formatter
01184             if special and not content_only and self.default_formatter:
01185                 self._specialPageText(request, special) # this recursively calls send_page
01186 
01187         # if we didn't short-cut to a special page, output this page
01188         if not special:
01189             # start wiki content div
01190             request.write(self.formatter.startContent(content_id))
01191 
01192             # parse the text and send the page content
01193             self.send_page_content(request, body,
01194                                    format=pi['format'],
01195                                    format_args=pi['formatargs'],
01196                                    do_cache=do_cache,
01197                                    start_line=pi['lines'])
01198 
01199             # check for pending footnotes
01200             if getattr(request, 'footnotes', None) and not omit_footnotes:
01201                 from MoinMoin.macro.FootNote import emit_footnotes
01202                 request.write(emit_footnotes(request, self.formatter))
01203 
01204             # end wiki content div
01205             request.write(self.formatter.endContent())
01206 
01207         # end document output
01208         if not content_only:
01209             # send the page footer
01210             if self.default_formatter:
01211                 request.theme.send_footer(self.page_name, print_mode=print_mode)
01212 
01213             request.write(self.formatter.endDocument())
01214 
01215         request.clock.stop('send_page')
01216         if not content_only and self.default_formatter:
01217             request.theme.send_closing_html()
01218 
01219         # cache the pagelinks
01220         if do_cache and self.default_formatter and page_exists:
01221             cache = caching.CacheEntry(request, self, 'pagelinks', scope='item', use_pickle=True)
01222             if cache.needsUpdate(self._text_filename()):
01223                 links = self.formatter.pagelinks
01224                 cache.update(links)
01225 
01226         # restore old formatter (hopefully we dont throw any exception that is catched again)
01227         if old_formatter is no_formatter:
01228             del request.formatter
01229         else:
01230             request.formatter = old_formatter
01231 
01232 
01233     def getFormatterName(self):
01234         """ Return a formatter name as used in the caching system
01235 
01236         @rtype: string
01237         @return: formatter name as used in caching
01238         """
01239         if not hasattr(self, 'formatter') or self.formatter is None:
01240             return ''
01241         module = self.formatter.__module__
01242         return module[module.rfind('.') + 1:]
01243 
01244     def canUseCache(self, parser=None):
01245         """ Is caching available for this request?
01246 
01247         This make sure we can try to use the caching system for this
01248         request, but it does not make sure that this will
01249         succeed. Themes can use this to decide if a Refresh action
01250         should be displayed.
01251 
01252         @param parser: the parser used to render the page
01253         @rtype: bool
01254         @return: if this page can use caching
01255         """
01256         if (not self.rev and
01257             not self.hilite_re and
01258             not self.__body_modified and
01259             self.getFormatterName() in self.cfg.caching_formats):
01260             # Everything is fine, now check the parser:
01261             if parser is None:
01262                 parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", self.pi['format'])
01263             return getattr(parser, 'caching', False)
01264         return False
01265 
01266     def send_page_content(self, request, body, format='wiki', format_args='', do_cache=1, **kw):
01267         """ Output the formatted wiki page, using caching if possible
01268 
01269         @param request: the request object
01270         @param body: text of the wiki page
01271         @param format: format of content, default 'wiki'
01272         @param format_args: #format arguments, used by some parsers
01273         @param do_cache: if True, use cached content
01274         """
01275         request.clock.start('send_page_content')
01276         # Load the parser
01277         Parser = wikiutil.searchAndImportPlugin(request.cfg, "parser", format)
01278         parser = Parser(body, request, format_args=format_args, **kw)
01279 
01280         if not (do_cache and self.canUseCache(Parser)):
01281             self.format(parser)
01282         else:
01283             try:
01284                 code = self.loadCache(request)
01285                 self.execute(request, parser, code)
01286             except Exception, e:
01287                 if not is_cache_exception(e):
01288                     raise
01289                 try:
01290                     code = self.makeCache(request, parser)
01291                     self.execute(request, parser, code)
01292                 except Exception, e:
01293                     if not is_cache_exception(e):
01294                         raise
01295                     logging.error('page cache failed after creation')
01296                     self.format(parser)
01297 
01298         request.clock.stop('send_page_content')
01299 
01300     def format(self, parser):
01301         """ Format and write page content without caching """
01302         parser.format(self.formatter)
01303 
    def execute(self, request, parser, code):
        """ Write page content by executing cache code

        The time spent here is measured with the "Page.execute" timer of
        request.clock, the timer is stopped even if an exception is raised.

        @param request: the request object
        @param parser: parser instance, a Macro object is created from it
        @param code: the cache code to execute (as returned by loadCache or
                     makeCache)
        """
        # NOTE(review): formatter (like macro_obj below) is not used by any
        # statement of this method. The exec statement has no explicit
        # namespace, so code runs in this local scope - presumably the cache
        # code refers to these names. Confirm against the code generator
        # before renaming or removing them.
        formatter = self.formatter
        request.clock.start("Page.execute")
        try:
            from MoinMoin.macro import Macro
            macro_obj = Macro(parser)
            # Fix __file__ when running from a zip package
            import MoinMoin
            if hasattr(MoinMoin, '__loader__'):
                __file__ = os.path.join(MoinMoin.__loader__.archive, 'dummy')
            try:
                exec code
            except "CacheNeedsUpdate": # convert the exception
                # a (legacy) string exception gets replaced by an Exception
                # instance carrying the same text
                raise Exception("CacheNeedsUpdate")
        finally:
            request.clock.stop("Page.execute")
01321 
01322     def loadCache(self, request):
01323         """ Return page content cache or raises 'CacheNeedsUpdate' """
01324         cache = caching.CacheEntry(request, self, self.getFormatterName(), scope='item')
01325         attachmentsPath = self.getPagePath('attachments', check_create=0)
01326         if cache.needsUpdate(self._text_filename(), attachmentsPath):
01327             raise Exception('CacheNeedsUpdate')
01328 
01329         import marshal
01330         try:
01331             return marshal.loads(cache.content())
01332         except (EOFError, ValueError, TypeError):
01333             # Bad marshal data, must update the cache.
01334             # See http://docs.python.org/lib/module-marshal.html
01335             raise Exception('CacheNeedsUpdate')
01336         except Exception, err:
01337             logging.info('failed to load "%s" cache: %s' %
01338                         (self.page_name, str(err)))
01339             raise Exception('CacheNeedsUpdate')
01340 
01341     def makeCache(self, request, parser):
01342         """ Format content into code, update cache and return code """
01343         import marshal
01344         from MoinMoin.formatter.text_python import Formatter
01345         formatter = Formatter(request, ["page"], self.formatter)
01346 
01347         # Save request state while formatting page
01348         saved_current_lang = request.current_lang
01349         try:
01350             text = request.redirectedOutput(parser.format, formatter)
01351         finally:
01352             request.current_lang = saved_current_lang
01353 
01354         src = formatter.assemble_code(text)
01355         code = compile(src.encode(config.charset),
01356                        self.page_name.encode(config.charset), 'exec')
01357         cache = caching.CacheEntry(request, self, self.getFormatterName(), scope='item')
01358         cache.update(marshal.dumps(code))
01359         return code
01360 
01361     def _specialPageText(self, request, special_type):
01362         """ Output the default page content for new pages.
01363 
01364         @param request: the request object
01365         """
01366         _ = request.getText
01367 
01368         if special_type == 'missing':
01369             if request.user.valid and request.user.name == self.page_name and \
01370                request.cfg.user_homewiki in ('Self', request.cfg.interwikiname):
01371                 page = wikiutil.getLocalizedPage(request, 'MissingHomePage')
01372             else:
01373                 page = wikiutil.getLocalizedPage(request, 'MissingPage')
01374 
01375             alternative_text = u"'''<<Action(action=edit, text=\"%s\")>>'''" % _('Create New Page')
01376         elif special_type == 'denied':
01377             page = wikiutil.getLocalizedPage(request, 'PermissionDeniedPage')
01378             alternative_text = u"'''%s'''" % _('You are not allowed to view this page.')
01379         else:
01380             assert False
01381 
01382         special_exists = page.exists()
01383 
01384         if special_exists:
01385             page._text_filename_force = page._text_filename()
01386         else:
01387             page.body = alternative_text
01388             logging.warn('The page "%s" could not be found. Check your'
01389                          ' underlay directory setting.' % page.page_name)
01390         page.page_name = self.page_name
01391 
01392         page.send_page(content_only=True, do_cache=not special_exists, send_special=True)
01393 
01394 
01395     def getRevList(self):
01396         """ Get a page revision list of this page, including the current version,
01397         sorted by revision number in descending order (current page first).
01398 
01399         @rtype: list of ints
01400         @return: page revisions
01401         """
01402         revisions = []
01403         if self.page_name:
01404             rev_dir = self.getPagePath('revisions', check_create=0)
01405             if os.path.isdir(rev_dir):
01406                 for rev in filesys.dclistdir(rev_dir):
01407                     try:
01408                         revint = int(rev)
01409                         revisions.append(revint)
01410                     except ValueError:
01411                         pass
01412                 revisions.sort()
01413                 revisions.reverse()
01414         return revisions
01415 
01416     def olderrevision(self, rev=0):
01417         """ Get revision of the next older page revision than rev.
01418         rev == 0 means this page objects revision (that may be an old
01419         revision already!)
01420         """
01421         if rev == 0:
01422             rev = self.rev
01423         revisions = self.getRevList()
01424         for r in revisions:
01425             if r < rev:
01426                 older = r
01427                 break
01428         return older
01429 
01430     def getPageText(self, start=0, length=None):
01431         """ Convenience function to get the page text, skipping the header
01432 
01433         @rtype: unicode
01434         @return: page text, excluding the header
01435         """
01436         if length is None:
01437             return self.data[start:]
01438         else:
01439             return self.data[start:start+length]
01440 
01441     def getPageHeader(self, start=0, length=None):
01442         """ Convenience function to get the page header
01443 
01444         @rtype: unicode
01445         @return: page header
01446         """
01447         header = ['#%s %s' % t for t in self.meta]
01448         header = '\n'.join(header)
01449         if header:
01450             if length is None:
01451                 return header[start:]
01452             else:
01453                 return header[start:start+length]
01454         return ''
01455 
01456     def getPageLinks(self, request):
01457         """ Get a list of the links on this page.
01458 
01459         @param request: the request object
01460         @rtype: list
01461         @return: page names this page links to
01462         """
01463         if self.exists():
01464             cache = caching.CacheEntry(request, self, 'pagelinks', scope='item', do_locking=False, use_pickle=True)
01465             if cache.needsUpdate(self._text_filename()):
01466                 links = self.parsePageLinks(request)
01467                 cache.update(links)
01468             else:
01469                 try:
01470                     links = cache.content()
01471                 except caching.CacheError:
01472                     links = self.parsePageLinks(request)
01473                     cache.update(links)
01474         else:
01475             links = []
01476         return links
01477 
01478     def parsePageLinks(self, request):
01479         """ Parse page links by formatting with a pagelinks formatter
01480 
01481         This is a old hack to get the pagelinks by rendering the page
01482         with send_page. We can remove this hack after factoring
01483         send_page and send_page_content into small reuseable methods.
01484 
01485         More efficient now by using special pagelinks formatter and
01486         redirecting possible output into null file.
01487         """
01488         pagename = self.page_name
01489         if request.parsePageLinks_running.get(pagename, False):
01490             #logging.debug("avoid recursion for page %r" % pagename)
01491             return [] # avoid recursion
01492 
01493         #logging.debug("running parsePageLinks for page %r" % pagename)
01494         # remember we are already running this function for this page:
01495         request.parsePageLinks_running[pagename] = True
01496 
01497         request.clock.start('parsePageLinks')
01498 
01499         class Null:
01500             def write(self, data):
01501                 pass
01502 
01503         request.redirect(Null())
01504         request.mode_getpagelinks += 1
01505         #logging.debug("mode_getpagelinks == %r" % request.mode_getpagelinks)
01506         try:
01507             try:
01508                 from MoinMoin.formatter.pagelinks import Formatter
01509                 formatter = Formatter(request, store_pagelinks=1)
01510                 page = Page(request, pagename, formatter=formatter)
01511                 page.send_page(content_only=1)
01512             except:
01513                 logging.exception("pagelinks formatter failed, traceback follows")
01514         finally:
01515             request.mode_getpagelinks -= 1
01516             #logging.debug("mode_getpagelinks == %r" % request.mode_getpagelinks)
01517             request.redirect()
01518             if hasattr(request, '_fmt_hd_counters'):
01519                 del request._fmt_hd_counters
01520             request.clock.stop('parsePageLinks')
01521         return formatter.pagelinks
01522 
01523     def getCategories(self, request):
01524         """ Get categories this page belongs to.
01525 
01526         @param request: the request object
01527         @rtype: list
01528         @return: categories this page belongs to
01529         """
01530         return wikiutil.filterCategoryPages(request, self.getPageLinks(request))
01531 
01532     def getParentPage(self):
01533         """ Return parent page or None
01534 
01535         @rtype: Page
01536         @return: parent page or None
01537         """
01538         if self.page_name:
01539             pos = self.page_name.rfind('/')
01540             if pos > 0:
01541                 parent = Page(self.request, self.page_name[:pos])
01542                 if parent.exists():
01543                     return parent
01544         return None
01545 
01546     def getACL(self, request):
01547         """ Get cached ACLs of this page.
01548 
01549         Return cached ACL or invoke parseACL and update the cache.
01550 
01551         @param request: the request object
01552         @rtype: MoinMoin.security.AccessControlList
01553         @return: ACL of this page
01554         """
01555         try:
01556             return self.__acl # for request.page, this is n-1 times used
01557         except AttributeError:
01558             # the caching here is still useful for pages != request.page,
01559             # when we have multiple page objects for the same page name.
01560             request.clock.start('getACL')
01561             # Try the cache or parse acl and update the cache
01562             currentRevision = self.current_rev()
01563             cache_name = self.page_name
01564             cache_key = 'acl'
01565             cache_data = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
01566             if cache_data is None:
01567                 aclRevision, acl = None, None
01568             else:
01569                 aclRevision, acl = cache_data
01570             #logging.debug("currrev: %r, cachedaclrev: %r" % (currentRevision, aclRevision))
01571             if aclRevision != currentRevision:
01572                 acl = self.parseACL()
01573                 if currentRevision != 99999999:
01574                     # don't use cache for non existing pages
01575                     # otherwise in the process of creating copies by filesys.copytree (PageEditor.copyPage)
01576                     # the first may test will create a cache entry with the default_acls for a non existing page
01577                     # At the time the page is created acls on that page would be ignored until the process
01578                     # is completed by adding a log entry into edit-log
01579                     cache_data = (currentRevision, acl)
01580                     request.cfg.cache.meta.putItem(request, cache_name, cache_key, cache_data)
01581             self.__acl = acl
01582             request.clock.stop('getACL')
01583             return acl
01584 
01585     def parseACL(self):
01586         """ Return ACLs parsed from the last available revision
01587 
01588         The effective ACL is always from the last revision, even if
01589         you access an older revision.
01590         """
01591         from MoinMoin import security
01592         if self.exists() and self.rev == 0:
01593             return self.pi['acl']
01594         try:
01595             lastRevision = self.getRevList()[0]
01596         except IndexError:
01597             return security.AccessControlList(self.request.cfg)
01598         if self.rev == lastRevision:
01599             return self.pi['acl']
01600 
01601         return Page(self.request, self.page_name, rev=lastRevision).parseACL()
01602 
01603     # Text format -------------------------------------------------------
01604 
01605     def encodeTextMimeType(self, text):
01606         """ Encode text from moin internal representation to text/* mime type
01607 
01608         Make sure text uses CRLF line ends, keep trailing newline.
01609 
01610         @param text: text to encode (unicode)
01611         @rtype: unicode
01612         @return: encoded text
01613         """
01614         if text:
01615             lines = text.splitlines()
01616             # Keep trailing newline
01617             if text.endswith(u'\n') and not lines[-1] == u'':
01618                 lines.append(u'')
01619             text = u'\r\n'.join(lines)
01620         return text
01621 
01622     def decodeTextMimeType(self, text):
01623         """ Decode text from text/* mime type to moin internal representation
01624 
01625         @param text: text to decode (unicode). Text must use CRLF!
01626         @rtype: unicode
01627         @return: text using internal representation
01628         """
01629         text = text.replace(u'\r', u'')
01630         return text
01631 
01632     def isConflict(self):
01633         """ Returns true if there is a known editing conflict for that page.
01634 
01635         @return: true if there is a known conflict.
01636         """
01637 
01638         cache = caching.CacheEntry(self.request, self, 'conflict', scope='item')
01639         return cache.exists()
01640 
01641     def setConflict(self, state):
01642         """ Sets the editing conflict flag.
01643 
01644         @param state: bool, true if there is a conflict.
01645         """
01646         cache = caching.CacheEntry(self.request, self, 'conflict', scope='item')
01647         if state:
01648             cache.update("") # touch it!
01649         else:
01650             cache.remove()
01651 
01652 
01653 class RootPage(Page):
01654     """ These functions were removed from the Page class to remove hierarchical
01655         page storage support until after we have a storage api (and really need it).
01656         Currently, there is only 1 instance of this class: request.rootpage
01657     """
01658     def __init__(self, request):
01659         page_name = u''
01660         Page.__init__(self, request, page_name)
01661 
01662     def getPageBasePath(self, use_underlay=0):
01663         """ Get full path to a page-specific storage area. `args` can
01664             contain additional path components that are added to the base path.
01665 
01666         @param use_underlay: force using a specific pagedir, default 0:
01667                                 1 = use underlay page dir
01668                                 0 = use standard page dir
01669                                 Note: we do NOT have special support for -1
01670                                       here, that will just behave as 0!
01671         @rtype: string
01672         @return: int underlay,
01673                  str the full path to the storage area
01674         """
01675         if self.cfg.data_underlay_dir is None:
01676             use_underlay = 0
01677 
01678         # 'auto' doesn't make sense here. maybe not even 'underlay':
01679         if use_underlay == 1:
01680             underlay, path = 1, self.cfg.data_underlay_dir
01681         # no need to check 'standard' case, we just use path in that case!
01682         else:
01683             # this is the location of the virtual root page
01684             underlay, path = 0, self.cfg.data_dir
01685 
01686         return underlay, path
01687 
01688     def getPageList(self, user=None, exists=1, filter=None, include_underlay=True, return_objects=False):
01689         """ List user readable pages under current page
01690 
01691         Currently only request.rootpage is used to list pages, but if we
01692         have true sub pages, any page can list its sub pages.
01693 
01694         The default behavior is listing all the pages readable by the
01695         current user. If you want to get a page list for another user,
01696         specify the user name.
01697 
01698         If you want to get the full page list, without user filtering,
01699         call with user="". Use this only if really needed, and do not
01700         display pages the user can not read.
01701 
01702         filter is usually compiled re match or search method, but can be
01703         any method that get a unicode argument and return bool. If you
01704         want to filter the page list, do it with this filter function,
01705         and NOT on the output of this function. page.exists() and
01706         user.may.read are very expensive, and should be done on the
01707         smallest data set.
01708 
01709         @param user: the user requesting the pages (MoinMoin.user.User)
01710         @param filter: filter function
01711         @param exists: filter existing pages
01712         @param include_underlay: determines if underlay pages are returned as well
01713         @param return_objects: lets it return a list of Page objects instead of
01714             names
01715         @rtype: list of unicode strings
01716         @return: user readable wiki page names
01717         """
01718         request = self.request
01719         request.clock.start('getPageList')
01720         # Check input
01721         if user is None:
01722             user = request.user
01723 
01724         # Get pages cache or create it
01725         cachedlist = request.cfg.cache.pagelists.getItem(request, 'all', None)
01726         if cachedlist is None:
01727             cachedlist = {}
01728             for name in self._listPages():
01729                 # Unquote file system names
01730                 pagename = wikiutil.unquoteWikiname(name)
01731 
01732                 # Filter those annoying editor backups - current moin does not create
01733                 # those pages any more, but users have them already in data/pages
01734                 # until we remove them by a mig script...
01735                 if pagename.endswith(u'/MoinEditorBackup'):
01736                     continue
01737 
01738                 cachedlist[pagename] = None
01739             request.cfg.cache.pagelists.putItem(request, 'all', None, cachedlist)
01740 
01741         if user or exists or filter or not include_underlay or return_objects:
01742             # Filter names
01743             pages = []
01744             for name in cachedlist:
01745                 # First, custom filter - exists and acl check are very
01746                 # expensive!
01747                 if filter and not filter(name):
01748                     continue
01749 
01750                 page = Page(request, name)
01751 
01752                 # Filter underlay pages
01753                 if not include_underlay and page.getPageStatus()[0]: # is an underlay page
01754                     continue
01755 
01756                 # Filter deleted pages
01757                 if exists and not page.exists():
01758                     continue
01759 
01760                 # Filter out page user may not read.
01761                 if user and not user.may.read(name):
01762                     continue
01763 
01764                 if return_objects:
01765                     pages.append(page)
01766                 else:
01767                     pages.append(name)
01768         else:
01769             pages = cachedlist.keys()
01770 
01771         request.clock.stop('getPageList')
01772         return pages
01773 
01774     def getPageDict(self, user=None, exists=1, filter=None, include_underlay=True):
01775         """ Return a dictionary of filtered page objects readable by user
01776 
01777         Invoke getPageList then create a dict from the page list. See
01778         getPageList docstring for more details.
01779 
01780         @param user: the user requesting the pages
01781         @param filter: filter function
01782         @param exists: only existing pages
01783         @rtype: dict {unicode: Page}
01784         @return: user readable pages
01785         """
01786         pages = {}
01787         for name in self.getPageList(user=user, exists=exists, filter=filter, include_underlay=include_underlay):
01788             pages[name] = Page(self.request, name)
01789         return pages
01790 
01791     def _listPages(self):
01792         """ Return a list of file system page names
01793 
01794         This is the lowest level disk access, don't use it unless you
01795         really need it.
01796 
01797         NOTE: names are returned in file system encoding, not in unicode!
01798 
01799         @rtype: dict
01800         @return: dict of page names using file system encoding
01801         """
01802         # Get pages in standard dir
01803         path = self.getPagePath('pages')
01804         pages = self._listPageInPath(path)
01805 
01806         if self.cfg.data_underlay_dir is not None:
01807             # Merge with pages from underlay
01808             path = self.getPagePath('pages', use_underlay=1)
01809             underlay = self._listPageInPath(path)
01810             pages.update(underlay)
01811 
01812         return pages
01813 
01814     def _listPageInPath(self, path):
01815         """ List page names in domain, using path
01816 
01817         This is the lowest level disk access, don't use it unless you
01818         really need it.
01819 
01820         NOTE: names are returned in file system encoding, not in unicode!
01821 
01822         @param path: directory to list (string)
01823         @rtype: dict
01824         @return: dict of page names using file system encoding
01825         """
01826         pages = {}
01827         for name in filesys.dclistdir(path):
01828             # Filter non-pages in quoted wiki names
01829             # List all pages in pages directory - assume flat namespace.
01830             # We exclude everything starting with '.' to get rid of . and ..
01831             # directory entries. If we ever create pagedirs starting with '.'
01832             # it will be with the intention to have them not show up in page
01833             # list (like .name won't show up for ls command under UNIX).
01834             # Note that a . within a wiki page name will be quoted to (2e).
01835             if not name.startswith('.'):
01836                 pages[name] = None
01837 
01838         if 'CVS' in pages:
01839             del pages['CVS'] # XXX DEPRECATED: remove this directory name just in
01840                              # case someone has the pages dir under CVS control.
01841         return pages
01842 
01843     def getPageCount(self, exists=0):
01844         """ Return page count
01845 
01846         The default value does the fastest listing, and return count of
01847         all pages, including deleted pages, ignoring acl rights.
01848 
01849         If you want to get a more accurate number, call with
01850         exists=1. This will be about 100 times slower though.
01851 
01852         @param exists: filter existing pages
01853         @rtype: int
01854         @return: number of pages
01855         """
01856         self.request.clock.start('getPageCount')
01857         if exists:
01858             # WARNING: SLOW
01859             pages = self.getPageList(user='')
01860         else:
01861             pages = self._listPages()
01862         count = len(pages)
01863         self.request.clock.stop('getPageCount')
01864 
01865         return count