Back to index

moin  1.9.0~rc2
cache.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Send a raw object from the caching system (and offer utility
00004     functions to put data into cache, calculate cache key, etc.).
00005 
00006     Sample usage
00007     ------------
00008     Assume we have a big picture (bigpic) and we want to efficiently show some
00009     thumbnail (thumbpic) for it:
00010 
00011     # first calculate a (hard to guess) cache key (this key will change if the
00012     # original data (bigpic) changes):
00013     key = cache.key(..., attachname=bigpic, ...)
00014 
00015     # check if we don't have it in cache yet
00016     if not cache.exists(..., key):
00017         # if we don't have it in cache, we need to render it - this is an
00018         # expensive operation that we want to avoid by caching:
00019         thumbpic = render_thumb(bigpic)
00020         # put expensive operation's results into cache:
00021         cache.put(..., key, thumbpic, ...)
00022 
00023     url = cache.url(..., key)
00024     html = '<img src="%s">' % url
00025 
00026     @copyright: 2008 MoinMoin:ThomasWaldmann
00027     @license: GNU GPL, see COPYING for details.
00028 """
00029 
00030 from MoinMoin import log
00031 logging = log.getLogger(__name__)
00032 
00033 # keep both imports below as they are, order is important:
00034 from MoinMoin import wikiutil
00035 import mimetypes
00036 
00037 from MoinMoin import config, caching
00038 from MoinMoin.util import filesys
00039 from MoinMoin.action import AttachFile
00040 from MoinMoin.support.python_compatibility import hmac_new
00041 
# Name of this action: the last dotted component of the module name.
# It is used as the "action" query argument when building cache URLs.
action_name = __name__.split('.')[-1]

# Cache arena that all entries of this action live in.
# Do NOT get this directly from request.values or user would be able to read any cache!
cache_arena = 'sendcache'  # a fixed name; just using action_name would maybe be rather confusing

# Cache scope passed to caching.CacheEntry for every entry of this action.
# We maybe could use page local caching (not 'wiki' global) to have less directory entries.
# Local is easier to automatically cleanup if an item changes. Global is easier to manually cleanup.
# Local makes data_dir much larger, harder to backup.
cache_scope = 'wiki'

# Passed as do_locking=... to every caching.CacheEntry created in this module.
do_locking = False
00053 
def key(request, wikiname=None, itemname=None, attachname=None, content=None, secret=None):
    """
    Compute a (hard-to-guess) cache key as the hex digest of a hMAC.

    Required key properties:
    * It must be hard to guess: do=get does no ACL checks, so whoever got the
      key (e.g. from the html rendering of an ACL protected wiki page) will be
      able to see the cached content.
    * It must change whenever the (original) content changes. ACLs on an item
      may change, and being allowed to see one revision of an item does not
      imply being allowed to see any other revision. Seeing exactly the same
      content again does no harm, but access to a revision with different
      content could.

    Two ways to call this:
    * content given (and non-empty): the hMAC is calculated over the content.
    * itemname and attachname given: the content is not touched (nor read from
      the attachment file); the hMAC is calculated over wikiname, itemname,
      attachname and the repr() of filesys.fuid() for the attachment file.

    Hint: if you need multiple cache objects for the same source content (e.g.
          thumbnails of different sizes for the same image), calculate the key
          only once and then add some different prefixes to it to get the final
          cache keys.

    @param request: the request object
    @param wikiname: the name of the wiki (if not given, cfg.interwikiname or
                     cfg.siteid is used)
    @param itemname: the name of the page
    @param attachname: the filename of the attachment
    @param content: content data as unicode object (e.g. for page content or
                    parser section content)
    @param secret: secret for hMAC calculation (default: use secret from cfg)
    @return: hex digest of the hMAC
    @raise AssertionError: if neither content nor itemname+attachname is given
    """
    if secret is None:
        secret = request.cfg.secrets['action/cache']

    if content:
        source = content
    else:
        if itemname is None or attachname is None:
            raise AssertionError('cache_key called with unsupported parameters')
        cfg = request.cfg
        wikiname = wikiname or cfg.interwikiname or cfg.siteid
        attach_path = AttachFile.getFilename(request, itemname, attachname)
        fuid = filesys.fuid(attach_path)
        source = u''.join([wikiname, itemname, attachname, repr(fuid)])

    # the hMAC is calculated over the utf-8 encoded form of the source data
    return hmac_new(secret, source.encode('utf-8')).hexdigest()
00102 
00103 
def put(request, key, data,
        filename=None,
        content_type=None,
        content_disposition=None,
        content_length=None,
        last_modified=None,
        original=None):
    """
    Store an object in the cache so the cache action can send it later.

    Two cache entries are written for the key: first key+'.data' (the content),
    then key+'.meta' (a pickled dict holding the http headers to send, the
    last-modified values and the 'original' information).

    @param request: the request object
    @param key: non-guessable key into cache (str)
    @param data: content data (str or open file-like obj)
    @param filename: filename for content-disposition header and for autodetecting
                     content_type (unicode, default: None); any path part is
                     stripped, only the basename is used
    @param content_type: content-type header value (str, default: autodetect from
                         filename, falling back to 'application/octet-stream')
    @param content_disposition: type for content-disposition header (str, default: None);
                                the header is only added if a filename is also given
    @param content_length: data length for content-length header (int, default:
                           size of the data cache entry)
    @param last_modified: last modified timestamp (int, default: mtime of the
                          data cache entry)
    @param original: location of original object (default: None) - this is just written to
                     the metadata cache "as is" and could be used for cache cleanup,
                     use (wikiname, itemname, attachname or None))
    """
    import os.path
    from MoinMoin.util import timefuncs

    if filename:
        # reduce to a plain filename (no directory part)
        filename = os.path.basename(filename)
        if content_type is None:
            # autodetect from the filename; stays None if nothing was guessed
            content_type = mimetypes.guess_type(filename)[0] or None
    if content_type is None:
        content_type = 'application/octet-stream'

    # data entry goes first, size and mtime defaults are read back from it
    data_entry = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
    data_entry.update(data)
    if not content_length:
        content_length = data_entry.size()
    if not last_modified:
        last_modified = data_entry.mtime()

    httpdate_last_modified = timefuncs.formathttpdate(int(last_modified))
    headers = [
        ('Content-Type', content_type),
        ('Last-Modified', httpdate_last_modified),
        ('Content-Length', content_length),
    ]
    if content_disposition and filename:
        # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
        # There is no solution that is compatible to IE except stripping non-ascii chars
        encoded_name = filename.encode(config.charset)
        disposition = '%s; filename="%s"' % (content_disposition, encoded_name)
        headers.append(('Content-Disposition', disposition))

    meta = {
        'httpdate_last_modified': httpdate_last_modified,
        'last_modified': last_modified,
        'headers': headers,
        'original': original,
    }
    meta_entry = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
    meta_entry.update(meta)
00166 
00167 
def exists(request, key, strict=False):
    """
    Tell whether there is a cached object for this key.

    By default only the meta cache entry (key+'.meta') is checked and the data
    is assumed to be there if the meta entry is. With strict=True the data
    cache entry (key+'.data') has to exist as well.

    @param request: the request object
    @param key: non-guessable key into cache (str)
    @param strict: if True, also check the data cache, not only meta (bool, default: False)
    @return: is object cached? (bool)
    """
    data_cached = True  # non-strict: trust that data is there if meta is there
    if strict:
        data_cached = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope,
                                         do_locking=do_locking).exists()

    meta_cached = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope,
                                     do_locking=do_locking, use_pickle=True).exists()

    return meta_cached and data_cached
00187 
00188 
def remove(request, key):
    """
    Delete both cache entries (headers/meta and data) stored for key.

    @param request: the request object
    @param key: non-guessable key into cache (str)
    """
    # meta entry is removed first, then the data entry
    caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope,
                       do_locking=do_locking, use_pickle=True).remove()
    caching.CacheEntry(request, cache_arena, key+'.data', cache_scope,
                       do_locking=do_locking).remove()
00195 
00196 
def url(request, key, do='get'):
    """
    Build the URL under which the cache action serves the object cached for key.

    @param request: the request object
    @param key: non-guessable key into cache (str)
    @param do: sub-command for the cache action (default: 'get')
    @return: result of request.href() for this action, do and key
    """
    query = dict(action=action_name, do=do, key=key)
    return request.href(**query)
00200 
def _get_headers(request, key):
    """
    Read the meta cache entry for key.

    @return: tuple (httpdate_last_modified, headers) as stored in the meta entry
    """
    entry = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope,
                               do_locking=do_locking, use_pickle=True)
    stored = entry.content()
    return stored['httpdate_last_modified'], stored['headers']
00206 
00207 
def _get_datafile(request, key):
    """
    Open the data cache entry for key for reading.

    @return: the data CacheEntry object, after open(mode='r') was called on it
    """
    entry = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope,
                               do_locking=do_locking)
    entry.open(mode='r')
    return entry
00213 
00214 
def _do_get(request, key):
    """
    Send a complete http response with the headers/data cached for key.

    Sets status 304 if the request's if_modified_since equals the cached
    last-modified value, and status 404 if the cache raises CacheError.
    Otherwise the cached headers are applied to the request and the data file
    is sent.
    """
    # these cached headers are set via request attributes, all others are
    # added to request.headers
    attr_for_header = {
        'content-type': 'content_type',
        'last-modified': 'last_modified',
        'content-length': 'content_length',
    }
    try:
        last_modified, headers = _get_headers(request, key)
        # NOTE(review): last_modified is the stored 'httpdate_last_modified'
        # value; this equality can only match if request.if_modified_since has
        # the same type and form - confirm against the request class.
        if request.if_modified_since == last_modified:
            request.status_code = 304
            return

        data_file = _get_datafile(request, key)
        for name, value in headers:
            attr = attr_for_header.get(name.lower())
            if attr is None:
                request.headers.add(name, value)
            else:
                setattr(request, attr, value)
        request.send_file(data_file)
    except caching.CacheError:
        request.status_code = 404
00236 
00237 
def _do_remove(request, key):
    """ Delete the headers/data cache entries for key (thin wrapper around remove()). """
    remove(request, key)


def _do(request, do, key):
    """
    Dispatch a cache sub-command.

    'get' sends the cached object, 'remove' deletes it; any other value of
    do is silently ignored.
    """
    handlers = {
        'get': _do_get,
        'remove': _do_remove,
    }
    handler = handlers.get(do)
    if handler is not None:
        handler(request, key)
00248 
def _is_safe_key(key):
    """
    Check whether a key taken from the request is usable as a cache key.

    A key is rejected if it is missing/empty or if it contains a path
    separator ('/' or backslash) or a NUL character.

    @param key: the key as read from request.values (may be None)
    @return: True if the key may be used, False otherwise (bool)
    """
    if not key:
        return False
    for forbidden in ('/', '\\', '\0'):
        if forbidden in key:
            return False
    return True


def execute(pagename, request):
    """
    Entry point of the cache action.

    Reads the sub-command ('do') and the cache key ('key') from request.values
    and dispatches to _do(). Requests without a usable key are answered with
    status 404 instead of being passed on.

    @param pagename: name of the page the action was called on (unused here)
    @param request: the request object
    """
    do = request.values.get('do')
    key = request.values.get('key')
    # The key is user-supplied. Without one, building the cache entry names
    # (key+'.meta' / key+'.data') would fail with a TypeError. It is also used
    # to address cache entries, one of them unpickled - presumably it ends up
    # in a file name (confirm against caching.CacheEntry), so it must not be
    # able to point outside the cache arena.
    if not _is_safe_key(key):
        request.status_code = 404
        return
    _do(request, do, key)
00253