Back to index

moin  1.9.0~rc2
cache.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     werkzeug.contrib.cache
00004     ~~~~~~~~~~~~~~~~~~~~~~
00005 
00006     The main problem with dynamic Web sites is, well, they're dynamic.  Each
00007     time a user requests a page, the webserver executes a lot of code, queries
00008     the database, renders templates until the visitor gets the page he sees.
00009 
00010     This is a lot more expensive than just loading a file from the file system
00011     and sending it to the visitor.
00012 
00013     For most Web applications, this overhead isn't a big deal but once it
00014     becomes one, you will be glad to have a cache system in place.
00015 
00016     How Caching Works
00017     =================
00018 
00019     Caching is pretty simple.  Basically you have a cache object lurking around
00020     somewhere that is connected to a remote cache or the file system or
00021     something else.  When the request comes in you check if the current page
00022     is already in the cache and, if so, you return it.  Otherwise you generate
00023     the page and put it into the cache.  (Or a fragment of the page, you don't
00024     have to cache the full thing)
00025 
00026     Here is a simple example of how to cache a sidebar for a template::
00027 
00028         def get_sidebar(user):
00029             identifier = 'sidebar_for/user%d' % user.id
00030             value = cache.get(identifier)
00031             if value is not None:
00032                 return value
00033             value = generate_sidebar_for(user=user)
00034             cache.set(identifier, value, timeout=60 * 5)
00035             return value
00036 
00037     Creating a Cache Object
00038     =======================
00039 
00040     To create a cache object you just import the cache system of your choice
00041     from the cache module and instantiate it.  Then you can start working
00042     with that object:
00043 
00044     >>> from werkzeug.contrib.cache import SimpleCache
00045     >>> c = SimpleCache()
00046     >>> c.set("foo", "value")
00047     >>> c.get("foo")
00048     'value'
00049     >>> c.get("missing") is None
00050     True
00051 
00052     Please keep in mind that you have to create the cache and put it somewhere
00053     you have access to it (either as a module global you can import or if you
00054     put it onto your WSGI application).
00055 
00056     :copyright: (c) 2009 by the Werkzeug Team, see AUTHORS for more details.
00057     :license: BSD, see LICENSE for more details.
00058 """
00059 import os
00060 import re
00061 try:
00062     from hashlib import md5
00063 except ImportError:
00064     from md5 import new as md5
00065 from itertools import izip
00066 from time import time
00067 from cPickle import loads, dumps, load, dump, HIGHEST_PROTOCOL
00068 
00069 
class BaseCache(object):
    """Base class for the cache systems.  All the cache systems implement
    this API or a superset of it.

    The primitive operations defined here store nothing: :meth:`get` always
    returns `None` and :meth:`set`, :meth:`add`, :meth:`delete` and
    :meth:`clear` do nothing.  The bulk helpers and the counters are built
    on top of those primitives, so a subclass only has to override the
    primitives to get a working cache.

    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`set`.
    """

    def __init__(self, default_timeout=300):
        self.default_timeout = default_timeout

    def get(self, key):
        """Looks up key in the cache and returns it.  If the key does not
        exist `None` is returned instead.

        :param key: the key to be looked up.
        """
        return None

    def delete(self, key):
        """Deletes `key` from the cache.  If it does not exist in the cache
        nothing happens.

        :param key: the key to delete.
        """
        pass

    def get_many(self, *keys):
        """Returns a list of values for the given keys.  For each key an
        item in the list is created, in the order the keys were passed.
        Example::

            foo, bar = cache.get_many("foo", "bar")

        If a key can't be looked up `None` is returned for that key
        instead.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        # an explicit list (rather than ``map``) so that the result can
        # always be indexed and unpacked.
        return [self.get(key) for key in keys]

    def get_dict(self, *keys):
        """Works like :meth:`get_many` but returns a dict::

            d = cache.get_dict("foo", "bar")
            foo = d["foo"]
            bar = d["bar"]

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        return dict(zip(keys, self.get_many(*keys)))

    def set(self, key, value, timeout=None):
        """Adds or overrides a key in the cache.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key or the default
                        timeout if not specified.
        """
        pass

    def add(self, key, value, timeout=None):
        """Works like :meth:`set` but does not override already existing
        values.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key or the default
                        timeout if not specified.
        """
        pass

    def set_many(self, mapping, timeout=None):
        """Sets multiple keys and values from a dict.

        :param mapping: a dict with the values to set.
        :param timeout: the cache timeout for the keys or the default
                        timeout if not specified.
        """
        for key, value in mapping.items():
            self.set(key, value, timeout)

    def delete_many(self, *keys):
        """Deletes multiple keys at once.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        for key in keys:
            self.delete(key)

    def clear(self):
        """Clears the cache.  Keep in mind that not all caches support
        clearing of the full cache.
        """
        pass

    def inc(self, key, delta=1):
        """Increments the value of a key by `delta`.  If the key does
        not yet exist it is initialized with `delta`.

        For supporting caches this is an atomic operation.  This default
        implementation is a plain :meth:`get` followed by :meth:`set` and
        the value is stored with the default timeout.

        :param key: the key to increment.
        :param delta: the delta to add.
        :return: the value that was passed to :meth:`set`.
        """
        value = (self.get(key) or 0) + delta
        self.set(key, value)
        return value

    def dec(self, key, delta=1):
        """Decrements the value of a key by `delta`.  If the key does
        not yet exist it is initialized with `-delta`.

        For supporting caches this is an atomic operation.  This default
        implementation is a plain :meth:`get` followed by :meth:`set` and
        the value is stored with the default timeout.

        :param key: the key to decrement.
        :param delta: the delta to subtract.
        :return: the value that was passed to :meth:`set`.
        """
        value = (self.get(key) or 0) - delta
        self.set(key, value)
        return value
00190 
00191 
class NullCache(BaseCache):
    """A cache that never stores anything.

    The class adds nothing to :class:`BaseCache` and overrides none of its
    methods.  It is handy in unit tests where a cache object is required
    but real caching is not wanted.

    :param default_timeout: accepted only so that the constructor matches
                            the other cache classes; the value is ignored.
    """
00198 
00199 
class SimpleCache(BaseCache):
    """Simple memory cache for single process environments.  This class exists
    mainly for the development server and is not 100% thread safe.  It tries
    to use as many atomic operations as possible and no locks for simplicity
    but it could happen under heavy load that keys are added multiple times.

    Values are pickled when they are stored and unpickled on every
    :meth:`get`, so callers always receive a fresh copy.

    :param threshold: the maximum number of items the cache stores before
                      it starts deleting some.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    """

    def __init__(self, threshold=500, default_timeout=300):
        BaseCache.__init__(self, default_timeout)
        # maps key -> (expiry timestamp, pickled value)
        self._cache = {}
        self._threshold = threshold

    def _prune(self):
        """Shrinks the cache once it holds more than `threshold` items.

        Every expired entry is dropped, and so is every third entry
        (counted in iteration order) regardless of its expiry.
        """
        if len(self._cache) > self._threshold:
            now = time()
            # iterate over a snapshot because entries are removed in the loop
            snapshot = list(self._cache.items())
            for idx, (key, (expires, _)) in enumerate(snapshot):
                if expires <= now or idx % 3 == 0:
                    self._cache.pop(key, None)

    def get(self, key):
        """Returns the unpickled value for `key`, or `None` if the key is
        missing or has expired.

        :param key: the key to be looked up.
        """
        expires, value = self._cache.get(key, (0, None))
        if expires > time():
            return loads(value)
        return None

    def set(self, key, value, timeout=None):
        """Stores `value` under `key`, replacing any previous entry.

        :param key: the key to set
        :param value: the value for the key; it must be picklable.
        :param timeout: number of seconds until the entry expires, or the
                        default timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._prune()
        self._cache[key] = (time() + timeout, dumps(value, HIGHEST_PROTOCOL))

    def add(self, key, value, timeout=None):
        """Works like :meth:`set` but leaves an existing, unexpired entry
        untouched.

        :param key: the key to set
        :param value: the value for the key; it must be picklable.
        :param timeout: number of seconds until the entry expires, or the
                        default timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._prune()
        now = time()
        entry = self._cache.get(key)
        if entry is not None and entry[0] <= now:
            # an expired entry is invisible to get(); drop it so that it
            # does not block the new value.
            self._cache.pop(key, None)
        item = (now + timeout, dumps(value, HIGHEST_PROTOCOL))
        self._cache.setdefault(key, item)

    def delete(self, key):
        """Removes `key` from the cache; missing keys are ignored.

        :param key: the key to delete.
        """
        self._cache.pop(key, None)

    def clear(self):
        """Removes all entries from the cache."""
        self._cache.clear()
00247 
00248 
00249 _test_memcached_key = re.compile(r'[^\x00-\x21\xff]{1,250}$').match
00250 
class MemcachedCache(BaseCache):
    """A cache that uses memcached as backend.

    The first argument can either be a list or tuple of server addresses
    in which case Werkzeug tries to import the memcache module and connect
    to it, or an object that resembles the API of a :class:`memcache.Client`.

    Implementation notes:  This cache backend works around some limitations in
    memcached to simplify the interface.  For example unicode keys are encoded
    to utf-8 on the fly.  Methods such as :meth:`~BaseCache.get_dict` return
    the keys in the same format as passed.  Furthermore all get and delete
    methods silently ignore invalid keys to not cause problems when
    untrusted user data is passed to them, which is often the case in web
    applications.  The methods that store or modify values pass the key on
    to the client without checking it.

    :param servers: a list or tuple of server addresses or alternatively
                    a :class:`memcache.Client` or a compatible client.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    :param key_prefix: a prefix that is added before all keys.  This makes it
                       possible to use the same memcached server for different
                       applications.  Keep in mind that
                       :meth:`~BaseCache.clear` will also clear keys with a
                       different prefix.
    """

    def __init__(self, servers, default_timeout=300, key_prefix=None):
        BaseCache.__init__(self, default_timeout)
        if isinstance(servers, (list, tuple)):
            try:
                import cmemcache as memcache
                is_cmemcache = True
            except ImportError:
                try:
                    import memcache
                    is_cmemcache = False
                except ImportError:
                    raise RuntimeError('no memcache module found')

            # cmemcache has a bug that debuglog is not defined for the
            # client.  Whenever pickle fails you get a weird AttributeError.
            if is_cmemcache:
                client = memcache.Client(map(str, servers))
                try:
                    client.debuglog = lambda *a: None
                except Exception:
                    # best effort only: the client works without the stub
                    pass
            else:
                client = memcache.Client(servers, False, HIGHEST_PROTOCOL)
        else:
            client = servers

        self._client = client
        self.key_prefix = key_prefix

    def _normalize_key(self, key):
        """Returns `key` in the form that is sent to the client: unicode
        keys are encoded to utf-8 and the key prefix, if any, is prepended.
        """
        if isinstance(key, unicode):
            key = key.encode('utf-8')
        if self.key_prefix:
            key = self.key_prefix + key
        return key

    def get(self, key):
        """Returns the cached value for `key`.  `None` is returned without
        asking the client if the key is not acceptable for memcached.

        :param key: the key to be looked up.
        """
        key = self._normalize_key(key)
        # memcached doesn't support keys longer than that.  Because often
        # checks for so long keys can occur because it's tested from user
        # submitted data etc we fail silently for getting.
        if _test_memcached_key(key):
            return self._client.get(key)
        return None

    def get_dict(self, *keys):
        """Looks up several keys with a single ``get_multi`` call.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        :return: a new dict that maps every key, exactly as it was passed
                 in, to its value; keys that are invalid or that the
                 client did not return map to `None`.
        """
        # normalized key -> key as passed by the caller
        key_mapping = {}
        for key in keys:
            encoded_key = self._normalize_key(key)
            # validate what is actually sent to the server, i.e. the
            # encoded key including the prefix, like get() does.
            if _test_memcached_key(encoded_key):
                key_mapping[encoded_key] = key
        # the keys call here is important because otherwise cmemcache
        # does ugly things.  What exactly I don't know, i think it does
        # Py_DECREF but quite frankly i don't care.
        found = self._client.get_multi(key_mapping.keys())
        # always build a fresh dict so that the object returned by the
        # client is never modified.
        rv = {}
        for encoded_key, value in found.iteritems():
            rv[key_mapping[encoded_key]] = value
        for key in keys:
            rv.setdefault(key, None)
        return rv

    def add(self, key, value, timeout=None):
        """Passes the value to the client's ``add`` method.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key or the default
                        timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._client.add(self._normalize_key(key), value, timeout)

    def set(self, key, value, timeout=None):
        """Passes the value to the client's ``set`` method.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key or the default
                        timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._client.set(self._normalize_key(key), value, timeout)

    def get_many(self, *keys):
        """Returns the values for `keys` as a list, in the order the keys
        were passed.  Built on :meth:`get_dict`.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        d = self.get_dict(*keys)
        return [d[key] for key in keys]

    def set_many(self, mapping, timeout=None):
        """Stores all items of `mapping` with a single ``set_multi`` call.

        :param mapping: a dict with the values to set.
        :param timeout: the cache timeout for the keys or the default
                        timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        new_mapping = {}
        for key, value in mapping.iteritems():
            new_mapping[self._normalize_key(key)] = value
        self._client.set_multi(new_mapping, timeout)

    def delete(self, key):
        """Deletes `key`.  Keys that are not acceptable for memcached are
        ignored silently.

        :param key: the key to delete.
        """
        key = self._normalize_key(key)
        if _test_memcached_key(key):
            self._client.delete(key)

    def delete_many(self, *keys):
        """Deletes several keys with a single ``delete_multi`` call.  Keys
        that are not acceptable for memcached are skipped.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        new_keys = []
        for key in keys:
            key = self._normalize_key(key)
            if _test_memcached_key(key):
                new_keys.append(key)
        self._client.delete_multi(new_keys)

    def clear(self):
        """Calls ``flush_all`` on the client."""
        self._client.flush_all()

    def inc(self, key, delta=1):
        """Increments `key` through the client's ``incr`` method.

        :param key: the key to increment.
        :param delta: the delta to add.
        :return: whatever the client's ``incr`` call returns.
        """
        return self._client.incr(self._normalize_key(key), delta)

    def dec(self, key, delta=1):
        """Decrements `key` through the client's ``decr`` method.

        :param key: the key to decrement.
        :param delta: the delta to subtract.
        :return: whatever the client's ``decr`` call returns.
        """
        return self._client.decr(self._normalize_key(key), delta)
00412 
00413 
class GAEMemcachedCache(MemcachedCache):
    """A :class:`MemcachedCache` that talks to the Google appengine
    memcache service.

    :param default_timeout: the timeout applied by
                            :meth:`~BaseCache.set` when the caller does not
                            pass one.
    :param key_prefix: a string put in front of every key so that several
                       applications can share one memcached server.  Keep
                       in mind that :meth:`~BaseCache.clear` also removes
                       keys that carry a different prefix.
    """

    def __init__(self, default_timeout=300, key_prefix=None):
        # imported here, at instantiation time, rather than at module level
        from google.appengine.api import memcache
        client = memcache.Client()
        MemcachedCache.__init__(self, client, default_timeout, key_prefix)
00430 
00431 
class FileSystemCache(BaseCache):
    """A cache that stores the items on the file system.  This cache depends
    on being the only user of the `cache_dir`.  Make absolutely sure that
    nobody but this cache stores files there or otherwise the cache will
    randomly delete files therein.

    Every item lives in its own file named after the md5 hex digest of the
    key.  The file holds two pickles: the expiry timestamp followed by the
    value.  Because the files are unpickled when they are read, the
    directory must not be writable by untrusted parties.

    :param cache_dir: the directory where cached files are stored.  It is
                      created if it does not exist.
    :param threshold: the maximum number of items the cache stores before
                      it starts deleting some.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    """

    def __init__(self, cache_dir, threshold=500, default_timeout=300):
        BaseCache.__init__(self, default_timeout)
        self._path = cache_dir
        self._threshold = threshold
        if not os.path.exists(self._path):
            os.makedirs(self._path)

    def _list_dir(self):
        """Returns the full paths of all entries in the cache directory."""
        return [os.path.join(self._path, name)
                for name in os.listdir(self._path)]

    def _remove_file(self, filename):
        """Removes `filename`; failures are ignored."""
        try:
            os.remove(filename)
        except (IOError, OSError):
            pass

    def _prune(self):
        """Shrinks the cache once the directory holds more than `threshold`
        entries.

        Every expired or unreadable file is removed, and so is every third
        file (counted in listing order) regardless of its expiry.
        """
        entries = self._list_dir()
        if len(entries) <= self._threshold:
            return
        now = time()
        # the directory entries already are the cache files, so they are
        # opened and removed by path instead of being hashed like keys.
        for idx, filename in enumerate(entries):
            try:
                f = open(filename, 'rb')
                try:
                    expires = load(f)
                finally:
                    f.close()
                keep = expires > now and idx % 3 != 0
            except Exception:
                # unreadable or corrupt entries are dropped as well
                keep = False
            if not keep:
                self._remove_file(filename)

    def _get_filename(self, key):
        """Returns the path of the file that stores `key`.  Unicode keys
        are encoded to utf-8 before they are hashed.
        """
        if isinstance(key, unicode):
            key = key.encode('utf-8')
        digest = md5(key).hexdigest()
        return os.path.join(self._path, digest)

    def get(self, key):
        """Returns the cached value for `key`, or `None` if the file is
        missing, unreadable or expired.  An expired file is removed.

        :param key: the key to be looked up.
        """
        filename = self._get_filename(key)
        try:
            f = open(filename, 'rb')
            try:
                # NOTE: the file content is unpickled; see the class
                # docstring about who may write to the cache directory.
                if load(f) >= time():
                    return load(f)
            finally:
                f.close()
            os.remove(filename)
        except Exception:
            return None
        return None

    def add(self, key, value, timeout=None):
        """Works like :meth:`set` but does nothing if a file for `key`
        already exists.  The expiry of an existing file is not checked.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key or the default
                        timeout if not specified.
        """
        filename = self._get_filename(key)
        if not os.path.exists(filename):
            self.set(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Writes `value` to the file for `key`, replacing any previous
        content.  Errors while opening or writing the file are ignored.

        :param key: the key to set
        :param value: the value for the key; it must be picklable.
        :param timeout: number of seconds until the entry expires, or the
                        default timeout if not specified.
        """
        if timeout is None:
            timeout = self.default_timeout
        filename = self._get_filename(key)
        self._prune()
        try:
            f = open(filename, 'wb')
            try:
                # the expiry comes first so that it can be read without
                # unpickling the value.
                dump(int(time() + timeout), f, 1)
                dump(value, f, HIGHEST_PROTOCOL)
            finally:
                f.close()
        except (IOError, OSError):
            pass

    def delete(self, key):
        """Removes the file for `key`; a missing file is ignored.

        :param key: the key to delete.
        """
        self._remove_file(self._get_filename(key))

    def clear(self):
        """Removes every file in the cache directory."""
        for filename in self._list_dir():
            self._remove_file(filename)