
moin 1.9.0~rc2
antispam.py
# -*- coding: iso-8859-1 -*-
"""
    This implements a global (and a local) blacklist against wiki spammers.

    @copyright: 2005-2008 MoinMoin:ThomasWaldmann
    @license: GNU GPL, see COPYING for details
"""

import re, time, datetime

from MoinMoin import log
logging = log.getLogger(__name__)

from MoinMoin.support.python_compatibility import frozenset
from MoinMoin.security import Permissions
from MoinMoin import caching, wikiutil

# Errors ---------------------------------------------------------------

class Error(Exception):
    """Base class for antispam errors."""

    def __str__(self):
        return repr(self)

class WikirpcError(Error):
    """ Raised when we get xmlrpclib.Fault """

    def __init__(self, msg, fault):
        """ Init with msg and xmlrpclib.Fault dict """
        self.msg = msg
        self.fault = fault

    def __str__(self):
        """ Format the message using the description and data from the fault """
        return self.msg + ": [%(faultCode)s]  %(faultString)s" % self.fault


# Functions ------------------------------------------------------------

def makelist(text):
    """ Split text into lines, strip them, skip # comments """
    lines = text.splitlines()
    result = []
    for line in lines:
        line = line.split(' # ', 1)[0] # rest of line comment
        line = line.strip()
        if line and not line.startswith('#'):
            result.append(line)
    return result

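# A minimal usage sketch for makelist (hypothetical input, not part of the
# original module): full-line '#' comments are skipped and anything after
# ' # ' is treated as a rest-of-line comment.
#
#     >>> makelist("foo\n# a comment line\nbar # trailing comment\n")
#     ['foo', 'bar']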

def getblacklist(request, pagename, do_update):
    """ Get blacklist, possibly downloading a new copy

    @param request: current request (request instance)
    @param pagename: bad content page name (unicode)
    @param do_update: if True, check the master wiki for a newer copy
    @rtype: tuple
    @return: (mtime of the blacklist page, list of blacklisted regular expressions)
    """
    from MoinMoin.PageEditor import PageEditor
    p = PageEditor(request, pagename, uid_override="Antispam subsystem")
    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
    if do_update:
        tooold = time.time() - 1800 # 30 minutes ago
        failure = caching.CacheEntry(request, "antispam", "failure", scope='wiki')
        fail_time = failure.mtime() # only update if there was no failure in the last 30 minutes
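        # The "failure" cache entry doubles as a backoff marker: its mtime is
        # refreshed below (failure.update("")) both on errors and when the
        # master has no newer page, so the master is contacted at most once
        # per 30-minute window instead of on every save.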
        if (mymtime < tooold) and (fail_time < tooold):
            logging.info("local %s copy is older than 30 minutes, have to check for an update..." % pagename)
            import xmlrpclib
            import socket

            timeout = 15 # timeout for reaching the master server via xmlrpc
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
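            # Note: setdefaulttimeout is process-global and affects every
            # socket created afterwards; xmlrpclib in this Python 2 era has no
            # per-connection timeout option, which is why the old value is
            # saved here and restored once the update attempt is finished.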

            master_url = request.cfg.antispam_master_url
            master = xmlrpclib.ServerProxy(master_url)
            try:
                # Get BadContent info
                master.putClientInfo('ANTISPAM-CHECK', request.url)
                response = master.getPageInfo(pagename)

                # It seems that response is always a dict
                if isinstance(response, dict) and 'faultCode' in response:
                    raise WikirpcError("failed to get BadContent information",
                                       response)

                # Compare date against local BadContent copy
                masterdate = response['lastModified']

                if isinstance(masterdate, datetime.datetime):
                    # for python 2.5
                    mydate = datetime.datetime(*tuple(time.gmtime(mymtime))[0:6])
                else:
                    # for python <= 2.4.x
                    mydate = xmlrpclib.DateTime(tuple(time.gmtime(mymtime)))

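                # Whichever type the master sent for lastModified, mymtime was
                # converted to the same type above, so the '<' comparison below
                # compares like with like (datetime vs. datetime, or
                # xmlrpclib.DateTime vs. xmlrpclib.DateTime).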
                logging.debug("master: %s mine: %s" % (masterdate, mydate))
                if mydate < masterdate:
                    # Get new copy and save
                    logging.info("Fetching page from %s..." % master_url)
                    master.putClientInfo('ANTISPAM-FETCH', request.url)
                    response = master.getPage(pagename)
                    if isinstance(response, dict) and 'faultCode' in response:
                        raise WikirpcError("failed to get BadContent data", response)
                    p._write_file(response)
                    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
                else:
                    failure.update("") # we didn't get a modified version, this avoids
                                       # permanent polling for every save when there
                                       # is no updated master page

            except (socket.error, xmlrpclib.ProtocolError), err:
                logging.error('Timeout / socket / protocol error when accessing %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except xmlrpclib.Fault, err:
                logging.error('Fault on %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except Error, err:
                # In case of Error, we log the error and use the local BadContent copy.
                logging.error(str(err))

            # set back socket timeout
            socket.setdefaulttimeout(old_timeout)

    blacklist = p.get_raw_body()
    return mymtime, makelist(blacklist)


class SecurityPolicy(Permissions):
    """ Extend the default security policy with the antispam feature """

    def save(self, editor, newtext, rev, **kw):
        BLACKLISTPAGES = ["BadContent", "LocalBadContent"]
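        # BadContent is the global list (kept in sync with the master wiki);
        # LocalBadContent holds per-wiki additions and is never fetched, as
        # the do_update flag below shows.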
        if editor.page_name not in BLACKLISTPAGES:
            request = editor.request

            # Start timing of antispam operation
            request.clock.start('antispam')

            blacklist = []
            latest_mtime = 0
            for pn in BLACKLISTPAGES:
                do_update = (pn != "LocalBadContent" and
                             request.cfg.interwikiname != 'MoinMaster') # MoinMaster wiki shall not fetch updates from itself
                blacklist_mtime, blacklist_entries = getblacklist(request, pn, do_update)
                blacklist += blacklist_entries
                latest_mtime = max(latest_mtime, blacklist_mtime)

            if blacklist:
                invalid_cache = not getattr(request.cfg.cache, "antispam_blacklist", None)
                if invalid_cache or request.cfg.cache.antispam_blacklist[0] < latest_mtime:
                    mmblcache = []
                    for blacklist_re in blacklist:
                        try:
                            mmblcache.append(re.compile(blacklist_re, re.I))
                        except re.error, err:
                            logging.error("Error in regex '%s': %s. Please check the pages %s." % (
                                          blacklist_re,
                                          str(err),
                                          ', '.join(BLACKLISTPAGES)))
                    request.cfg.cache.antispam_blacklist = (latest_mtime, mmblcache)
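                # The compiled patterns are cached on request.cfg.cache, which
                # normally lives for the whole server process, keyed by the
                # newest blacklist mtime: recompilation only happens after one
                # of the blacklist pages actually changes.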

                from MoinMoin.Page import Page

                oldtext = ""
                if rev > 0: # rev is the revision of the old page
                    page = Page(request, editor.page_name, rev=rev)
                    oldtext = page.get_raw_body()

                newset = frozenset(newtext.splitlines(1))
                oldset = frozenset(oldtext.splitlines(1))
                difference = newset - oldset
                addedtext = kw.get('comment', u'') + u''.join(difference)
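                # Only the lines added in this edit (plus the edit comment)
                # are matched against the blacklist, so spam that is already
                # on the page does not block unrelated later edits.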

                for blacklist_re in request.cfg.cache.antispam_blacklist[1]:
                    match = blacklist_re.search(addedtext)
                    if match:
                        # Log error and raise SaveError, PageEditor should handle this.
                        _ = editor.request.getText
                        msg = _('Sorry, can not save page because "%(content)s" is not allowed in this wiki.') % {
                                  'content': wikiutil.escape(match.group())
                              }
                        logging.info(msg)
                        raise editor.SaveError(msg)
            request.clock.stop('antispam')

        # No problem saving if my base class agrees
        return Permissions.save(self, editor, newtext, rev, **kw)
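

# Usage sketch (an assumption based on MoinMoin's SecurityPolicy configuration
# hook, not something this file defines): enable the antispam check by
# pointing the SecurityPolicy option at the class above in wikiconfig.py.
#
#     from MoinMoin.config import multiconfig
#     from MoinMoin.security.antispam import SecurityPolicy
#
#     class Config(multiconfig.DefaultConfig):
#         sitename = u'My Wiki' # ...usual configuration...
#         SecurityPolicy = SecurityPolicy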