Back to index

moin  1.9.0~rc2
SpellCheck.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - Spelling Action
00004 
00005     Word adding based on code by Christian Bird <chris.bird@lineo.com>
00006 
00007     This action checks for spelling errors in a page using one or several
00008     word lists.
00009 
00010     MoinMoin looks for dictionary files in the directory "dict" within the
00011     MoinMoin package directory. To load the default UNIX word files, you
00012     have to manually create symbolic links to those files (usually
00013     '/usr/dict/words' or '/usr/share/dict/words').
00014 
00015     Additionally, all words on the page "LocalSpellingWords" are added to
00016     the list of valid words, if that page exists.
00017 
00018     @copyright: 2001 Richard Jones <richard@bizarsoftware.com.au>,
00019                 2001-2004 Juergen Hermann <jh@web.de>
00020     @license: GNU GPL, see COPYING for details.
00021 """
00022 
00023 import os, re, codecs
00024 from MoinMoin import config, wikiutil
00025 from MoinMoin.Page import Page
00026 
00027 
def _getWordsFiles(request):
    """Collect the word list files usable for spell checking.

    Looks into the "dict" subdirectory of request.cfg.moinmoin_dir and of
    request.cfg.data_dir (in that order) and returns the full paths of all
    entries that are regular, readable files. Directories are left out.
    """
    usable = []

    for root in (request.cfg.moinmoin_dir, request.cfg.data_dir):
        dictdir = os.path.join(root, 'dict')
        if not os.path.isdir(dictdir):
            # this location has no dictionary directory at all
            continue

        for name in os.listdir(dictdir):
            path = os.path.join(dictdir, name)
            # only keep plain files we are actually allowed to read
            if os.path.isfile(path) and os.access(path, os.F_OK | os.R_OK):
                usable.append(path)

    return usable
00047 
def _loadWords(lines, dict):
    """Register every whitespace-separated word of lines in dict.

    Each word is encoded to config.charset and stored as a key with an
    empty string as value; dict is modified in place.
    """
    for entry in lines:
        for token in entry.split():
            encoded = token.encode(config.charset)
            dict[encoded] = ''
00053 
def _loadWordsFile(request, dict, filename):
    """Read a word list file and add all of its words to dict.

    The file is decoded using config.charset first; if that raises a
    UnicodeError it is read again as iso-8859-1. The time spent is
    accounted to the 'spellread' timer of request.clock, which is stopped
    even when reading fails.

    Errors from opening or reading the file (other than the UnicodeError
    handled by the fallback) propagate to the caller.
    """
    request.clock.start('spellread')
    try:
        try:
            # plain 'r': codecs.open() always adds 'b' to the mode itself
            f = codecs.open(filename, 'r', config.charset)
            try:
                lines = f.readlines()
            finally:
                # close this handle before a possible retry below
                f.close()
        except UnicodeError:
            f = codecs.open(filename, 'r', 'iso-8859-1')
            try:
                lines = f.readlines()
            finally:
                f.close()
        _loadWords(lines, dict)
    finally:
        request.clock.stop('spellread')
00067 
def _loadWordsPage(request, dict, page):
    """Add all words found on a wiki page to dict.

    The lines come from page.getlines(); request is not used here.
    """
    _loadWords(page.getlines(), dict)
00071 
00072 
def _loadDict(request):
    """ Load words from words files or cached dict

    When the "dbhash" module can be imported, the words are kept in the
    file <data_dir>/cache/spellchecker.dict: an existing file is opened
    read-only and returned as is, otherwise it is created and filled from
    the word files. Without "dbhash", a plain in-memory dict is filled and
    returned on every call.
    """
    # "dbhash" is optional, work without it if it is missing
    try:
        import dbhash
    except ImportError:
        dbhash = None

    cachename = os.path.join(request.cfg.data_dir, 'cache', 'spellchecker.dict')

    # a cache built earlier can be used directly
    if dbhash and os.path.exists(cachename):
        return dbhash.open(cachename, "r")

    # otherwise collect the word files and read them all
    wordsfiles = _getWordsFiles(request)
    if dbhash:
        words = dbhash.open(cachename, 'n')
    else:
        words = {}

    for path in wordsfiles:
        _loadWordsFile(request, words, path)

    if dbhash:
        # write the freshly built cache out
        words.sync()

    return words
00099 
00100 
def _addLocalWords(request):
    """Append the words checked in the submitted form to the local words page.

    The words are taken from the "newwords" values of request.form and
    appended, joined by blanks, to the page named by
    request.cfg.page_local_spelling_words. Nothing is saved when no word
    was checked.
    """
    # get the new words (if any are marked at all)
    try:
        newwords = request.form.getlist('newwords')
    except KeyError:
        # no new words checked
        return
    if not newwords:
        # getlist() of multi-dict style forms returns an empty list instead
        # of raising KeyError - do not save the page just to add nothing
        return
    newwords = u' '.join(newwords)
    if not newwords.strip():
        # only empty values were submitted
        return

    # imported here (not at module level) as in the original code; doing it
    # after the early returns avoids the import when there is nothing to do
    from MoinMoin.PageEditor import PageEditor

    # get the page contents
    lsw_page = PageEditor(request, request.cfg.page_local_spelling_words)
    words = lsw_page.get_raw_body()

    # add the words to the page and save it
    if words and words[-1] != '\n':
        words = words + '\n'
    lsw_page.saveText(words + '\n' + newwords, 0)
00119 
00120 
def _findBadWords(text, wordsdict, localwords):
    """Return the words of text found in neither dictionary.

    A word is an optional upper case character followed by lower case
    characters (as defined by config.chars_upper / config.chars_lower).
    Single character words are ignored. A word is known if it, or its
    lower case form, encoded to config.charset, is a key of wordsdict or
    localwords. The result is a list without duplicates, sorted
    case-insensitively.
    """
    word_re = re.compile(r'([%s]?[%s]+)' % (
        config.chars_upper, config.chars_lower), re.UNICODE)
    num_re = re.compile(r'^\d+$', re.UNICODE)

    unknown = {}
    for line in text.split('\n'):
        # empty lines and lines starting with '#' are not checked
        if line == '' or line[0] == '#':
            continue
        for match in word_re.finditer(line):
            word = match.group(1)
            if len(word) == 1:
                continue
            w_enc = word.encode(config.charset)
            wl_enc = word.lower().encode(config.charset)
            known = (w_enc in wordsdict or wl_enc in wordsdict or
                     w_enc in localwords or wl_enc in localwords)
            if not known and not num_re.match(word):
                unknown[word] = 1

    return sorted(unknown, key=lambda word: word.lower())


def checkSpelling(page, request, own_form=1):
    """ Do spell checking, return a tuple with the result.

    If the request form contains 'button_newwords', the words checked in
    the form are first added to the local spelling words page.

    @param page: the page whose raw body is checked
    @param request: the current request
    @param own_form: if true, the checkboxes for the unknown words are
                     wrapped into a form posting to this action; if false
                     only the checkboxes and the submit button are emitted
                     (presumably for embedding into a form of the caller)
    @rtype: tuple
    @return: (badwords, badwords_re, msg) - badwords is the list of unknown
             words sorted case-insensitively (empty if there are none),
             badwords_re is a compiled regex matching these words (None if
             there are none) and msg is the message to show to the user
             (containing HTML markup when unknown words were found)
    """
    _ = request.getText

    # first check to see if we were called with a "newwords" parameter
    if 'button_newwords' in request.form:
        _addLocalWords(request)

    # load words
    wordsdict = _loadDict(request)
    try:
        localwords = {}
        lsw_page = Page(request, request.cfg.page_local_spelling_words)
        if lsw_page.exists():
            _loadWordsPage(request, localwords, lsw_page)

        request.clock.start('spellcheck')
        try:
            # load page and do the checking
            badwords = _findBadWords(page.get_raw_body(), wordsdict, localwords)

            if badwords:
                # build regex recognizing the bad words
                badwords_re = r'(^|(?<!\w))(%s)(?!\w)'
                badwords_re = badwords_re % ("|".join([re.escape(bw) for bw in badwords]), )
                badwords_re = re.compile(badwords_re, re.UNICODE)

                lsw_msg = ''
                if localwords:
                    lsw_msg = ' ' + _('(including %(localwords)d %(pagelink)s)') % {
                        'localwords': len(localwords), 'pagelink': lsw_page.link_to(request)}
                msg = _('The following %(badwords)d words could not be found in the dictionary of '
                        '%(totalwords)d words%(localwords)s and are highlighted below:') % {
                    'badwords': len(badwords),
                    'totalwords': len(wordsdict)+len(localwords),
                    'localwords': lsw_msg} + "<br>"

                # figure out what this action is called
                action_name = os.path.splitext(os.path.basename(__file__))[0]

                # add a form containing the bad words
                if own_form:
                    msg = msg + ('<form method="post" action="%s">\n'
                                 '<input type="hidden" name="action" value="%s">\n') % (request.href(page.page_name), action_name)

                checkbox = '<input type="checkbox" name="newwords" value="%(word)s">%(word)s&nbsp;&nbsp;'
                msg = msg + (
                    " ".join([checkbox % {'word': wikiutil.escape(w, True), } for w in badwords]) +
                    '<p><input type="submit" name="button_newwords" value="%s"></p>' %
                        _('Add checked words to dictionary')
                )
                if own_form:
                    msg = msg + '</form>'
            else:
                badwords_re = None
                msg = _("No spelling errors found!")
        finally:
            # keep the timer balanced even if checking fails
            request.clock.stop('spellcheck')
    finally:
        # a dbhash based dictionary keeps its file open until closed;
        # a plain dict has no close() and needs no cleanup
        close = getattr(wordsdict, 'close', None)
        if close is not None:
            close()

    return badwords, badwords_re, msg
00208 
00209 
def execute(pagename, request):
    """Spell check the page pagename and send it with the result message.

    The check is only done if the user may write the local spelling words
    page (request.cfg.page_local_spelling_words) and may read pagename;
    otherwise an error message is added and the page is sent unchecked.
    Unknown words, if any, are highlighted in the page sent.
    """
    _ = request.getText

    page = Page(request, pagename)
    if not request.user.may.write(request.cfg.page_local_spelling_words):
        request.theme.add_msg(_("You can't save spelling words."), "error")
        page.send_page()
        return

    if not request.user.may.read(pagename):
        # no check was done, so there is no result message to show either
        request.theme.add_msg(_("You can't check spelling on a page you can't read."), "error")
        page.send_page()
        return

    badwords, badwords_re, msg = checkSpelling(page, request)
    request.theme.add_msg(msg, "dialog")
    if badwords:
        page.send_page(hilite_re=badwords_re)
    else:
        page.send_page()
00230