Back to index

plone3  3.1.7
spellcheck.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 
00003 """SpellChecker for Kupu"""
00004 
# Shell command line used to start the spell checker child process; it is
# handed to popen2.popen2() by SpellChecker.  ``-a`` asks aspell for its
# ispell-compatible pipe interface (one result block per input line).
COMMAND = 'aspell -a'
00006 
00007 import popen2, re
00008 
# Probe for Zope 2 by trying to import from Globals (presumably a Zope 2
# module, going by the comment in the else branch).  The imported name itself
# is not used anywhere in the visible code; only the success of the import
# matters.
try:
    from Globals import ClassSecurityInfo
except ImportError:
    # Import failed: skip the module-level security flag.
    pass
else:
    # hmmm... Zope 2...
    # NOTE(review): presumably this flag lets Zope's restricted code access
    # the objects defined in this module -- confirm against the Zope docs.
    __allow_access_to_unprotected_subobjects__ = 1
00016 
class SpellChecker:
    """Simple spell checker, uses ispell (or aspell) with pipes.

    A child process is started from the module-level ``COMMAND`` and driven
    one line at a time: a line of text is written to its stdin and result
    lines are read back from its stdout until an empty line ends the block.

    Attributes:
        chout: file object connected to the child's stdout (read side).
        chin: file object connected to the child's stdin (write side).
    """

    __allow_access_to_unprotected_subobjects__ = 1

    # "& <word> <count> <offset>: <suggestion>, <suggestion>, ..."
    reg_unknown = re.compile(r'^& (.*?) \d* \d*: (.*)$', re.U)
    # "# <word> <offset>" -- unknown word for which nothing is suggested
    reg_unknown_no_replacement = re.compile(r'^\# (.*?) \d*.*$', re.U)

    def __init__(self):
        """Start the spell checker child process."""
        self._open_pipes()

    def __del__(self):
        """Close both pipes, tolerating a partially constructed instance."""
        # __init__ may have failed before the pipes were assigned, hence the
        # getattr() guards instead of direct attribute access.
        for name in ('chout', 'chin'):
            pipe = getattr(self, name, None)
            if pipe is None:
                continue
            try:
                pipe.close()
            except IOError:
                pass

    def _open_pipes(self):
        """Spawn a fresh child process and discard its version banner."""
        self.chout, self.chin = popen2.popen2(COMMAND)
        # The child prints one identification line on startup.  It has to be
        # consumed after *every* spawn, otherwise check() would try to parse
        # it as a result line.
        self.chout.readline()

    def check(self, text):
        """Spell check ``text`` line by line.

        Args:
            text: string to check; it is split on newlines and every
                non-blank line is sent to the child process separately.

        Returns:
            A dict mapping each unrecognized word to the list of suggested
            replacements (an empty list when there are none).  The dict is
            empty when nothing was flagged.  Only the first report for a
            given word is kept.

        Raises:
            AssertionError: if the child emits a result line that matches
                neither of the two known formats.
        """
        result = {}
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                # Nothing is sent for a blank line, so there is no response
                # block to wait for; reading here would block or steal the
                # response belonging to the next line.
                continue
            # A leading '^' makes the pipe interface treat the rest of the
            # line as text even if it starts with a command character such
            # as '*', '#', '!' or '@'.
            self.write_line('^' + line)
            while True:
                resline = self.read_line()
                if not resline.strip():
                    # empty line (or EOF) terminates the response block
                    break
                if resline.strip() == '*':
                    # word was found in the dictionary
                    continue
                match = self.reg_unknown.match(resline)
                if match:
                    replacements = match.group(2).split(', ')
                else:
                    match = self.reg_unknown_no_replacement.match(resline)
                    replacements = []
                if not match:
                    # Explicit raise rather than ``assert`` so the check
                    # survives running under ``python -O``.
                    raise AssertionError(
                        'Unknown formatted line: %s' % resline)
                word = match.group(1)
                if word not in result:
                    result[word] = replacements
        return result

    def read_line(self):
        """Read one line from the child, without the trailing newline.

        Returns:
            The line's text, or '' when the line is empty or when the end of
            the stream is reached (a partially read line is dropped then).
        """
        buf = []
        while True:
            char = self.read_char()
            if not char:
                return ''
            if char == '\n':
                return ''.join(buf)
            buf.append(char)

    def write_line(self, line):
        """Send ``line`` plus a newline to the child and flush.

        If the write fails with IOError the child is restarted once and the
        write is retried; a second failure propagates to the caller.
        """
        data = '%s\n' % line
        try:
            self.chin.write(data)
            self.chin.flush()
        except IOError:
            self.reconnect()
            self.chin.write(data)
            self.chin.flush()

    def read_char(self):
        """Read a single character from the child.

        Returns:
            The character read, or '' at end of stream.  '' is also returned
            after an IOError forced a restart of the child.
        """
        try:
            return self.chout.read(1)
        except IOError:
            self.reconnect()
            # The freshly started child has not been sent anything, so there
            # is no pending output; reading again would block forever.
            # Report end-of-output for the response that was lost instead.
            return ''

    def reconnect(self):
        """Close the current pipes (ignoring IOError) and restart the child."""
        for pipe in (self.chout, self.chin):
            try:
                pipe.close()
            except IOError:
                pass
        self._open_pipes()
00105 
def format_result(result):
    """Convert the result dict to an XML document string.

    Args:
        result: dict mapping an unrecognized word to a list of replacement
            suggestions (possibly empty).

    Returns:
        A string holding an XML declaration followed by a
        ``<spellcheck_result>`` element with one ``<incorrect>`` child per
        dict entry.  Each child has a ``<word>`` and a ``<replacements>``
        element; the suggestions are joined with single spaces.
    """
    # Local import so this function does not rely on the module's import
    # block.
    from xml.sax.saxutils import escape

    buf = ['<?xml version="1.0" encoding="UTF-8" ?>\n<spellcheck_result>']
    for word, replacements in result.items():
        # Escape '&', '<' and '>' so arbitrary words cannot break the markup.
        buf.append('<incorrect><word>')
        buf.append(escape(word))
        buf.append('</word><replacements>')
        buf.append(escape(' '.join(replacements)))
        buf.append('</replacements></incorrect>')
    buf.append('</spellcheck_result>')
    return ''.join(buf)
00117 
if __name__ == '__main__':
    # Minimal interactive driver: prompt for text, spell check it and print
    # the outcome.  Entering 'q' (or end-of-file at the prompt) quits.
    c = SpellChecker()
    while 1:
        try:
            line = raw_input('Enter text to check: ')
        except EOFError:
            # Ctrl-D / closed stdin: leave the loop instead of a traceback
            break
        if line == 'q':
            break
        ret = c.check(line)
        # check() returns a dict that is empty when nothing was flagged; it
        # never returns None, so test for emptiness.
        if not ret:
            print('okay')
        else:
            print(ret)