Back to index

moin  1.9.0~rc2
check_i18n.py
Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 # -*- coding: iso-8859-1 -*-
00003 """check_i18n - compare texts in the source with the language files
00004 
00005 Searches in the MoinMoin sources for calls of _() and tries to extract
00006 the parameter.  Then it checks the language modules if those parameters
00007 are in the dictionary.
00008 
00009 Usage: check_i18n.py [lang ...]
00010 
00011 Without arguments, checks all languages in i18n or the specified
00012 languages. Look into MoinMoin.i18n.__init__ for available language
00013 names.
00014 
00015 The script will run from the moin root directory, where the MoinMoin
00016 package lives, or from MoinMoin/i18n where this script lives.
00017 
00018 TextFinder class based on code by Seo Sanghyeon and the python compiler
00019 package.
00020 
00021 TODO: fix it for the changed i18n stuff of moin 1.6
00022 
00023 @copyright: 2003 Florian Festi, Nir Soffer, Thomas Waldmann
00024 @license: GNU GPL, see COPYING for details.
00025 """
00026 
# Encoding applied to the language names that are printed in the report
output_encoding = 'utf-8'

# These lead to crashes (MemoryError - due to missing codecs?)
#blacklist_files = ["ja.py", "zh.py", "zh_tw.py"]
#blacklist_langs = ["ja", "zh", "zh-tw"]

# If you have cjkcodecs installed, use this:
# blacklist_files: names of files that are skipped when scanning the sources
# blacklist_langs: languages left out of the report when no language is
#                  given on the command line
blacklist_files = []
blacklist_langs = []
00036 
00037 import sys, os, compiler
00038 from compiler.ast import Name, Const, CallFunc, Getattr
00039 
class TextFinder:
    """ Walk through AST tree and collect text from gettext calls

    Find all calls to gettext function in the source tree and collect
    the texts in a dict. Use compiler to create an abstract syntax tree
    from each source file, then find the nodes for gettext function
    call, and get the text from the call.

    Localized texts are usually translated during runtime by gettext
    functions and appear in the source as _('text...'). TextFinder finds
    calls to the '_' function in any namespace, or to your preferred
    gettext function.

    Note that TextFinder will only retrieve text from function calls
    with a constant argument like _('text'). Calls like _('text' % locals()),
    _('text 1' + 'text 2') are marked as bad call in the report, and the
    text is not retrieved into the dictionary. The arguments of a gettext
    call are not searched for further gettext calls.

    Note also that texts in source can appear several times in the same
    file or different files, but they will only appear once in the
    dictionary that this tool creates.

    The dictionary value for each text is a dictionary of filenames each
    containing a list of (best guess) line numbers containing the text.
    """

    def __init__(self, name='_'):
        """ Init with the gettext function name or '_'"""
        self._name = name       # getText function name
        self._dictionary = {}   # Unique texts in the found texts
        self._found = 0         # All good calls including duplicates
        self._bad = 0           # Bad calls: _('%s' % var) or _('a' + 'b')

        # Per file / per module state. It is initialized here as well, so
        # addText, addBadCall and walk do not fail with an AttributeError
        # when they are used before setFilename or visitModule was called.
        self._filename = None   # File being parsed, see setFilename
        self._visited = {}      # Node cache, reset by visitModule
        self._lineno = 'NA'     # Last line number seen, reset by visitModule

    def setFilename(self, filename):
        """Remember the filename we are parsing"""
        self._filename = filename

    def visitModule(self, node):
        """ Start the search from the top node of a module

        This is the entry point into the search. When compiler.walk is
        called it calls this method with the module node.

        This is the place to initialize module specific data.
        """
        self._visited = {}  # init node cache - we will visit each node once
        self._lineno = 'NA' # init line number

        # Start walking in the module node
        self.walk(node)

    def walk(self, node):
        """ Walk through all nodes, visiting each node only once

        The children of a node are only walked when parseNode did not
        recognize the node as a gettext call.
        """
        if node in self._visited:
            # We visited this node already
            return

        self._visited[node] = 1
        if not self.parseNode(node):
            for child in node.getChildNodes():
                self.walk(child)

    def parseNode(self, node):
        """ Parse function call nodes and collect text

        Return a true value if node is a call of the gettext function
        (good or bad), a false value otherwise.
        """

        # Get the current line number. Since not all nodes have a line number
        # we save the last line number - it should be close to the gettext call
        if node.lineno is not None:
            self._lineno = node.lineno

        # Only calls with at least one positional argument are interesting
        if not (isinstance(node, CallFunc) and node.args):
            return False

        func = node.node
        isGettext = (
            # Standard call _('text')
            (isinstance(func, Name) and func.name == self._name) or
            # A call to an object attribute: object._('text')
            (isinstance(func, Getattr) and func.attrname == self._name))
        if not isGettext:
            return False

        firstArg = node.args[0]
        if isinstance(firstArg, Const):
            # Good call with a constant _('text')
            self.addText(firstArg.value)
        else:
            self.addBadCall(node)
        return True

    def addText(self, text):
        """ Add text to dictionary and count found texts.

        Note that number of texts in dictionary could be different from
        the number of texts found, because some texts appear several
        times in the code.

        Each text value is a dictionary of filenames that contain the
        text and each filename value is the list of line numbers with
        the text. Missing line numbers are recorded as 'NA'.

        self._lineno is the last line number we checked. It may be the line
        number of the text, or near it.
        """
        self._found = self._found + 1

        # Create the keys for this text and this filename if needed
        textInfo = self._dictionary.setdefault(text, {})
        textInfo.setdefault(self._filename, []).append(self._lineno)

    def addBadCall(self, node):
        """Called when a bad call like _('a' + 'b') is found

        Counts the call and prints a warning with the node and the
        (best guess) location.
        """
        self._bad = self._bad + 1
        # print("") writes the same empty line as a bare print statement
        print("")
        print("<!> Warning: non-constant _ call:")
        print(" `%s`" % str(node))
        print(" `%s`:%s" % (self._filename, self._lineno))

    # Accessors

    def dictionary(self):
        """Return the dict of all texts: {text: {filename: [lineno, ...]}}"""
        return self._dictionary

    def bad(self):
        """Return the number of bad (non-constant) gettext calls"""
        return self._bad

    def found(self):
        """Return the number of good gettext calls, including duplicates"""
        return self._found
00171 
00172 
def visit(path, visitor):
    """Parse the source file at path and walk its syntax tree with visitor

    The visitor is told the filename before the walk starts.
    """
    visitor.setFilename(path)
    compiler.walk(compiler.parseFile(path), visitor)
00177 
00178 
00179 # MoinMoin specific stuff follows
00180 
00181 
class Report:
    """Language status report

    Compares the texts found in the source with the texts of one language
    and collects the texts that are missing in the language and the texts
    of the language that were not found in the source.
    """
    def __init__(self, lang, sourceDict):
        """Create the report for language lang

        sourceDict is the dict of texts found in the source, with the
        texts as keys. The report is created immediately.
        """
        self.__lang = lang
        self.__sourceDict = sourceDict
        self.__langDict = None
        self.__missing = {}
        self.__unused = {}
        self.__error = None
        self.__ready = 0
        self.create()

    def loadLanguage(self):
        """Load the text dict of the language

        Uses the module globals i18n and pysupport, which are imported in
        the __main__ section of this script.
        """
        filename = i18n.filename(self.__lang)
        self.__langDict = pysupport.importName("MoinMoin.i18n." + filename, "text")

    def create(self):
        """Compare language text dict against source dict

        If no (or an empty) language dict could be loaded, only the error
        message is set and nothing is compared.
        """
        self.loadLanguage()
        if not self.__langDict:
            self.__error = "Language %s not found!" % self.__lang
            self.__ready = 1
            return

        # Collect missing texts
        for text in self.__sourceDict:
            if text not in self.__langDict:
                self.__missing[text] = self.__sourceDict[text]

        # Collect unused texts
        for text in self.__langDict:
            if text not in self.__sourceDict:
                self.__unused[text] = self.__langDict[text]
        self.__ready = 1

    def total(self):
        """Return the number of texts of the language

        Returns 0 if the language dict could not be loaded, so summary()
        does not fail with len(None) for such a language.
        """
        if not self.__langDict:
            return 0
        return len(self.__langDict)

    def summary(self):
        """Return summary dict

        The keys are 'name', 'maintainer', 'total', 'missing', 'unused'
        and 'error'. 'error' is None if the language could be loaded.
        """
        langInfo = i18n.languages[self.__lang]
        summary = {
            'name': langInfo[i18n.ENAME].encode(output_encoding),
            'maintainer': langInfo[i18n.MAINTAINER],
            'total': self.total(),
            'missing': len(self.__missing),
            'unused': len(self.__unused),
            'error': self.__error
            }
        return summary

    def missing(self):
        """Return the dict of source texts missing in the language"""
        return self.__missing

    def unused(self):
        """Return the dict of language texts not found in the source"""
        return self.__unused
00234 
00235 
if __name__ == '__main__':
    # Script entry point: scan the MoinMoin sources for gettext calls and
    # print a translation report in wiki markup to stdout.
    # All output uses print(...) with a single argument; print("") writes
    # the same empty line as a bare print statement.
    import time

    # Check that we run from the root directory where MoinMoin package lives
    # or from the i18n directory where this script lives
    if os.path.exists('MoinMoin/__init__.py'):
        # Running from the root directory
        MoinMoin_dir = os.curdir
    elif os.path.exists(os.path.join(os.pardir, 'i18n')):
        # Running from i18n
        MoinMoin_dir = os.path.join(os.pardir, os.pardir)
    else:
        print(__doc__)
        sys.exit(1)

    # Insert MoinMoin_dir into sys.path
    sys.path.insert(0, MoinMoin_dir)
    # These become module globals and are used by the Report class
    from MoinMoin import i18n
    from MoinMoin.util import pysupport

    textFinder = TextFinder()

    # Find gettext calls in the source
    for root, dirs, files in os.walk(os.path.join(MoinMoin_dir, 'MoinMoin')):
        for name in files:
            if name.endswith('.py') and name not in blacklist_files:
                visit(os.path.join(root, name), textFinder)

    # Totals over all visited files
    unique = len(textFinder.dictionary())
    found = textFinder.found()
    bad = textFinder.bad() # bad calls should be fixed in the source!

    # Print report using wiki markup, so we can publish this on MoinDev
    # !!! Todo:
    #     save executive summary for the wiki
    #     save separate report for each language to be sent to the
    #     language translator.
    #     Update the wiki using XML-RPC??

    print("This page is generated by `MoinMoin/i18n/check_i18n.py`.")
    print("To recreate this report run `make check-i18n` and paste here")
    print("")
    print('----')
    print("")
    print('<<TableOfContents(2)>>')
    print("")
    print("")
    print("= Translation Report =")
    print("")
    print("== Summary ==")
    print("")
    print('Created on %s' % time.asctime())
    print("")

    print(('\n%(unique)d unique texts in dictionary of %(found)d texts '
           'in source.') % {'unique': unique, 'found': found})
    if bad:
        print('\n%(bad)d bad calls.' % {'bad': bad})
    print("")

    # Check languages from the command line or from moin.i18n against
    # the source
    if sys.argv[1:]:
        languages = sys.argv[1:]
    else:
        # Blacklisted languages make problems, maybe due to encoding?
        languages = [lang for lang in i18n.languages.keys()
                     if lang not in blacklist_langs]
    if 'en' in languages:
        languages.remove('en') # there is no en lang file
    languages.sort()

    # Create report for all languages
    report = {}
    for lang in languages:
        report[lang] = Report(lang, textFinder.dictionary())

    # Print summary for all languages
    print("||<:>'''Language'''||<:>'''Texts'''||<:>'''Missing'''"
          "||<:>'''Unused'''||")
    for lang in languages:
        print(("||%(name)s||<)>%(total)s||<)>%(missing)s||<)>%(unused)s||"
               ) % report[lang].summary())

    # Print details
    for lang in languages:
        langReport = report[lang]
        summary = langReport.summary() # do not shadow the builtin dict
        print("")
        print("== %(name)s ==" % summary)
        print("")
        print("Maintainer: <<MailTo(%(maintainer)s)>>" % summary)

        # Print the error of the report, if any (e.g. language not found)
        if summary['error']:
            print("")
            print(summary['error'])

        # Print missing texts, if any
        if langReport.missing():
            print("""
=== Missing texts ===

These items should ''definitely'' get fixed.

Maybe the corresponding english text in the source code was only changed
slightly, then you want to look for a similar text in the ''unused''
section below and modify i18n, so that it will match again.
""")
            for text in langReport.missing():
                # The 1-tuple keeps the formatting correct for any text value
                print(" 1. `%r`" % (text, ))

        # Print unused texts, if any
        if langReport.unused():
            print("""
=== Possibly unused texts ===

Be ''very careful'' and double-check before removing any of these
potentially unused items.

This program can't detect references done from wiki pages, from
userprefs options, from Icon titles etc.!
""")
            for text in langReport.unused():
                print(" 1. `%r`" % (text, ))
00373 
00374