Back to index

moin  1.9.0~rc2
__init__.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - internationalization (aka i18n)
00004 
00005     We use Python's gettext module for loading <language>.<domain>.mo files.
00006     Domain is "MoinMoin" for MoinMoin distribution code and something else for
00007     extension translations.
00008 
00009     Public attributes:
00010         languages -- dict of languages that MoinMoin knows metadata about
00011 
00012     Public functions:
00013         requestLanguage(request, try_user=True) -- return the request language
00014         wikiLanguages() -- return the available wiki user languages
00015         browserLanguages(request) -- return the browser accepted languages
00016         getDirection(lang) -- return the lang direction either 'ltr' or 'rtl'
00017         getText(str, request, lang,  **kw) -- return str translation into lang
00018 
00019     TODO: as soon as we have some "farm / server plugin dir", extend this to
00020           load translations from there, too.
00021 
00022     @copyright: 2001-2004 Juergen Hermann <jh@web.de>,
00023                 2005-2008 MoinMoin:ThomasWaldmann
00024     @license: GNU GPL, see COPYING for details.
00025 """
00026 
00027 import os, gettext, glob
00028 from StringIO import StringIO
00029 
00030 from MoinMoin import log
00031 logging = log.getLogger(__name__)
00032 
00033 from MoinMoin import caching
00034 from MoinMoin.i18n import strings
00035 
# This is a global for a reason: in persistent environments all languages in
# use will be cached; Note: you have to restart if you update language data.

# key: language, value: language metadata
# this gets loaded early and completely (it stays None until i18n_init()
# has loaded the metadata):
languages = None

# system_pages has a dictionary containing all english
# system page names and also all translated system pages names as keys,
# see also wikiutil.isSystemPage:
system_pages = {}

# key: language, value: Translation object for that language,
# this gets filled on demand by getText():
translations = {}
00049 
def po_filename(request, language, domain, i18n_dir='i18n'):
    """ Build the file name of the PO file for a language / domain pair.

        The result is <moinmoin_dir>/<i18n_dir>/<language>.<domain>.po, with
        moinmoin_dir taken from request.cfg.

        TODO: later, when we have a farm scope plugin dir, we can also load
              language data from there.

        @param request: the request object (only request.cfg.moinmoin_dir is used)
        @param language: the language part of the file name (callers in this
                         module also pass '*' to get a glob pattern)
        @param domain: the translation domain part of the file name
        @param i18n_dir: directory (relative to moinmoin_dir) containing the PO files
        @rtype: string
        @return: the PO file name
    """
    base_dir = request.cfg.moinmoin_dir
    basename = "%s.%s.po" % (language, domain)
    return os.path.join(base_dir, i18n_dir, basename)
00057 
def i18n_init(request):
    """ this is called early from request initialization and makes sure we
        have metadata (like what languages are available, direction of language)
        loaded into the global "languages".
        The very first time, this will be slow as it will load all languages,
        but next time it will be fast due to caching.

        If the disk cache needs an update, all <language>.MoinMoin.po files are
        parsed and the metadata is written to the cache. After that, the cache
        content (a dict with the keys 'languages' and 'system_pages') is merged
        into the module globals. Cache errors are ignored (best effort); in that
        case "languages" may still be None when this function returns.

        @param request: the request object
    """
    global languages
    request.clock.start('i18n_init')
    try:
        if languages is None:
            logging.debug("trying to load translations from cache")
            # the scope of the i18n cache needs to be per-wiki, because some translations
            # have http links (to some help pages) and they must not point to another
            # wiki in the farm (confusing and maybe not even readable due to ACLs):
            meta_cache = caching.CacheEntry(request, 'i18n', 'meta', scope='wiki', use_pickle=True)
            i18n_dir = os.path.join(request.cfg.moinmoin_dir, 'i18n')
            if meta_cache.needsUpdate(i18n_dir):
                logging.debug("cache needs update")
                _languages = {}
                _system_pages = {}
                # the english page names map to themselves:
                for pagename in strings.all_pages:
                    _system_pages[pagename] = ('en', pagename)
                for lang_file in glob.glob(po_filename(request, language='*', domain='MoinMoin')): # XXX only MoinMoin domain for now
                    language, domain, ext = os.path.basename(lang_file).split('.')
                    t = Translation(language, domain)
                    f = open(lang_file)
                    try:
                        t.load_po(f)
                    finally:
                        # do not leak the file handle if the PO file is broken
                        f.close()
                    logging.debug("loading translation %r" % language)
                    encoding = 'utf-8'
                    _languages[language] = {}
                    for key, value in t.info.items():
                        #logging.debug("meta key %s value %r" % (key, value))
                        _languages[language][key] = value.decode(encoding)
                    # map translated system page names back to (language, english name):
                    for pagename in strings.all_pages:
                        try:
                            pagename_translated = t.translation._catalog[pagename]
                        except KeyError:
                            pass
                        else:
                            _system_pages[pagename_translated] = (language, pagename)
                logging.debug("dumping language metadata to disk cache")
                try:
                    meta_cache.update({
                        'languages': _languages,
                        'system_pages': _system_pages,
                    })
                except caching.CacheError:
                    pass

            if languages is None: # another thread maybe has done it before us
                try:
                    logging.debug("loading language metadata from disk cache")
                    d = meta_cache.content()
                    if languages is None:
                        # sets the module globals "languages" and "system_pages"
                        globals().update(d)
                except caching.CacheError:
                    pass
    finally:
        # always stop the clock we started, even if loading failed
        request.clock.stop('i18n_init')
00117 
def bot_translations(request):
    """Return translations to be used by notification bot

    This is called by XML RPC code.

    All <language>.JabberBot.po files found in the i18n/jabberbot directory
    (below request.cfg.moinmoin_dir) are loaded.

    @param request: the request object
    @return: a dict (indexed by language) of dicts of translated strings (indexed by original ones)
    """
    result = {}
    po_dir = os.path.join('i18n', 'jabberbot')

    for lang_file in glob.glob(po_filename(request, i18n_dir=po_dir, language='*', domain='JabberBot')):
        language, domain, ext = os.path.basename(lang_file).split('.')
        t = Translation(language, domain)
        f = open(lang_file)
        try:
            t.load_po(f)
        finally:
            # do not leak the file handle if the PO file is broken
            f.close()
        # loadLanguage() provides t.raw (original -> translated text)
        t.loadLanguage(request, trans_dir=po_dir)
        # hand out a copy, so callers can't modify the Translation's own mapping
        result[language] = dict(t.raw.items())

    return result
00142 
class Translation(object):
    """ This class represents a translation. Usually this is a translation
        from English original texts to a single language, like e.g. "de" (german).

        The domain value defaults to 'MoinMoin' and this is reserved for
        translation of the MoinMoin distribution. If you do a translation for
        a third-party plugin, you have to use a different and unique value.
    """
    def __init__(self, language, domain='MoinMoin'):
        """
        @param language: the language this translation is for, e.g. 'de'
        @param domain: the translation domain
        """
        self.language = language
        self.domain = domain

    def load_po(self, f):
        """ load the po file

        The PO data is compiled to MO data in memory, then load_mo() is used.

        @param f: file-like object with the PO data (readlines() is called
                  on it, it is not closed here)
        """
        from MoinMoin.i18n.msgfmt import MsgFmt
        mf = MsgFmt()
        mf.read_po(f.readlines())
        mo_data = mf.generate_mo()
        mo_file = StringIO(mo_data)
        try:
            self.load_mo(mo_file)
        finally:
            mo_file.close()

    def load_mo(self, f):
        """ load the mo file, setup some attributes from metadata

        Sets self.translation and self.info. For every metadata entry that is
        present, it also sets self.name, self.ename, self.direction and
        self.maintainer. A missing entry or an invalid direction is only
        logged as a warning.

        @param f: file-like object with the MO data
        """
        # binary files have to be opened in the binary file mode!
        self.translation = gettext.GNUTranslations(f)
        self.info = info = self.translation.info()
        # One missing entry must not keep the other attributes from being set.
        for attr, key in (
            ('name', 'x-language'),
            ('ename', 'x-language-in-english'),
            ('direction', 'x-direction'),
            ('maintainer', 'last-translator'),
        ):
            if key in info:
                setattr(self, attr, info[key])
            else:
                logging.warning("metadata problem in %r: %s" % (self.language, repr(key)))
        # Explicit check instead of assert: asserts are stripped when running
        # with -O and an AssertionError has no useful message to log.
        direction = getattr(self, 'direction', None)
        if direction not in ('ltr', 'rtl', ):
            logging.warning("direction problem in %r: %s" % (self.language, repr(direction)))

    def formatMarkup(self, request, text, percent):
        """ Formats the text using the wiki parser/formatter.

        This raises an exception if a text needs itself to be translated,
        this could possibly happen with macros.

        While formatting, the request output is redirected and
        request.formatter is replaced; both are restored afterwards, also
        if formatting raises an exception.

        @param request: the request object
        @param text: the text to format
        @param percent: True if result is used as left-side of a % operator and
                        thus any GENERATED % needs to be escaped as %%.
        @return: the formatted text, with leading / trailing whitespace stripped
        """
        logging.debug("formatting: %r" % text)

        from MoinMoin.Page import Page
        from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
        if percent:
            from MoinMoin.formatter.text_html_percent import Formatter
        else:
            from MoinMoin.formatter.text_html import Formatter

        out = StringIO()
        request.redirect(out)
        try:
            parser = WikiParser(text, request, line_anchors=False)
            formatter = Formatter(request, terse=True)
            # Remember whether there was a formatter at all (it could also
            # have been None), so we can restore exactly the previous state.
            had_formatter = hasattr(request, 'formatter')
            if had_formatter:
                reqformatter = request.formatter
            request.formatter = formatter
            try:
                p = Page(request, "$$$$i18n$$$$")
                formatter.setPage(p)
                parser.format(formatter)
                text = out.getvalue()
            finally:
                if had_formatter:
                    request.formatter = reqformatter
                else:
                    del request.formatter
        finally:
            # undo the redirect, no matter whether formatting worked
            request.redirect()
        return text.strip()

    def loadLanguage(self, request, trans_dir="i18n"):
        """ Load the translation catalog, from the cache if possible.

        If the cache entry is outdated or can't be read, the PO file is
        loaded and the cache is updated (cache write errors are ignored).

        Afterwards, self.raw is the catalog (original -> translated text)
        and self.formatted is an empty dict.

        @param request: the request object
        @param trans_dir: directory containing the PO file, see po_filename()
        """
        request.clock.start('loadLanguage')
        try:
            # Translations of different domains for the same language must
            # not share a cache entry. The reserved 'MoinMoin' domain keeps
            # the plain language as key, so existing caches stay valid.
            if self.domain == 'MoinMoin':
                cache_key = self.language
            else:
                cache_key = "%s.%s" % (self.language, self.domain)
            # see comment about per-wiki scope in i18n_init
            cache = caching.CacheEntry(request, arena='i18n', key=cache_key, scope='wiki', use_pickle=True)
            langfilename = po_filename(request, self.language, self.domain, i18n_dir=trans_dir)
            needsupdate = cache.needsUpdate(langfilename)
            if not needsupdate:
                try:
                    unformatted = cache.content()
                    logging.debug("pickle %s load success" % self.language)
                except caching.CacheError:
                    logging.debug("pickle %s load failed" % self.language)
                    needsupdate = True

            if needsupdate:
                logging.debug("langfilename %s needs update" % langfilename)
                f = open(langfilename)
                try:
                    self.load_po(f)
                finally:
                    # do not leak the file handle if the PO file is broken
                    f.close()
                unformatted = self.translation._catalog
                # NOTE: has_wikimarkup only gets set here, it is not available
                # if the catalog was loaded from the cache.
                self.has_wikimarkup = self.info.get('x-haswikimarkup', 'False') == 'True'
                logging.debug("dumping lang %s" % self.language)
                try:
                    cache.update(unformatted)
                except caching.CacheError:
                    pass

            self.formatted = {}
            self.raw = unformatted
        finally:
            # always stop the clock we started, even if loading failed
            request.clock.stop('loadLanguage')
00253 
00254 
def getDirection(lang):
    """ Look up the text direction of a language.

    @param lang: the language, must be a key of the global "languages"
    @return: the 'x-direction' metadata value of that language,
             either 'ltr' or 'rtl'
    """
    metadata = languages[lang]
    return metadata['x-direction']
00258 
def getText(original, request, lang, **kw):
    """ Return a translation of some original text.

    Lookup order: the translation catalog for lang, then the user dict page
    "<LanguageInEnglish>Dict" (from request.dicts), then the english
    translation. If nothing is found, the original text is used (formatted
    on the fly, if wiki formatting was requested).

    @param original: the original (english) text
    @param request: the request object
    @param lang: the target language for the translation
    @keyword wiki: True to use the wiki parser/formatter on the translation result,
                   False to return the translation result "as is"
    @keyword percent: True if we need special escaping because we use the translation
                      result as the left side of a % operator: e.g. % chars need to
                      become %% for that usage. This will only escape generated % chars,
                      e.g. in wiki links to non-ascii pagenames (%XX%XX%XX).
                      False, if we don't use it as a left-side of % operator.
                      Only specify this option for wiki==True, it doesn't do
                      anything for wiki==False.
    @return: the translated (and maybe formatted) text
    """
    formatted = kw.get('wiki', False) # 1.6 and early 1.7 (until 2/2008) used 'formatted' with True as default!
    percent = kw.get('percent', False)
    if original == u"":
        return u"" # we don't want to get *.po files metadata!

    global translations
    if lang not in translations: # load translation if needed
        t = Translation(lang)
        t.loadLanguage(request)
        translations[lang] = t

    # get the matching entry in the mapping table
    translated = original
    translation = translations[lang]
    if original in translation.raw:
        translated = translation.raw[original]
        if formatted:
            key = (original, percent)
            if key in translation.formatted:
                translated = translation.formatted[key]
                if translated is None:
                    logging.error("formatting a %r text that is already being formatted: %r" % (lang, original))
                    translated = original + u'*' # get some error indication to the UI
            else:
                translation.formatted[key] = None # we use this as "formatting in progress" indicator
                try:
                    translated = translation.formatMarkup(request, translated, percent)
                except:
                    # Formatting failed: remove the "in progress" indicator
                    # again. Otherwise every later call for this text would
                    # be treated as a recursive formatting attempt.
                    translation.formatted.pop(key, None)
                    raise
                translation.formatted[key] = translated # remember it
    else:
        try:
            if languages is None:
                # languages not initialized yet
                raise KeyError
            language = languages[lang]['x-language-in-english']
            dictpagename = "%sDict" % language.replace(' ', '')
            dicts = request.dicts
            if dictpagename in dicts:
                userdict = dicts[dictpagename]
                translated = userdict[original]
            else:
                raise KeyError
        except KeyError:
            # do not simply return trans with str, but recursively call
            # to get english translation, maybe formatted.
            # if we don't find an english "translation", we just format it
            # on the fly (this is needed for quickhelp).
            if lang != 'en':
                logging.debug("falling back to english, requested string not in %r translation: %r" % (lang, original))
                translated = getText(original, request, 'en', wiki=formatted, percent=percent)
            elif formatted: # and lang == 'en'
                logging.debug("formatting for %r on the fly: %r" % (lang, original))
                translated = translations[lang].formatMarkup(request, original, percent)
    return translated
00327 
00328 
def requestLanguage(request, try_user=True):
    """
    Determine the user interface language for this request.

    The candidates are tried in this order:
     1. the language from the user preferences (valid users only, and
        only if try_user is true),
     2. a browser accepted language that is available on this wiki,
     3. the default language of the wiki (if it is available),
     4. English.

    This should be called once per request, then you should get the value from
    request object lang attribute.

    @param request: the request object
    @param try_user: try getting language from request.user
    @rtype: string
    @return: ISO language code, e.g. 'en'
    """
    # registered users: their preference wins
    if try_user:
        user = request.user
        if user.valid and user.language:
            return user.language

    # next best: something the browser asked for and this wiki offers
    browser_lang = get_browser_language(request)
    if browser_lang:
        return browser_lang

    # then the wiki default language, if we know it...
    known = wikiLanguages() or ["en"]
    wiki_default = request.cfg.language_default
    if wiki_default in known:
        return wiki_default

    # ... and if everything else fails: english
    return 'en'
00366 
00367 
def wikiLanguages():
    """
    Return the available user languages in this wiki.
    As we do everything in unicode (or utf-8) now, everything is available.

    @return: the module global "languages" as is (no copy is made), i.e. the
             language metadata indexed by language - or None, as long as
             the metadata has not been loaded yet
    """
    return languages
00374 
00375 
def browserLanguages(request):
    """
    Return the languages the user browser accepts.

    The values are taken from request.accept_languages, a sequence of
    (language, quality) pairs, see:
    http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4

    The languages are lower-cased and kept in the order of the request.
    Every sub language (like "en-us") is directly followed by its base
    language ("en"), so we can provide at least the base language if we
    do not have the sub language.

    @param request: the request object
    @rtype: list
    @return: languages and base languages, empty if the browser accepts nothing
    """
    accepted = request.accept_languages
    if not accepted:
        return []

    result = []
    for code, _quality in accepted:
        code = code.lower()
        result.append(code)
        if '-' in code:
            # "en-us" -> also offer "en"
            result.append(code.split('-')[0])
    return result
00399 
def get_browser_language(request):
    """
    Find the first language the user browser accepts (see browserLanguages)
    that is also available on this wiki (see wikiLanguages).

    @param request: the request object
    @rtype: string
    @return: ISO language code, e.g. 'en' - or the empty string, if there
             is no such language, if no languages are available or if
             request.cfg.language_ignore_browser is true
    """
    available = wikiLanguages()
    if not available or request.cfg.language_ignore_browser:
        return ''
    for candidate in browserLanguages(request):
        if candidate in available:
            return candidate
    return ''
00416