Back to index

plone3  3.1.7
msgfmt.py
Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 # -*- coding: iso-8859-1 -*-
00003 # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
00004 #
00005 # Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
00006 # translation service (PTS) of zope
00007 #
00008 # Slightly updated by Hanno Schlichting <plone@hannosch.info>
00009 
00010 """Generate binary message catalog from textual translation description.
00011 
00012 This program converts a textual Uniforum-style message catalog (.po file) into
00013 a binary GNU catalog (.mo file).  This is essentially the same function as the
00014 GNU msgfmt program, however, it is a simpler implementation.
00015 
00016 This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
00017 Now you can simply use it from another python module:
00018 
00019   from msgfmt import Msgfmt
00020   mo = Msgfmt(po).get()
00021 
00022 where po is the path to a po file as a string, an open po file ready for
00023 reading, or a list of strings (the readlines() of a po file), and mo is the
00024 compiled mo file as a binary string.
00025 
00026 Exceptions:
00027 
00028   * IOError if the file couldn't be read
00029 
00030   * msgfmt.PoSyntaxError if the po file has syntax errors
00031 
00032 """
import array
import struct
import sys

try:
    from cStringIO import StringIO
except ImportError:
    # Python 3 has no cStringIO; the compiled catalog is binary data.
    from io import BytesIO as StringIO
00036 
00037 __version__ = "1.1pts"
00038 
class PoSyntaxError(Exception):
    """Syntax error in a po file.

    The description passed to the constructor is kept in the ``msg``
    attribute and is embedded in the text returned by ``str()``.
    """

    def __init__(self, msg):
        # Initialise the base class as well, so that generic handlers which
        # inspect ``args`` see the description too.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        # Format through a 1-tuple: a bare ``% self.msg`` would unpack a
        # tuple-valued msg and fail (or silently format the wrong thing).
        return 'Po file syntax error: %s' % (self.msg,)
00046 
class Msgfmt:
    """Compile a textual po catalog into a binary GNU mo catalog.

    ``po`` may be the path of a po file (a ``str``), a list of lines, or an
    open file-like object offering ``seek()`` and ``readlines()``.  ``name``
    is only used to identify the catalog in error messages.

    Only plain ``msgid``/``msgstr`` pairs are understood; other keywords
    such as ``msgctxt``, ``msgid_plural`` or ``msgstr[0]`` end up being
    reported as ``PoSyntaxError``.

    The code runs unchanged on Python 2 and Python 3.  The result of
    ``get()`` is always a binary string (``str`` on Python 2, ``bytes`` on
    Python 3).
    """

    # On Python 3 text (str) and binary data (bytes) are distinct types, so
    # lines and messages have to be converted explicitly.
    _PY3 = sys.version_info[0] >= 3

    def __init__(self, po, name='unknown'):
        self.po = po
        self.name = name
        # Maps msgid -> msgstr for every non-empty, non-fuzzy translation.
        self.messages = {}

    def _to_native(self, line):
        """Return ``line`` as a native ``str`` without losing any byte.

        On Python 2 the line is returned untouched.  On Python 3 text is
        first encoded as UTF-8 and the bytes are then mapped one-to-one onto
        code points (latin-1), so that ``_to_binary`` can restore exactly
        the same bytes once the string literal has been evaluated.
        """
        if not self._PY3:
            return line
        if isinstance(line, str):
            line = line.encode('utf-8')
        return line.decode('latin-1')

    def _to_binary(self, text):
        """Inverse of ``_to_native``: return ``text`` as a binary string."""
        if self._PY3:
            return text.encode('latin-1')
        return text

    def readPoData(self):
        """Return the lines of the po catalog referenced by ``self.po``.

        Raises ``ValueError`` if ``self.po`` is of an unsupported type or
        yields no lines at all, and ``IOError`` if a path cannot be read.
        """
        po = self.po
        output = []
        if isinstance(po, str):
            fp = open(po, 'rb')
            # try/finally instead of a bare open().readlines() so the file
            # handle is released even if reading fails.
            try:
                output = fp.readlines()
            finally:
                fp.close()
        elif isinstance(po, list):
            output = po
        elif hasattr(po, 'readlines'):
            # Any file-like object: rewind first, it may have been read.
            po.seek(0)
            output = po.readlines()
        if not output:
            raise ValueError("self.po is invalid! %s" % type(po))
        return output

    def add(self, id, str, fuzzy):
        "Add a non-empty and non-fuzzy translation to the dictionary."
        if str and not fuzzy:
            self.messages[id] = str

    def generate(self):
        """Return the binary mo data for the collected messages.

        Layout: a header of seven 32-bit integers, the key index, the value
        index, all msgids and finally all msgstrs.  Integers are written in
        native byte order; no hash table is emitted.
        """
        to_binary = self._to_binary
        # The keys are sorted in the .mo file.
        entries = [(to_binary(msgid), to_binary(msgstr))
                   for msgid, msgstr in self.messages.items()]
        entries.sort()
        count = len(entries)

        # Each string is NUL terminated; the NUL does not count into the
        # size recorded in the index.
        nul = to_binary('\0')
        ids = to_binary('').join([msgid + nul for msgid, _ in entries])
        strs = to_binary('').join([msgstr + nul for _, msgstr in entries])

        # The header is 7 32-bit unsigned integers.  We don't use hash
        # tables, so the keys start right after the two index tables
        # (2 tables * 8 bytes per entry), and the values start after the
        # keys.
        key_pos = 7 * 4 + 16 * count
        value_pos = key_pos + len(ids)

        # The string table first has the list of keys, then the list of
        # values.  Each entry has first the size of the string, then the
        # file offset.
        koffsets = []
        voffsets = []
        for msgid, msgstr in entries:
            koffsets += [len(msgid), key_pos]
            voffsets += [len(msgstr), value_pos]
            key_pos += len(msgid) + 1
            value_pos += len(msgstr) + 1
        index = koffsets + voffsets

        output = struct.pack("Iiiiiii",
                             0x950412de,        # Magic
                             0,                 # Version
                             count,             # # of entries
                             7 * 4,             # start of key index
                             7 * 4 + count * 8, # start of value index
                             0, 0)              # size and offset of hash table
        output += struct.pack("%di" % len(index), *index)
        output += ids
        output += strs
        return output

    def get(self):
        """Parse the po catalog and return the compiled mo data.

        Raises ``PoSyntaxError`` if a line cannot be parsed, plus whatever
        ``readPoData`` raises when the input cannot be read.
        """
        ID = 1
        STR = 2

        section = None
        fuzzy = 0
        msgid = msgstr = ''

        # Parse the catalog
        lno = 0
        for raw in self.readPoData():
            lno += 1
            line = self._to_native(raw)
            # startswith() instead of line[0] so that an empty string (as
            # produced by splitlines()) cannot raise IndexError.
            is_comment = line.startswith('#')
            is_msgid = line.startswith('msgid')
            # If we get a comment line after a msgstr or a line starting
            # with msgid, this is a new entry.
            # XXX: the msgid test is needed because not all msgid/msgstr
            # pairs in the plone pos have a leading comment
            if (is_comment or is_msgid) and section == STR:
                self.add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if line.startswith('#,') and 'fuzzy' in line:
                fuzzy = 1
            # Skip comments
            if is_comment:
                continue
            if is_msgid:
                # Now we are in a msgid section
                section = ID
                line = line[5:]
                msgid = msgstr = ''
            elif line.startswith('msgstr'):
                # Now we are in a msgstr section.  It must belong to a
                # msgid; otherwise we would attach the text to stale data.
                if section is None:
                    raise PoSyntaxError(
                        'msgstr without msgid in line %d of po file %s'
                        % (lno, self.name))
                section = STR
                line = line[6:]
            # Skip empty lines
            text = line.strip()
            if not text:
                continue
            # XXX: Does this always follow Python escape semantics?
            # NOTE(security): eval() executes arbitrary expressions found in
            # the po file.  Only compile catalogs from trusted sources until
            # this is replaced by a real string-literal parser.
            try:
                value = eval(text, globals())
            except Exception:
                # sys.exc_info() is spelled the same on Python 2 and 3.
                msg = sys.exc_info()[1]
                raise PoSyntaxError('%s (line %d of po file %s): \n%s'
                                    % (msg, lno, self.name, text))
            if section == ID:
                msgid += value
            elif section == STR:
                msgstr += value
            else:
                raise PoSyntaxError('error in line %d of po file %s'
                                    % (lno, self.name))

        # Add last entry
        if section == STR:
            self.add(msgid, msgstr, fuzzy)

        # Compute output
        return self.generate()

    def getAsFile(self):
        """Return the compiled mo data wrapped in an in-memory file object."""
        return StringIO(self.get())

    def __call__(self):
        """Shortcut for ``getAsFile()``."""
        return self.getAsFile()