Back to index

moin  1.9.0~rc2
msgfmt.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: iso-8859-1 -*-
00003 """Generate binary message catalog from textual translation description.
00004 
00005 This program converts a textual Uniforum-style message catalog (.po file) into
00006 a binary GNU catalog (.mo file).  This is essentially the same function as the
00007 GNU msgfmt program, however, it is a simpler implementation.
00008 
00009 Usage: msgfmt.py [OPTIONS] filename.po
00010 
00011 Options:
00012     -o file
00013     --output-file=file
00014         Specify the output file to write to.  If omitted, output will go to a
00015         file named filename.mo (based off the input file name).
00016 
00017     -h
00018     --help
00019         Print this message and exit.
00020 
00021     -V
00022     --version
00023         Display version information and exit.
00024 
00025 Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>,
00026 refactored / fixed by Thomas Waldmann <tw AT waldmann-edv DOT de>.
00027 """
00028 
00029 import sys, os
00030 import getopt, struct, array
00031 
# Version string reported by the -V / --version command line option.
__version__ = "1.3"
00033 
class SyntaxErrorException(Exception):
    """Signals that the content of a .po file could not be parsed."""
00037 
class MsgFmt(object):
    """Transform a textual .po message catalog into the binary .mo format.

    Feed the lines of a .po file to read_po(), then call generate_mo() to
    obtain the binary catalog.  Accepted translations are kept in
    self.messages, a dict mapping msgid to msgstr.

    Only plain msgid/msgstr entries are handled; any other keyword line
    (for example msgctxt or msgid_plural) is reported as a syntax error.
    """

    def __init__(self):
        self.messages = {}

    def make_filenames(self, filename, outfile=None):
        """Compute .mo name from .po name or language.

        filename -- name of the .po file; '.po' is appended when missing
        outfile  -- name of the .mo file; when None it is derived from the
                    input name by swapping the extension for '.mo'

        Returns the tuple (infile, outfile).
        """
        if filename.endswith('.po'):
            infile = filename
        else:
            infile = filename + '.po'
        if outfile is None:
            outfile = os.path.splitext(infile)[0] + '.mo'
        return infile, outfile

    def add(self, id, str, fuzzy):
        """Add a non-fuzzy translation to the dictionary.

        Entries that are marked fuzzy or whose translation is empty are
        silently dropped.
        """
        if not fuzzy and str:
            self.messages[id] = str

    def _unquote(self, text, line_no):
        """Decode one quoted .po string; line_no is used for error reports."""
        import ast  # local import keeps the class independent of file-level imports
        # SECURITY: this used to be eval(), which executes arbitrary code found
        # in the catalog.  literal_eval() only accepts literals.
        # XXX: Does this always follow Python escape semantics?
        try:
            value = ast.literal_eval(text)
        except (SyntaxError, ValueError, TypeError):
            value = None
        if not isinstance(value, str):
            raise SyntaxErrorException(
                'Syntax error on line %d, before:\n%s' % (line_no, text))
        return value

    def read_po(self, lines):
        """Parse the lines of a .po file and collect its translations.

        lines -- iterable of text lines (native str) of the .po file

        Every completed, non-fuzzy entry with a non-empty msgstr is stored in
        self.messages via add().

        Raises SyntaxErrorException when a line is not a valid quoted string
        or when a string appears outside of a msgid/msgstr section.
        """
        ID = 1
        STR = 2
        section = None
        fuzzy = False
        msgid = msgstr = ''
        # Parse the catalog
        for line_no, line in enumerate(lines, 1):
            # If we get a comment line after a msgstr, this is a new entry
            if line.startswith('#') and section == STR:
                self.add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = False
            # Record a fuzzy mark (it applies to the entry that follows)
            if line.startswith('#,') and 'fuzzy' in line:
                fuzzy = True
            # Skip comments
            if line.startswith('#'):
                continue
            # Now we are in a msgid section, output previous section
            if line.startswith('msgid'):
                if section == STR:
                    self.add(msgid, msgstr, fuzzy)
                    fuzzy = False
                section = ID
                line = line[5:]
                msgid = msgstr = ''
            # Now we are in a msgstr section
            elif line.startswith('msgstr'):
                section = STR
                line = line[6:]
            # Skip empty lines
            line = line.strip()
            if not line:
                continue
            line = self._unquote(line, line_no)
            if section == ID:
                msgid += line
            elif section == STR:
                msgstr += line
            else:
                raise SyntaxErrorException(
                    'Syntax error on line %d, before:\n%s' % (line_no, line))
        # Add last entry
        if section == STR:
            self.add(msgid, msgstr, fuzzy)

    def _output_charset(self):
        """Return the charset declared in the catalog header, or 'utf-8'."""
        import codecs
        header = self.messages.get('') or self.messages.get(b'') or ''
        if isinstance(header, bytes):
            header = header.decode('latin-1')
        for field in header.splitlines():
            name, _sep, value = field.partition(':')
            if name.strip().lower() == 'content-type' and 'charset=' in value:
                charset = value.split('charset=', 1)[1].split(';')[0].strip()
                try:
                    codecs.lookup(charset)
                except LookupError:
                    # e.g. the untouched template placeholder "CHARSET"
                    break
                return charset
        return 'utf-8'

    def _encode(self, text, charset):
        """Return text as bytes; byte strings are passed through untouched."""
        if isinstance(text, bytes):
            return text
        return text.encode(charset)

    def generate_mo(self):
        """Return the generated output.

        The result is a byte string in GNU .mo layout: a header of seven
        32-bit integers, the key index, the value index, the NUL-terminated
        keys and finally the NUL-terminated values.  Integers are packed in
        native byte order.  Text (unicode) messages are encoded with the
        charset declared in the catalog header (UTF-8 when none is declared);
        byte strings are written unchanged.
        """
        charset = self._output_charset()
        # The keys are sorted in the .mo file; sort the encoded form so the
        # order matches what ends up in the file.
        entries = sorted([(self._encode(msgid, charset),
                           self._encode(msgstr, charset))
                          for msgid, msgstr in self.messages.items()])
        count = len(entries)
        # Each string is NUL terminated; the NUL does not count into the size.
        ids = b''.join([key + b'\0' for key, _value in entries])
        strs = b''.join([value + b'\0' for _key, value in entries])
        # The header is 7 32-bit unsigned integers.  We don't use hash tables,
        # so the keys start right after the two index tables (16 bytes per
        # entry: length and offset for the key and for the value).
        keystart = 7 * 4 + 16 * count
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        key_pos = keystart
        value_pos = valuestart
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for key, value in entries:
            koffsets += [len(key), key_pos]
            voffsets += [len(value), value_pos]
            key_pos += len(key) + 1
            value_pos += len(value) + 1
        index = koffsets + voffsets
        header = struct.pack("Iiiiiii",
                             0x950412de,        # Magic
                             0,                 # Version
                             count,             # # of entries
                             7 * 4,             # start of key index
                             7 * 4 + count * 8, # start of value index
                             0, 0)              # size and offset of hash table
        return b''.join([header,
                         struct.pack("%di" % len(index), *index),
                         ids,
                         strs])
00147 
00148 
def make(filename, outfile):
    """Compile one .po file into a .mo file.

    filename -- name of the .po file (the '.po' extension may be omitted)
    outfile  -- name of the .mo file to write, or None to derive it from
                the input name

    Errors are reported on stderr.  A read error or a syntax error in the
    catalog terminates the program with exit status 1 and leaves the output
    file untouched; a write error is only reported.
    """
    mf = MsgFmt()
    infile, outfile = mf.make_filenames(filename, outfile)
    try:
        with open(infile) as po_file:
            lines = po_file.readlines()
    except IOError as msg:
        sys.stderr.write("%s\n" % (msg,))
        sys.exit(1)
    try:
        mf.read_po(lines)
        output = mf.generate_mo()
    except SyntaxErrorException as msg:
        sys.stderr.write("%s\n" % (msg,))
        # Stop here: there is nothing to write, and opening the output file
        # would truncate an existing catalog.
        sys.exit(1)

    try:
        with open(outfile, "wb") as mo_file:
            mo_file.write(output)
    except IOError as msg:
        sys.stderr.write("%s\n" % (msg,))
00167 
00168 
def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    code -- exit status handed to sys.exit()
    msg  -- optional extra message printed after the help text
    """
    # sys.stderr.write() instead of the print statement so the function is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("%s\n" % (__doc__,))
    if msg:
        sys.stderr.write("%s\n" % (msg,))
    sys.exit(code)
00174 
00175 
def main():
    """Command line entry point: parse sys.argv and compile the given files.

    Recognized options are -h/--help, -V/--version and -o/--output-file.
    Every positional argument is compiled with make(); when -o is given, the
    same output name is passed for each of them.  Exits with status 1 when
    the options are invalid or no input file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write("msgfmt.py %s\n" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        # A missing input file is an error, so report it via the exit status.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
00200 
00201 
# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
00204