Back to index

plone3  3.1.7
makepox.py
Go to the documentation of this file.
00001 """Simple script to generate .pox files
00002 
00003     parses XML for i18n attrs and JS files for _() calls and generates an
00004     XML .pox template document (.poxt file)
00005 
00006     (c) Guido Wesdorp 2005
00007 
00008 """
00009 
00010 from xml.dom.minidom import parseString, getDOMImplementation
00011 import sys, re, os
00012 
# Stream used for all diagnostics; the generated catalog itself is printed
# to standard output by the script section at the bottom of the file.
stderr = sys.stderr

# When true, XMLParser.parse_file reports files that fail to parse on
# stderr.  Such files are skipped whether or not the warning is printed.
warn_on_broken_xml = True
00016 
class POX:
    """Container that collects extracted messages in an XML document.

    The document has a ``catalog`` root element.  Every distinct msgid gets
    one ``message`` child whose ``filenames`` attribute is a space-separated
    list of the files the msgid was found in, and which holds a ``msgid``
    and a ``msgstr`` element (the latter pre-filled with the msgid and
    marked with an empty ``i18n:translate`` attribute).
    """

    def __init__(self):
        impl = getDOMImplementation()
        self.doc = impl.createDocument(None, 'catalog', None)
        self.root = self.doc.documentElement
        # mapping from msgid to ([filenames], message node)
        self.processed = {}

    def add(self, msgid, filename):
        """Register ``msgid`` as occurring in ``filename``.

        The msgid is stripped, newlines and tabs become spaces and runs of
        spaces are collapsed to one.  A msgid seen before only gets the
        filename added to its existing node (once per file); a new msgid
        gets a fresh ``message`` node appended to the catalog.
        """
        # strip and reduce whitespace; str.replace returns a new string, so
        # the result has to be assigned back or the loop never terminates
        msgid = msgid.strip().replace('\n', ' ').replace('\t', ' ')
        while '  ' in msgid:
            msgid = msgid.replace('  ', ' ')

        if msgid in self.processed:
            filenames, node = self.processed[msgid]
            if filename not in filenames:
                filenames.append(filename)
                node.setAttribute('filenames', ' '.join(filenames))
            return

        doc = self.doc
        msgnode = doc.createElement('message')
        msgnode.setAttribute('filenames', filename)

        msgidnode = doc.createElement('msgid')
        msgidnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgidnode)

        # the untranslated text doubles as the default translation
        msgstrnode = doc.createElement('msgstr')
        msgstrnode.appendChild(doc.createTextNode(msgid))
        msgstrnode.setAttribute('i18n:translate', '')
        msgnode.appendChild(msgstrnode)

        self.root.appendChild(msgnode)
        self.processed[msgid] = ([filename], msgnode)

    def get_result(self):
        """Return the collected catalog as pretty-printed XML text."""
        return self.doc.toprettyxml()
00055 
class XMLParser:
    """Scans XML files (or well-formed HTML files) for i18n attributes.

    Instantiating the class parses every file in ``files`` and feeds each
    msgid found to ``pox.add(msgid, filename)``.
    """

    def __init__(self, files, pox):
        # node most recently returned by next_node(); None between documents
        self._current = None
        for filename in files:
            self.parse_file(filename, pox)

    def parse_file(self, filename, pox):
        """Extract the msgids of one file and add them to ``pox``.

        For elements carrying ``i18n:translate`` the attribute value is the
        msgid, or the serialised element content when the value is blank.
        For elements carrying ``i18n:attributes`` the value of every
        attribute named in the ';'-separated list is used as a msgid.

        A file that cannot be parsed is skipped; it is reported on stderr
        when ``warn_on_broken_xml`` is set.  Raises AttributeError when
        ``i18n:attributes`` names an attribute the element does not have.
        """
        # read bytes so the XML parser can honour the document's own
        # encoding declaration, and make sure the handle is closed again
        fp = open(filename, 'rb')
        try:
            data = fp.read()
        finally:
            fp.close()

        try:
            dom = parseString(data)
        except Exception:
            if warn_on_broken_xml:
                exc, e = sys.exc_info()[:2]
                stderr.write('Error parsing %s: %s - %s\n' % (
                                filename, exc, e))
            return

        # walk through all the nodes and scan for i18n: stuff
        while 1:
            node = self.next_node(dom)
            if node is None:
                break
            if node.nodeType != node.ELEMENT_NODE:
                continue
            attrs = node.attributes
            translate = attrs.getNamedItem('i18n:translate')
            if translate is not None:
                msgid = translate.value
                if not msgid.strip():
                    msgid = self.extract_text(node)
                pox.add(msgid, filename)
            attributes = attrs.getNamedItem('i18n:attributes')
            if attributes is not None:
                for attr in attributes.value.split(';'):
                    attr = attr.strip()
                    if not attr:
                        # empty entry, e.g. from a trailing ';'
                        continue
                    attritem = attrs.getNamedItem(attr)
                    if attritem is None:
                        raise AttributeError('No %s on %s in %s' % (
                                        attr, node.nodeName, filename))
                    pox.add(attritem.value, filename)

    def extract_text(self, node):
        """Return the serialised children of ``node`` as a single line.

        Each child is serialised and stripped separately, newlines and tabs
        become spaces, and runs of spaces are collapsed to one.
        """
        parts = []
        for child in node.childNodes:
            text = child.toxml().strip()
            parts.append(text.replace('\n', ' ').replace('\t', ' '))
        xml = ''.join(parts)
        while '  ' in xml:
            xml = xml.replace('  ', ' ')
        return xml

    def next_node(self, dom):
        """Return the node following the current one in document order.

        The first call for a given ``dom`` returns its document element;
        None is returned (and the position reset) once every node has been
        visited.
        """
        cur = self._current
        if cur is None or cur.ownerDocument is not dom:
            # nothing visited yet, or a left-over position in another doc
            self._current = dom.documentElement
        elif cur.hasChildNodes():
            self._current = cur.childNodes[0]
        else:
            # climb until some ancestor-or-self has a following sibling;
            # stopping after a single level would end the walk too early
            # whenever the parent is itself a last child
            while cur is not None and cur.nextSibling is None:
                cur = cur.parentNode
            if cur is None:
                self._current = None
            else:
                self._current = cur.nextSibling
        return self._current
00119 
class JSParser:
    """Scans JS files for _() calls.

    Instantiating the class parses every file in ``files`` and feeds the
    string literal of each recognised ``_()`` call to
    ``pox.add(literal, filename)``.  Only the first call on a line is
    looked at; a literal may continue on following lines when each line
    ends with a '+'.
    """

    def __init__(self, files, pox):
        for filename in files:
            self.parse_file(filename, pox)

    # '_(' preceded by a character that cannot be part of an identifier
    _startfuncreg = re.compile(r'.*?[^a-zA-Z0-9_]_\(')
    # '_(' at the very start of the line
    _startfuncreg_2 = re.compile(r'^_\(')

    def _find_call(self, line):
        """Return the match up to and including the first '_(' or None."""
        # a call at the start of the line wins, so that a '_(' occurring
        # inside its string argument is not mistaken for the call itself
        return (self._startfuncreg_2.search(line) or
                self._startfuncreg.search(line))

    def parse_file(self, filename, pox):
        """Extract the literals of all _() calls in one file into ``pox``.

        Raises ValueError (mentioning file and line) when a call's argument
        is not a plain string literal or the literal is empty.
        """
        fp = open(filename)
        try:
            lines = fp.readlines()
        finally:
            fp.close()

        lineno = 0
        more = False    # True while inside a multi-line concatenation
        chunks = []     # pieces of the literal collected so far
        for line in lines:
            lineno += 1
            if more or self._find_call(line):
                chunk, more = self._get_func_content(line, filename,
                                                        lineno, more)
                chunks.append(chunk)
            if chunks and not more:
                literal = ''.join(chunks).strip()
                if not literal:
                    raise ValueError('Unrecognized function content -- '
                                        'file %s, line %s' % (
                                            filename, lineno))
                literal = literal.replace('\t', ' ').replace('\n', ' ')
                while '  ' in literal:
                    literal = literal.replace('  ', ' ')
                chunks = []
                pox.add(literal, filename)

    def _get_func_content(self, line, filename, lineno, more=False):
        """return the content of the _() call in line

            if more is True, this will assume the function is already opened
            and continue adding to the result from the start of the line
            without searching for the opening '_(' first

            returns a tuple (content, more) where more is True if the end of
            the function body is not reached, in that case this method should
            be called again with the 'more' argument set to True

            backslashes are kept in the content as written in the source

            raises ValueError if no call is found, if the argument does not
            start with a quote, if the string is not terminated on this
            line, or if '+' is followed by anything on the same line
        """
        line = line.strip()
        if not more:
            match = self._find_call(line)
            if match is None:
                raise ValueError('no _() call found -- (file %s, line %s)'
                                    % (filename, lineno))
            # cut off everything up to and including '_(' by position;
            # replacing the matched text would also delete any later
            # occurrence of it, e.g. inside the string itself
            line = line[match.end():].strip()
        if not line:
            raise ValueError('no string found in function body -- '
                                '(file %s, line %s)' % (filename, lineno))
        quote = line[0]
        if quote not in ('"', "'"):
            raise ValueError('beginning of function body not a recognized '
                                'quote character: %s -- (file %s, line %s)' % (
                                    quote, filename, lineno))

        ret = []
        escaped = False     # True when the previous char was an active '\'
        closed = False
        pos = 1
        end = len(line)
        while pos < end:
            char = line[pos]
            pos += 1
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                closed = True
                break
            ret.append(char)
        if not closed:
            raise ValueError('unterminated string in function body -- '
                                '(file %s, line %s)' % (filename, lineno))

        # find out if we should continue after this (do we have a '+'
        # or a ');'?)
        more = False
        rest = line[pos:].strip()
        if rest and rest[0] == '+':
            if rest[1:].strip():
                raise ValueError('string concatenation only allowed for '
                                    'multiline strings, not for variable '
                                    'interpolation (use ${} instead) -- '
                                    '(file %s, line %s)' % (
                                        filename, lineno))
            more = True
        return ''.join(ret), more
00200 
if __name__ == '__main__':
    # Command-line entry point: every argument is a file to scan.  Files
    # ending in '.js' go to the JavaScript parser, all others are treated as
    # XML.  Progress messages go to stderr, the catalog to stdout.
    stderr.write('POX extract v0.1\n')
    stderr.write('(c) Guido Wesdorp 2004\n')
    files = sys.argv[1:]
    stderr.write('Going to parse files %s\n' % ', '.join(files))
    pox = POX()
    xml_files = [f for f in files if not f.endswith('.js')]
    js_files = [f for f in files if f.endswith('.js')]
    XMLParser(xml_files, pox)
    JSParser(js_files, pox)
    pres = pox.get_result()
    # Declare the i18n namespace and domain on the root element.  A catalog
    # without messages is serialised as '<catalog/>', so that form has to be
    # handled as well or the declaration would silently be left out.
    catalog_open = ('<catalog xmlns:i18n="http://xml.zope.org/namespaces/i18n" '
                    'i18n:domain="kupupox"')
    if '<catalog>' in pres:
        pres = pres.replace('<catalog>', catalog_open + '>')
    else:
        pres = pres.replace('<catalog/>', catalog_open + '/>')
    sys.stdout.write(pres + '\n')
    stderr.write('Done\n')
00216     print >>stderr, 'Done'