Back to index

plone3  3.1.7
xml.py
Go to the documentation of this file.
00001 """
00002 A custom transform using external command
00003 """
00004 
00005 __revision__ = '$Id: xml.py 4787 2005-08-19 21:43:41Z dreamcatcher $'
00006 
00007 from os.path import join, dirname, exists
00008 import re
00009 from os import popen3, popen4, system
00010 from cStringIO import StringIO
00011 
00012 from Products.PortalTransforms.interfaces import itransform
00013 from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
00014 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
00015 from Products.PortalTransforms.utils import log
00016 
class XsltTransform(commandtransform):
    """Transform XML content by launching an external XSLT processor.

    The stylesheet is looked up in the ``dtds`` configuration mapping, using
    the value returned by ``get_doctype`` (DOCTYPE public id) or, when that
    fails, by ``get_dtd``.  When nothing matches, ``default_transform`` is
    used if it is set; otherwise the data is passed through unchanged.

    Input and output mime types must be set correctly !

    You can associate different document type to different transformations.
    """
    __implements__ = (itransform,)

    __name__ = "xml_to_html"

    def __init__(self, name=None, **kwargs):
        """Set up the default configuration.

        name   -- optional replacement for the transform's ``__name__``.
        kwargs -- configuration entries overriding the defaults below.
        """
        self.config = {
            # sample configuration
            'binary_path'  : bin_search('xsltproc'),
            'command_line' : '%(transform)s %(input)s',
            'inputs'       : ('text/xml',),
            'output'       : 'text/html',
            'output_encoding' : 'UTF-8',
            'dtds'         : {
            '-//OASIS//DTD DocBook V4.1//EN' : '/usr/share/sgml/docbook/xsl-stylesheets-1.29/html/docbook.xsl'
            },
            'default_transform': ''
            }
        # One entry per configuration key: (type, label, help text), plus
        # column labels for the dict-typed entry.  Presumably consumed by the
        # transform configuration UI -- confirm against PortalTransforms.
        self.config_metadata = {
            'binary_path'  : ('string', 'Binary path',
                              'Path of the executable on the server.'),
            'command_line' : ('string', 'Command line',
                              '''Additional command line option.
There should be at least the input file (designed by "%(input)s") and the xsl
file (designed by "%(transform)s").The transformation\'s result must be printed on stdout.
'''),
            'inputs'       : ('list', 'Inputs', 'Input(s) MIME type. Change with care.'),
            'output'       : ('string', 'Output', 'Output MIME type. Change with care.'),
            'output_encoding': ('string', 'Output encoding', 'Output encoding.'),
            'dtds'         : ('dict', 'DTDs',
                              'Association of public ids or dtds to XSL transformations.',
                              ('Public id', 'XSLT path')),
            'default_transform' : ('string', 'Default xslt',
                                   'Default xslt, used when no specific transformation is found.'),
            }
        self.config.update(kwargs)
        if name:
            self.__name__ = name

    def __getattr__(self, attr):
        """Expose the mime type related configuration entries as attributes.

        Only called when normal attribute lookup fails; any other name
        raises AttributeError.
        """
        if attr in ('inputs', 'output', 'output_encoding'):
            return self.config[attr]
        raise AttributeError(attr)


    def convert(self, data, cache, **kwargs):
        """Transform ``data`` and store the result in ``cache``.

        data   -- raw xml content.
        cache  -- object receiving the result through ``setData`` and, when
                  images were produced, ``setSubObjects``.
        kwargs -- may hold ``filename``, used to name the temporary input
                  file ('unknown.xml' when missing or empty).

        Returns ``cache``.  The temporary directory is always removed.
        """
        base_name = sansext(kwargs.get("filename") or 'unknown.xml')
        dtds = self.config['dtds']
        tmpdir, fullname = self.initialize_tmpdir(data, filename=base_name)
        try:
            # Try the public id first, then fall back to the dtd.
            try:
                doctype = get_doctype(data)
            except DTException:
                try:
                    doctype = get_dtd(data)
                except DTException:
                    log('Unable to get doctype nor dtd in %s' % data)
                    doctype = None
            if doctype and doctype in dtds:
                data = self.invokeCommand(fullname, dtds[doctype])
            elif self.config['default_transform']:
                data = self.invokeCommand(fullname, self.config['default_transform'])
            # With no matching stylesheet and no default, data is stored as is.
            cache.setData(data)
            path, images = self.subObjects(tmpdir)
            objects = {}
            if images:
                self.fixImages(path, images, objects)
                cache.setSubObjects(objects)
            return cache
        finally:
            self.cleanDir(tmpdir)


    def invokeCommand(self, input_name, xsl):
        """Run the configured command on ``input_name`` with stylesheet ``xsl``.

        The command line template may refer to ``%(input)s``,
        ``%(transform)s`` and ``%(output)s`` (a 'tr_output' file next to the
        input file).

        Returns the content of the output file when the command created it,
        otherwise whatever the command printed on stdout, otherwise a fixed
        error message.
        """
        # Local import so that the module's import header is left untouched.
        import subprocess

        output_file = join(dirname(input_name), 'tr_output')
        command = '%(binary_path)s %(command_line)s' % self.config
        params = {'input': input_name, 'output': output_file, 'transform': xsl}
        # NOTE(review): the command goes through the shell and the paths are
        # substituted unquoted -- confirm that input_name cannot carry shell
        # metacharacters.
        # stdout is captured because the configuration help text asks for the
        # result to be printed there; stderr is left alone.
        process = subprocess.Popen(command % params, shell=True,
                                   stdout=subprocess.PIPE)
        captured = process.communicate()[0]

        if exists(output_file):
            stream = open(output_file)
            try:
                return stream.read()
            finally:
                stream.close()
        if captured:
            return captured
        return 'error occurs during transform. See error log'
00114 
00115 
00116 
def register():
    """Return a new XsltTransform using the default configuration.

    Presumably the hook PortalTransforms calls to obtain the transform --
    confirm against the transform registration code.
    """
    transform = XsltTransform()
    return transform
00119 
# DOCTYPE declaration with a PUBLIC identifier: group 1 is the public id,
# group 2 the system id that follows it.
DT_RGX = re.compile(r'<!DOCTYPE \w* PUBLIC "([^"]*)" "([^"]*)"')
# DOCTYPE declaration with only a SYSTEM identifier: group 1 is the system id.
DT_RGX2 = re.compile(r'<!DOCTYPE \w* SYSTEM "([^"]*)"')


class DTException(Exception):
    """Raised when a line expected to hold a DOCTYPE declaration does not match."""
00124 
def get_doctype(data):
    """Return the public id of the doctype declared in some raw xml data.

    data -- a string, or a file-like object providing ``readlines``.

    Blank lines and lines starting with '<?xml' or '<!-- ' are skipped; the
    first remaining line must hold a PUBLIC DOCTYPE declaration.

    Returns None when there is no such remaining line.
    Raises DTException when that line does not match.
    """
    if hasattr(data, 'readlines'):
        lines = data.readlines()
    else:
        # Split the string directly rather than wrapping it in cStringIO,
        # which does not accept unicode text that cannot be encoded as ASCII.
        lines = data.splitlines()
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.startswith('<?xml') or line.startswith('<!-- '):
            continue
        # Only the first significant line is examined.
        m = DT_RGX.match(line)
        if m is None:
            raise DTException('Unable to match doctype in "%s"' % line)
        return m.group(1)
    return None
00141 
def get_dtd(data):
    """Return the dtd (system id) of the doctype declared in some raw xml data.

    data -- a string, or a file-like object providing ``readlines``.

    Blank lines and lines starting with '<?xml' or '<!-- ' are skipped; the
    first remaining line must hold a DOCTYPE declaration, either PUBLIC (the
    second quoted value is returned) or SYSTEM (its only quoted value is
    returned).

    Returns None when there is no such remaining line.
    Raises DTException when that line matches neither form.
    """
    if hasattr(data, 'readlines'):
        lines = data.readlines()
    else:
        # Split the string directly rather than wrapping it in cStringIO,
        # which does not accept unicode text that cannot be encoded as ASCII.
        lines = data.splitlines()
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.startswith('<?xml') or line.startswith('<!-- '):
            continue
        # Only the first significant line is examined.
        m = DT_RGX.match(line)
        if m is not None:
            return m.group(2)
        m = DT_RGX2.match(line)
        if m is not None:
            return m.group(1)
        raise DTException('Unable to match doctype in "%s"' % line)
    return None
00161 
00162 
if __name__ == '__main__':
    # Manual smoke test: print the public id found in a small sample document.
    sample = '''<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE article PUBLIC "-//LOGILAB/DTD DocBook V4.1.2-Based Extension V0.1//EN" "dcbk-logilab.dtd" []>

<book id="devtools_user_manual" lang="fr">
'''
    print(get_doctype(sample))