Back to index

plone3  3.1.7
word_to_html.py
Go to the documentation of this file.
00001 from Products.PortalTransforms.interfaces import itransform
00002 
# Conversion option flags. None of these four names is read anywhere in this
# module's visible code; presumably the office_* backend modules consume
# them -- TODO confirm against those modules.
EXTRACT_BODY  = 1
EXTRACT_STYLE = 0

FIX_IMAGES    = 1
IMAGE_PREFIX  = "img_"

# office_uno is disabled because it does not support multithreading yet.
# While this is False, the backend selection below never imports office_uno
# and uses the platform-specific backend instead.
ENABLE_UNO = False
00011 
import os

# Select the ``document`` backend class used by the transform.
#
# office_uno is tried first, but only when ENABLE_UNO is set. If it is
# disabled, or if loading it fails for any reason, fall back to the
# platform-native backend: office_wvware on POSIX systems, office_com
# everywhere else. A failure to import the fallback backend is not caught
# and propagates to whoever imports this module.
document = None
if ENABLE_UNO:
    try:
        from office_uno import document
    except Exception:
        # Best-effort: UNO being unavailable or broken must not prevent the
        # module from loading; use the platform backend instead.
        document = None

if document is None:
    if os.name == 'posix':
        from office_wvware import document
    else:
        from office_com import document
00029 
00030 import os.path
00031 
class word_to_html:
    """Transform turning ``application/msword`` data into ``text/html``.

    The actual conversion is delegated to the ``document`` backend class
    selected at import time of this module.
    """

    __implements__ = itransform

    __name__ = "word_to_html"
    inputs   = ('application/msword',)
    output  = 'text/html'
    output_encoding = 'utf-8'

    # Module name of the backend in use. The attribute name is misspelled
    # ("tranform"); it is kept as-is because code outside this file may read
    # it under this name.
    tranform_engine = document.__module__

    def name(self):
        """Return the registered name of this transform."""
        return self.__name__

    def convert(self, data, cache, **kwargs):
        """Convert Word document ``data`` to HTML and store it in ``cache``.

        data   -- the document content, handed unchanged to the backend.
        cache  -- result holder; receives the HTML through ``setData`` and
                  the sub-objects mapping through ``setSubObjects``.
        kwargs -- accepted for interface compatibility; not used here.

        Returns ``cache``. Any exception raised by the backend propagates
        to the caller, after the backend's working directory was cleaned.
        """
        # The original file name is not known here; a fixed placeholder is
        # handed to the backend.
        orig_file = 'unknown.doc'

        doc = document(orig_file, data)
        try:
            doc.convert()
            html = doc.html()

            path, images = doc.subObjects(doc.tmpdir)
            objects = {}
            if images:
                # presumably fills ``objects`` with the image data -- the
                # backend is not visible from here, TODO confirm.
                doc.fixImages(path, images, objects)
        finally:
            # Always remove the working directory, also when a conversion
            # step fails; otherwise every failed conversion leaves it behind.
            doc.cleanDir(doc.tmpdir)

        cache.setData(html)
        cache.setSubObjects(objects)
        return cache
00061 
def register():
    """Create and return a new ``word_to_html`` transform instance."""
    transform = word_to_html()
    return transform