Back to index

plone3  3.1.7
office_wvware.py
Go to the documentation of this file.
00001 import re, tempfile
00002 import os, os.path
00003 from Products.PortalTransforms.libtransforms.utils import bin_search, \
00004      sansext, bodyfinder, scrubHTML
00005 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
00006 
class document(commandtransform):
    """Word document converted to HTML by the external ``wvHtml`` command.

    Typical use: instantiate with the document name and raw data, call
    ``convert()`` to run the command, then ``html()`` to fetch the result.
    """

    def __init__(self, name, data):
        """Initialization: create tmp work directory and copy the
        document into a file.

        name -- name handed to ``commandtransform.__init__``; the value
                returned by ``self.name()`` is used as the file name, with
                ".doc" appended when it is missing.
        data -- content of the document, passed to ``initialize_tmpdir``.
        """
        commandtransform.__init__(self, name, binary="wvHtml")
        name = self.name()
        if not name.endswith('.doc'):
            name = name + ".doc"
        self.tmpdir, self.fullname = self.initialize_tmpdir(data, filename=name)

    def convert(self):
        """Convert the document.

        Runs ``self.binary`` inside the work directory so that
        ``<self.__name__>.html`` is produced there.  Nothing is done on
        non-posix platforms.  Conversion is best effort: no exit status is
        checked, a failure only shows up when ``html()`` cannot open the
        output file.
        """
        # for windows, install wvware from GnuWin32 at C:\Program Files\GnuWin32\bin
        # you can use:
        # wvware.exe -c ..\share\wv\wvHtml.xml --charset=utf-8 -d d:\temp d:\temp\test.doc > test.html

        if os.name != 'posix':
            return

        # Imported here so the module-level import block stays untouched.
        import subprocess

        # An argument list (no shell) keeps quotes, backticks or "$(...)"
        # in the document name from being interpreted by /bin/sh.
        # NOTE(review): assumes self.binary is a plain executable path on
        # posix (set by commandtransform.__init__) -- confirm.
        command = [self.binary,
                   '--charset=utf-8',
                   self.fullname,
                   '%s.html' % self.__name__]
        try:
            subprocess.call(command, cwd=self.tmpdir)
        except OSError:
            # os.system() never raised when the command could not be
            # started; stay silent here too and let html() report the
            # missing output file.
            pass

    def html(self):
        """Return the body of the generated HTML file.

        Reads ``<tmpdir>/<self.__name__>.html``, passes the text through
        ``scrubHTML`` and returns the result of ``bodyfinder``.  Opening
        the file raises if ``convert()`` did not produce it.
        """
        htmlfile = open("%s/%s.html" % (self.tmpdir, self.__name__), 'r')
        try:
            # try/finally instead of "with": closes the handle even when
            # read() fails, and still parses on old Python 2 releases.
            html = htmlfile.read()
        finally:
            htmlfile.close()
        html = scrubHTML(html)
        body = bodyfinder(html)
        return body