Back to index

plone3  3.1.7
office_com.py
Go to the documentation of this file.
00001 # Need to be imported first to avoid dll loading problems.
00002 import pywintypes
00003 import pythoncom
00004 
00005 import os.path
00006 
00007 import win32com, sys, string, win32api, traceback, re, tempfile, os
00008 import win32com.client
00009 from win32com.client import gencache
00010 from win32com.client import constants, Dispatch
00011 
00012 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
00013 from Products.PortalTransforms.libtransforms.utils import bodyfinder, scrubHTML
00014 
class document(commandtransform):
    """Convert a Word document to HTML by driving Word through COM.

    Typical use: instantiate with the document name and raw data, call
    convert() to have Word write "<fullname>.htm" next to the temporary
    copy, then call html() to read the result back.
    """

    # Numeric values of the Word constants named in the original inline
    # comments; hard-coded so the win32com `constants` lookup is not needed.
    _WD_BROWSER_LEVEL_V4 = 0  # constants.wdBrowserLevelV4
    _WD_FORMAT_HTML = 8       # constants.wdFormatHTML

    def __init__(self, name, data):
        """Create the temporary work directory and copy the document into it.

        name -- transform name, passed to commandtransform.__init__; the
                value returned by self.name() is used as the file name, with
                ".doc" appended when it does not already end with it.
        data -- document content handed to initialize_tmpdir().

        Sets self.tmpdir and self.fullname from initialize_tmpdir()'s result.
        """
        commandtransform.__init__(self, name)
        name = self.name()
        if not name.endswith('.doc'):
            name = name + ".doc"
        self.tmpdir, self.fullname = self.initialize_tmpdir(data, filename=name)

    def convert(self):
        """Open self.fullname in Word and save it as "<fullname>.htm".

        The document is always closed again, even when configuring the web
        options or saving fails; the original exception then propagates to
        the caller.
        """
        try:
            # initialize COM for multi-threading, ignoring any errors
            # when someone else has already initialized differently.
            pythoncom.CoInitializeEx(pythoncom.COINIT_MULTITHREADED)
        except pythoncom.com_error:
            pass

        word = Dispatch("Word.Application")
        word.Visible = 0
        word.DisplayAlerts = 0
        doc = word.Documents.Open(self.fullname)
        try:
            # Let's set up some html saving options for this document
            doc.WebOptions.RelyOnCSS = 1
            doc.WebOptions.OptimizeForBrowser = 1
            doc.WebOptions.BrowserLevel = self._WD_BROWSER_LEVEL_V4
            doc.WebOptions.OrganizeInFolder = 0
            doc.WebOptions.UseLongFileNames = 1
            doc.WebOptions.RelyOnVML = 0
            doc.WebOptions.AllowPNG = 1
            # And then save the document into HTML
            doc.SaveAs(FileName = "%s.htm" % (self.fullname),
                       FileFormat = self._WD_FORMAT_HTML)

            # TODO -- Extract Metadata (author, title, keywords) so we
            # can populate the dublin core
            # Converter will need to be extended to return a dict of
            # possible MD fields
        finally:
            # Close on every path so a failed conversion does not leave the
            # temporary file open inside the Word instance.
            doc.Close()
        # NOTE(review): Word itself is left running -- the original code had
        # word.Quit() commented out; confirm whether that is intentional.
        # word.Quit()

    def html(self):
        """Return the body of the HTML file written by convert().

        Reads "<fullname>.htm", passes the text through scrubHTML() and
        returns what bodyfinder() extracts from the result.
        """
        htmlfile = open(self.fullname + '.htm', 'r')
        try:
            html = htmlfile.read()
        finally:
            # Release the handle even if the read fails.
            htmlfile.close()
        html = scrubHTML(html)
        body = bodyfinder(html)
        return body
00065 
00066 ## This function still has to be written. It's more difficult to delete the temp
00067 ## directory under Windows, because there is sometimes a subdirectory in it.
00068 ##    def cleanDir(self, tmpdir):