Back to index

moin  1.9.0~rc2
dump.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003 MoinMoin - Dump a MoinMoin wiki to static pages
00004 
00005 @copyright: 2002-2004 Juergen Hermann <jh@web.de>,
00006             2005-2006 MoinMoin:ThomasWaldmann
00007 @license: GNU GPL, see COPYING for details.
00008 """
00009 
00010 import sys, os, time, codecs, shutil, re, errno
00011 
00012 from MoinMoin import config, wikiutil, Page, user
00013 from MoinMoin import script
00014 from MoinMoin.action import AttachFile
00015 
# Assigned to request.cfg.url_prefix_static by PluginScript.mainloop so that
# the generated HTML uses relative paths.
url_prefix_static = "."
# HTML snippet substituted for %(logo_html)s in page_template.
logo_html = '<img src="logo.png">'
# Appended to the filesystem-quoted page name to build the names of the
# generated files and the links between them.
HTML_SUFFIX = ".html"
00019 
# Skeleton of every dumped page; PluginScript fills it with the % operator
# from a dict when writing each page. Placeholders:
#   charset      - value of the content-type meta tag
#   pagename     - used for <title> and for the heading link
#   theme        - path prefix of the css/common.css, css/screen.css and
#                  css/print.css stylesheet links
#   logo_html    - markup placed in the first table cell
#   navibar_html - navigation links placed below the heading
#   pagehtml     - rendered page content, placed inside <div id="page">
#   timestamp    - text emitted after the closing <hr>
page_template = u'''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=%(charset)s">
<title>%(pagename)s</title>
<link rel="stylesheet" type="text/css" media="all" charset="utf-8" href="%(theme)s/css/common.css">
<link rel="stylesheet" type="text/css" media="screen" charset="utf-8" href="%(theme)s/css/screen.css">
<link rel="stylesheet" type="text/css" media="print" charset="utf-8" href="%(theme)s/css/print.css">
<style type="text/css">
ul.pagetitle{
  display: inline;
  margin: 0;
  padding: 0;
  font-size: 1.5em;
}
li.pagetitle{
  display: inline;
  margin: 0;
}
td.noborder {
  border: 0;
}
</style>
</head>
<body>
<table>
<tr>
<td class="noborder">
%(logo_html)s
</td>
<td class="noborder">
<ul class="pagetitle">
<li class="pagetitle"><a class="backlink">%(pagename)s</a>
</ul>
<br><br>
%(navibar_html)s
</td>
</tr>
</table>
<hr>
<div id="page">
%(pagehtml)s
</div>
<hr>
%(timestamp)s
</body>
</html>
'''
00068 
00069 
def _attachment(request, pagename, filename, outputdir, **kw):
    """ Copy one attachment into the dump and return its relative URL.

    PluginScript.mainloop installs this function as a replacement for
    AttachFile.getAttachUrl, so asking for an attachment URL while a page is
    rendered copies the attached file into the dump as a side effect.

    @param request: request object, handed on to AttachFile.getAttachDir
    @param pagename: name of the page the attachment belongs to
    @param filename: attachment file name; it is encoded to config.charset
                     before being used for filesystem paths
    @param outputdir: root directory of the dump
    @param kw: accepted for call compatibility with the replaced function,
               not used
    @return: "attachments/<quoted pagename>/<quoted filename>", or "" when
             the attachment file is not readable
    """
    filename = filename.encode(config.charset)
    source_file = os.path.join(AttachFile.getAttachDir(request, pagename), filename)
    if not os.access(source_file, os.R_OK):
        # nothing to copy - the caller gets an empty URL
        return ""

    quoted_pagename = wikiutil.quoteWikinameFS(pagename)
    dest_dir = os.path.join(outputdir, "attachments", quoted_pagename)
    dest_file = os.path.join(dest_dir, filename)
    dest_url = "attachments/%s/%s" % (quoted_pagename, wikiutil.url_quote(filename))

    # Try to create the directory first and look at the path only when that
    # fails: this avoids the check-then-create race and, unlike a bare
    # "except:", does not swallow KeyboardInterrupt / SystemExit.
    try:
        os.makedirs(dest_dir)
    except OSError:
        # makedirs also fails when dest_dir already exists, which is the
        # normal case from the second attachment of a page on.
        # NOTE(review): script.fatal presumably terminates the process - confirm.
        if not os.path.exists(dest_dir):
            script.fatal("Cannot create attachment directory '%s'" % dest_dir)
        elif not os.path.isdir(dest_dir):
            script.fatal("'%s' is not a directory" % dest_dir)

    shutil.copyfile(source_file, dest_file)
    script.log('Writing "%s"...' % dest_url)
    return dest_url
00091 
00092 
class PluginScript(script.MoinScript):
    """\
Purpose:
========
This tool allows you to dump MoinMoin wiki pages to static HTML files.

Detailed Instructions:
======================
General syntax: moin [options] export dump [dump-options]

[options] usually should be:
    --config-dir=/path/to/my/cfg/ --wiki-url=wiki.example.org/

[dump-options] see below:
    0. You must run this script as owner of the wiki files, usually this is the
       web server user.

    1. To dump all the pages on the wiki to the directory '/mywiki'
       moin ... export dump --target-dir=/mywiki

    2. To dump all the pages readable by 'JohnSmith' on the wiki to the directory
       '/mywiki'
       moin ... export dump --target-dir=/mywiki --username JohnSmith
"""
    # NOTE(review): the docstring above is kept verbatim because it reads as
    # end-user help text (presumably printed by the moin command - confirm
    # before rewording it).

    def __init__(self, argv=None, def_values=None):
        """ Initialise the base script and register the dump-specific options.

        @param argv: passed through to script.MoinScript.__init__
        @param def_values: passed through to script.MoinScript.__init__
        """
        script.MoinScript.__init__(self, argv, def_values)
        self.parser.add_option(
            "-t", "--target-dir", dest="target_dir",
            help="Write html dump to DIRECTORY"
        )
        self.parser.add_option(
            "-u", "--username", dest="dump_user",
            help="User the dump will be performed as (for ACL checks, etc)"
        )

    def _write_page(self, request, outputdir, filename, pagename, pagehtml, navibar_html):
        """ Fill page_template for one page and write it below outputdir.

        @param request: request object; supplies cfg.theme_default
        @param outputdir: directory the file is written to
        @param filename: name of the output file inside outputdir
        @param pagename: page name shown in the title and the heading
        @param pagehtml: rendered page content, may be empty
        @param navibar_html: ready-made navigation markup
        """
        filepath = os.path.join(outputdir, filename)
        fileout = codecs.open(filepath, 'w', config.charset)
        try:
            fileout.write(page_template % {
                'charset': config.charset,
                # escape like the navibar links do, otherwise a page name
                # containing "&" or "<" ends up as raw markup in the output
                'pagename': wikiutil.escape(pagename),
                'pagehtml': pagehtml,
                'logo_html': logo_html,
                'navibar_html': navibar_html,
                'timestamp': time.strftime("%Y-%m-%d %H:%M"),
                'theme': request.cfg.theme_default,
            })
        finally:
            # close the file even if formatting or encoding the page fails
            fileout.close()

    def mainloop(self):
        """ moin-dump's main code.

        Creates the target directory, selects the pages to dump, renders each
        of them into an html file, copies one of the generated files to
        index.html and records rendering failures in error.log inside the
        target directory.
        """

        # Prepare output directory
        if not self.options.target_dir:
            script.fatal("you must use --target-dir=/your/output/path to specify the directory we write the html files to")
        outputdir = os.path.abspath(self.options.target_dir)
        try:
            os.mkdir(outputdir)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(outputdir):
                script.fatal("Cannot create output directory '%s'!" % outputdir)
        else:
            script.log("Created output directory '%s'!" % outputdir)

        # Insert config dir or the current directory to the start of the path.
        config_dir = self.options.config_dir
        if config_dir and os.path.isfile(config_dir):
            config_dir = os.path.dirname(config_dir)
        if config_dir and not os.path.isdir(config_dir):
            script.fatal("bad path given to --config-dir option")
        sys.path.insert(0, os.path.abspath(config_dir or os.curdir))

        self.init_request()
        request = self.request

        # fix url_prefix_static so we get relative paths in output html
        request.cfg.url_prefix_static = url_prefix_static

        # use this user for permissions checks
        request.user = user.User(request, name=self.options.dump_user)

        pages = request.rootpage.getPageList(user='') # get list of all pages in wiki
        pages.sort()
        if self.options.page: # did user request a particular page or group of pages?
            try:
                namematch = re.compile(self.options.page)
            except re.error:
                # not a valid regex: treat the option as a literal page name
                pages = [self.options.page]
            else:
                # no match at all: fall back to the literal page name as well
                pages = [page for page in pages if namematch.match(page)] or [self.options.page]

        # From here on page links and attachment links point into the dump:
        # page URLs become "<fs-quoted name>.html", attachment URLs are
        # produced (and the files copied) by _attachment.
        wikiutil.quoteWikinameURL = lambda pagename, qfn=wikiutil.quoteWikinameFS: (qfn(pagename) + HTML_SUFFIX)

        AttachFile.getAttachUrl = lambda pagename, filename, request, **kw: _attachment(request, pagename, filename, outputdir, **kw)

        errfile = os.path.join(outputdir, 'error.log')
        errlog = open(errfile, 'w')
        errcnt = 0
        try:
            page_front_page = wikiutil.getLocalizedPage(request, request.cfg.page_front_page).page_name
            page_title_index = wikiutil.getLocalizedPage(request, 'TitleIndex').page_name
            page_word_index = wikiutil.getLocalizedPage(request, 'WordIndex').page_name

            navibar_html = ''.join([
                '[<a href="%s">%s</a>]&nbsp;' % (wikiutil.quoteWikinameURL(p), wikiutil.escape(p))
                for p in (page_front_page, page_title_index, page_word_index)
            ])

            urlbase = request.url # save wiki base url
            for pagename in pages:
                # we have the same name in URL and FS
                filename = wikiutil.quoteWikinameURL(pagename)
                script.log('Writing "%s"...' % filename)
                pagehtml = ''
                try:
                    request.url = urlbase + pagename # add current pagename to url base
                    page = Page.Page(request, pagename)
                    request.page = page
                    try:
                        request.reset()
                        pagehtml = request.redirectedOutput(page.send_page, count_hit=0, content_only=1)
                    except (KeyboardInterrupt, SystemExit):
                        # let the user abort the dump
                        raise
                    except:
                        # a page that fails to render must not stop the dump:
                        # count it, log the traceback and go on
                        errcnt += 1
                        sys.stderr.write("*** Caught exception while writing page!\n")
                        errlog.write("~" * 78 + "\n")
                        errlog.write(filename + "\n") # page filename
                        import traceback
                        traceback.print_exc(None, errlog)
                finally:
                    # the file is written in any case, with an empty body
                    # if rendering failed
                    self._write_page(request, outputdir, filename, pagename, pagehtml, navibar_html)

            # copy FrontPage to "index.html"
            indexpage = page_front_page
            if self.options.page:
                indexpage = pages[0] # index page has limited use when dumping specific pages, but create one anyway
            shutil.copyfile(
                os.path.join(outputdir, wikiutil.quoteWikinameFS(indexpage) + HTML_SUFFIX),
                os.path.join(outputdir, 'index' + HTML_SUFFIX)
            )
        finally:
            # do not leak the log file when the dump is aborted part-way
            errlog.close()

        if errcnt:
            sys.stderr.write("*** %d error(s) occurred, see '%s'!\n" % (errcnt, errfile))
00234