Back to index

moin  1.9.0~rc2
12_to_13_mig01.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003     12_to_13.py - migration from moin 1.2 to moin 1.3
00004     * switch the wiki to utf-8 encoding
00005     * switch quoting mechanism from _xx to (xx)
00006     * switch timestamps from float secs to int usecs
00007 
00008     Steps for a successful migration to utf-8:
00009         1. stop your wiki and make a backup
00010         2. make a copy of the wiki's "data" directory to your working dir
00011         3. clean up your working copy of the data dir:
00012             a. if you use CVS or GNU arch remove stuff like CVS/, .cvsignore
00013                or .arch-ids/ etc.
00014             b. remove *.pickle (used by moin for caching some information,
00015                will be re-created automatically), especially:
00016                    I. data/user/userdict.pickle
00017                    II. data/dicts.pickle
00018             c. if you used symlinks in data/text or elsewhere, remove them
00019         4. make sure that from_encoding and to_encoding match your needs (see
00020            beginning of script below and config.charset in moin_config.py) and
00021            run python 12_to_13_mig01.py from your working dir
00022         5. if there was no error, you will find:
00023             data.pre-mig1 (the script renames your data directory copy to that name)
00024             data (result, converted to utf-8)
00025         6. verify conversion results (number of pages, size of logs, attachments,
00026            number of backup copies) - everything should be reasonable before
00027            you proceed. Usually the file size gets larger when converting from
00028            iso8859-1 (or other non-unicode charset) to utf-8 except if your
00029            content is ASCII-only, then it will keep its size.
00030         7. copy additional files from data.pre-mig1 to data (maybe intermaps, logs,
00031            etc.). Be aware that the file contents AND file names of wiki content
00032            may have changed, so DO NOT copy the cache/ directory, but let
00033            the wiki recreate it.
00034         8. replace the data directory your wiki uses with the data directory
00035            you created by previous steps. DO NOT simply copy the converted stuff
00036            into the original or you will duplicate pages and create chaos!
00037         9. test it. if something has gone wrong, you still have your backup.
00038 
00039 
00040         10. if you use dictionaries for spellchecking, you have to convert them
00041             to config.charset, too. Remove your dict.cache before re-starting
00042             your wiki.
00043 
00044     @copyright: 2004 Thomas Waldmann
00045     @license: GPL, see COPYING for details
00046 """
00047 
00048 from_encoding = 'iso8859-1'
00049 #from_encoding = 'utf-8'
00050 
00051 to_encoding = 'utf-8'
00052 
00053 import os.path, sys, shutil, urllib
00054 
00055 sys.path.insert(0, '../../../..')
00056 from MoinMoin import wikiutil
00057 
00058 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, copy_dir
00059 
00060 # this is a copy of the wikiutil.unquoteFilename of moin 1.2.1
00061 
00062 def unquoteFilename12(filename, encoding):
00063     """
00064     Return decoded original filename when given an encoded filename.
00065 
00066     @param filename: encoded filename
00067     @rtype: string
00068     @return: decoded, original filename
00069     """
00070     str = urllib.unquote(filename.replace('_', '%'))
00071     try:
00072         newstr = str.decode(encoding)
00073     except UnicodeDecodeError: # try again with iso
00074         newstr = str.decode('iso-8859-1')
00075     return newstr
00076 
00077 unquoteWikiname12 = unquoteFilename12
00078 
00079 
def convert_string(str, enc_from, enc_to):
    """
    Re-encode a byte string from one charset to another.

    @param str: byte string to convert
    @param enc_from: charset the input is expected to be in
    @param enc_to: charset of the returned byte string
    @return: the text of str, encoded with enc_to
    """
    try:
        text = str.decode(enc_from)
    except UnicodeDecodeError:
        # input is not valid enc_from data: try again with iso
        text = str.decode('iso-8859-1')
    return text.encode(enc_to)
00086 
def qf_convert_string(str, enc_from, enc_to):
    """
    Convert a quoted name from the moin 1.2 quoting to the new quoting.

    The name is first unquoted with the 1.2 rules (decoding it from
    enc_from) and then quoted again with wikiutil.quoteWikinameFS using
    enc_to.

    @param str: name quoted the moin 1.2 way
    @param enc_from: charset used by the old name
    @param enc_to: charset to use for the new name
    @return: the re-quoted name
    """
    return wikiutil.quoteWikinameFS(unquoteWikiname12(str, enc_from), enc_to)
00091 
def convert_file(fname_from, fname_to, enc_from, enc_to):
    """
    Copy a file line by line, converting each line from enc_from to enc_to.

    The access and modification times of the source file are applied to
    the new file afterwards.

    @param fname_from: path of the file to read
    @param fname_to: path of the file to create; it must not exist yet
    @param enc_from: charset of the source file
    @param enc_to: charset of the file to write
    @raise RuntimeError: if fname_to already exists
    """
    print("%s -> %s" % (fname_from, fname_to))
    file_from = open(fname_from, "rb")
    try:
        if os.path.exists(fname_to):
            # String exceptions are rejected by Python 2.6+, so raise a real
            # exception class that carries the same message.
            raise RuntimeError("file exists %s" % fname_to)
        file_to = open(fname_to, "wb")
        try:
            for line in file_from:
                file_to.write(convert_string(line, enc_from, enc_to))
        finally:
            file_to.close()
    finally:
        # always release the source handle, even if the conversion failed
        file_from.close()
    st = os.stat(fname_from)
    os.utime(fname_to, (st.st_atime, st.st_mtime))
00104 
def convert_textdir(dir_from, dir_to, enc_from, enc_to, is_backupdir=0):
    """
    Convert every file of a text directory into a newly created directory.

    File names are re-quoted with qf_convert_string and file contents are
    converted with convert_file. For a backup directory the names have the
    form <name>.<timestamp>; the timestamp part is passed through
    wikiutil.timestamp2version instead of being re-quoted.

    @param dir_from: directory to read
    @param dir_to: directory to create (os.mkdir fails if it exists)
    @param enc_from: charset of the old names and contents
    @param enc_to: charset of the new names and contents
    @param is_backupdir: true if the file names carry a timestamp suffix
    """
    os.mkdir(dir_to)
    for old_name in listdir(dir_from):
        if is_backupdir:
            # only split at the first dot, the timestamp is a float
            page_part, stamp = old_name.split('.', 1)
            version = str(wikiutil.timestamp2version(float(stamp)))
            page_part = qf_convert_string(page_part, enc_from, enc_to)
            new_name = '.'.join([page_part, version])
        else:
            new_name = qf_convert_string(old_name, enc_from, enc_to)
        convert_file(opj(dir_from, old_name), opj(dir_to, new_name),
                     enc_from, enc_to)
00120 
def convert_pagedir(dir_from, dir_to, enc_from, enc_to):
    """
    Copy all page directories into a newly created directory.

    Each sub directory is copied as a whole under its re-quoted name; if it
    contains a 'last-edited' file, that file is rewritten with
    convert_editlog.

    @param dir_from: directory holding the old page directories
    @param dir_to: directory to create (os.mkdir fails if it exists)
    @param enc_from: charset of the old names and log entries
    @param enc_to: charset of the new names and log entries
    """
    os.mkdir(dir_to)
    for old_page in listdir(dir_from):
        new_page = qf_convert_string(old_page, enc_from, enc_to)
        print("%s -> %s" % (old_page, new_page))
        # third argument is copytree's symlinks flag
        shutil.copytree(opj(dir_from, old_page), opj(dir_to, new_page), 1)
        old_log = opj(dir_from, old_page, 'last-edited')
        new_log = opj(dir_to, new_page, 'last-edited')
        try:
            convert_editlog(old_log, new_log, enc_from, enc_to)
        except IOError:
            # we ignore it if the file does not exist
            pass
00133 
def convert_userdir(dir_from, dir_to, enc_from, enc_to):
    """
    Convert the contents of all files of the user directory.

    File names are kept as they are, only the contents are converted.

    @param dir_from: directory to read
    @param dir_to: directory to create (os.mkdir fails if it exists)
    @param enc_from: charset of the old files
    @param enc_to: charset of the new files
    """
    os.mkdir(dir_to)
    for profile in listdir(dir_from):
        source = opj(dir_from, profile)
        target = opj(dir_to, profile)
        convert_file(source, target, enc_from, enc_to)
00139 
def convert_editlog(log_from, log_to, enc_from, enc_to):
    """
    Rewrite a tab separated edit log in the new format.

    For every non-empty line:
      * field 0 is re-quoted with qf_convert_string
      * field 2 (a float timestamp) is replaced by the result of
        wikiutil.timestamp2version
      * a missing comment field is added as '' and a missing action field
        as 'SAVE'
      * field 5 (the comment) is converted from enc_from to enc_to

    @param log_from: path of the log to read
    @param log_to: path of the log to write (an existing file is overwritten)
    @param enc_from: charset of the old log
    @param enc_to: charset of the new log
    @raise IOError: if log_from can not be opened; log_to is not created then
    """
    file_from = open(log_from)
    try:
        file_to = open(log_to, "w")
        try:
            for line in file_from:
                line = line.replace('\r', '')
                line = line.replace('\n', '')
                if not line.strip(): # skip empty lines
                    continue
                fields = line.split('\t')
                fields[0] = qf_convert_string(fields[0], enc_from, enc_to)
                fields[2] = str(wikiutil.timestamp2version(float(fields[2])))
                if len(fields) < 6:
                    fields.append('') # comment
                if len(fields) < 7:
                    fields.append('SAVE') # action
                fields[5] = convert_string(fields[5], enc_from, enc_to)
                line = '\t'.join(fields) + '\n'
                file_to.write(line)
        finally:
            # close explicitly so the new log is flushed to disk right away
            file_to.close()
    finally:
        file_from.close()
00158 
# name the untouched copy of the data directory gets
origdir = 'data.pre-mig1'

# move the old data out of the way and start with an empty 'data' directory
try:
    os.rename('data', origdir)
    os.mkdir('data')
except OSError:
    print("You need to be in the directory where your copy of the 'data' directory is located.")
    sys.exit(1)

# page texts and their backups (backup file names carry a timestamp)
convert_textdir(opj(origdir, 'text'), opj('data', 'text'),
                from_encoding, to_encoding)
convert_textdir(opj(origdir, 'backup'), opj('data', 'backup'),
                from_encoding, to_encoding, 1)

# per page directories, user files and the global edit log
convert_pagedir(opj(origdir, 'pages'), opj('data', 'pages'),
                from_encoding, to_encoding)
convert_userdir(opj(origdir, 'user'), opj('data', 'user'),
                from_encoding, to_encoding)
convert_editlog(opj(origdir, 'editlog'), opj('data', 'editlog'),
                from_encoding, to_encoding)

# these are copied without any conversion
copy_file(opj(origdir, 'event.log'), opj('data', 'event.log'))
copy_dir(opj(origdir, 'plugin'), opj('data', 'plugin'))
copy_file(opj(origdir, 'intermap.txt'), opj('data', 'intermap.txt'))
00183 
00184