Back to index

moin  1.9.0~rc2
12_to_13_mig06.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003     12_to_13_mig06.py - migration from < moin--main--patch-248 to >= patch 249
00004     * convert event-log from iso8859-1 to config.charset (utf-8) encoding
00005 
00006     Steps for a successful migration to utf-8:
00007         1. stop your wiki and make a backup
00008         2. make a copy of the wiki's "data" directory to your working dir
00009         3. clean up your working copy of the data dir:
00010             a. if you use CVS or GNU arch remove stuff like CVS/, .cvsignore
00011                or .arch-ids/ etc.
00012             b. remove *.pickle (used by moin for caching some information,
00013                will be re-created automatically), especially:
00014                    I. data/user/userdict.pickle
00015                    II. data/dicts.pickle
00016             c. if you used symlinks in data/text or elsewhere, remove them
00017         4. make sure that from_encoding and to_encoding match your needs (see
00018            beginning of script below and config.charset in moin_config.py) and
00019            run python 12_to_13_mig06.py from your working dir
00020         5. if there was no error, you will find:
00021             data.pre-mig6 (the script renames your data directory copy to that name)
00022             data (result, converted to utf-8)
00023         6. verify conversion results (number of pages, size of logs, attachments,
00024            number of backup copies) - everything should be reasonable before
00025            you proceed. Usually the file size gets larger when converting from
00026            iso8859-1 (or other non-unicode charset) to utf-8 except if your
00027            content is ASCII-only, then it will keep its size.
00028         7. copy additional files from data.pre-mig6 to data (maybe intermaps, logs,
00029            etc.). Be aware that the file contents AND file names of wiki content
00030            may have changed, so DO NOT copy the cache/ directory, but let
00031            the wiki recreate it.
00032         8. replace the data directory your wiki uses with the data directory
00033            you created by previous steps. DO NOT simply copy the converted stuff
00034            into the original or you will duplicate pages and create chaos!
00035         9. test it. if something has gone wrong, you still have your backup.
00036 
00037 
00038         10. if you use dictionaries for spellchecking, you have to convert them
00039             to config.charset, too. Remove your dict.cache before re-starting
00040             your wiki.
00041 
00042     @copyright: 2004 Thomas Waldmann
00043     @license: GPL, see COPYING for details
00044 """
00045 
00046 from_encoding = 'iso8859-1'
00047 to_encoding = 'utf-8'
00048 
00049 import os.path, sys, shutil, urllib
00050 
00051 sys.path.insert(0, '../../../..')
00052 from MoinMoin import wikiutil
00053 
00054 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, copy_dir
00055 
def convert_string(str, enc_from, enc_to):
    """Re-encode a byte string from one character encoding to another.

    @param str: byte string whose content is encoded in enc_from
    @param enc_from: name of the encoding the input is currently in
    @param enc_to: name of the encoding the result should use
    @return: the same text as a byte string encoded in enc_to
    """
    # NOTE: the parameter name shadows the builtin; it is kept because
    # callers may pass it by keyword.
    decoded = str.decode(enc_from)
    return decoded.encode(enc_to)
00058 
def _convert_kvpair(kvpair, enc_from, enc_to):
    """Re-encode one url-quoted "key=value" pair from enc_from to enc_to.

    Each part is url-unquoted, converted with convert_string and url-quoted
    again. Only the first '=' separates key from value, and a pair without
    any '=' is handled as a bare key, so unusual pairs no longer abort the
    whole migration.
    """
    parts = kvpair.split('=', 1)
    return '='.join([urllib.quote(convert_string(urllib.unquote(part),
                                                 enc_from, enc_to))
                     for part in parts])


def convert_eventlog(fname_from, fname_to, enc_from, enc_to):
    """Write a copy of an event-log with its key/value data re-encoded.

    Each line is split on tabs; the third field is taken to be a list of
    url-quoted key=value pairs joined by '&'. Those pairs are converted from
    enc_from to enc_to, all other fields are written back unchanged. Line
    endings are normalised to a single newline. Finally the access and
    modification times of the source file are applied to the new file.

    @param fname_from: path of the event-log to read
    @param fname_to: path of the converted event-log to create (overwritten)
    @param enc_from: encoding of the data in fname_from
    @param enc_to: encoding to use for the data in fname_to
    """
    print("%s -> %s" % (fname_from, fname_to))
    file_from = open(fname_from)
    try:
        file_to = open(fname_to, "w")
        try:
            for line in file_from:
                line = line.replace('\r', '').replace('\n', '')
                fields = line.split('\t')
                # Lines with fewer than three fields (e.g. blank lines) have
                # no key/value data; copy them through instead of failing.
                if len(fields) > 2:
                    fields[2] = '&'.join([_convert_kvpair(kvpair, enc_from, enc_to)
                                          for kvpair in fields[2].split('&')])
                file_to.write('\t'.join(fields) + '\n')
        finally:
            # try/finally (not "with") keeps this usable on old Python 2
            # releases while still closing both files when a line fails.
            file_to.close()
    finally:
        file_from.close()

    # Preserve the timestamps of the original log on the converted file.
    st = os.stat(fname_from)
    os.utime(fname_to, (st.st_atime, st.st_mtime))
00088 
# The working copy of 'data' is moved aside under this name and serves as the
# untouched source for the conversion.
origdir = 'data.pre-mig6'

try:
    os.rename('data', origdir)
except OSError:
    print("You need to be in the directory where your copy of the 'data' directory is located.")
    sys.exit(1)

# Rebuild 'data' as a full copy of the original tree.
copy_dir(origdir, 'data')

# The copied event-log is still in the old format: remove it and write the
# converted log in its place.
# NOTE(review): assumes opj is a side-effect-free path join - confirm in migutil.
_eventlog_to = opj('data', 'event-log')
os.remove(_eventlog_to) # old format
convert_eventlog(opj(origdir, 'event-log'), _eventlog_to, from_encoding, to_encoding)
00100 
00101