Back to index

moin  1.9.0~rc2
12_to_13_mig08.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003     migration from moin 1.3 < patch-305 to moin 1.3 >= patch-305
00004     Here we fix 2 errors that crept in by use of mig1(?) and mig5:
00005     * the edit-log misses 1 field (missing TAB) on faked "missing editlog
00006       entry" entries
00007     * we accidentally gave ATTNEW/DRW/DEL an incremented revno (although
00008       attaching a file doesn't change page content and revision), so we need
00009       to convert those entries to use revno == 99999999 and renumber the
00010       normal entries so we have no missing numbers in between
00011     * edit-log's action field sometimes was empty (default: SAVE)
00012 
00013     Steps for a successful migration:
00014 
00015         1. Stop your wiki and make a backup of old data and code
00016 
00017         2. Make a copy of the wiki's "data" directory to your working dir
00018 
00019         3. Run this script from your working dir
00020 
00021         4. If there was no error, you will find:
00022             data.pre-mig8 - the script renames your data directory copy to that name
00023             data - converted data dir
00024 
00025         5. Verify conversion results (number of pages, size of logs, attachments,
00026            number of backup copies) - everything should be reasonable before
00027            you proceed.
00028 
00029         6. Copy additional files from data.pre-mig8 to data (maybe intermaps, logs,
00030            etc.). Be aware that the file contents AND file names of wiki content
00031            may have changed, so DO NOT copy the files inside the cache/ directory,
00032            let the wiki refill it.
00033 
00034         7. Replace the data directory your wiki uses with the data directory
00035            you created by previous steps. DO NOT simply copy the converted stuff
00036            into the original or you will duplicate pages and create chaos!
00037 
00038         8. Test it - if something has gone wrong, you still have your backup.
00039 
00040 
00041     @copyright: 2004 Thomas Waldmann
00042     @license: GPL, see COPYING for details
00043 """
00044 
00045 
00046 import os.path, sys, urllib
00047 
00048 # Insert THIS moin dir first into sys path, or you would run another
00049 # version of moin!
00050 sys.path.insert(0, '../../../..')
00051 from MoinMoin import wikiutil
00052 
00053 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
00054 
# Global registry of every edit-log entry gathered by gather_editlog():
#   info[pagename][timestamp_usecs] = [revno_new, data]
# where data is the list of the 9 edit-log fields
#   [timestampstr, rev, action, pagename, ip, host, id, extra, comment]
# (rev already converted to int).
# If revno_new is 99999999, we haven't assigned a new revno to this entry;
# generate_pages() fills in the real value for revision-creating entries.
info = {}
00058 
def gather_editlog(el_from, forcepagename=None):
    """ Read one edit-log file and merge its entries into the global `info`.

        Every line is split on TABs into the 9 edit-log fields (short lines
        are padded with empty fields) and repaired on the fly, converting to
        the future format:

        * an empty action becomes 'SAVE'
        * ATTNEW/ATTDRW/ATTDEL entries get the reserved revno 99999999,
          because attaching a file does not create a new page revision
        * faked "missing editlog entry" entries that lack one field (so the
          text sits in the id or in the comment column) are normalized

        The result is stored as info[pagename][timestamp] = [99999999, data];
        99999999 means "no new revno assigned yet", data is the list of the
        9 fields with the revno converted to int.

        Blank lines are skipped instead of aborting the migration.

        @param el_from: path of the edit-log file; a missing file is ignored
        @param forcepagename: if given, replaces the pagename found in the
                              log (used for edit-logs in pagedirs, so entries
                              of renamed pages end up under the current name)
    """
    if not os.path.exists(el_from):
        return

    missing_old = 'missing editlog entry for this page version'
    f = open(el_from)
    try:
        for l in f:
            line = l.rstrip('\n').rstrip('\r')
            if not line.strip():
                # e.g. a trailing empty line - it has no timestamp / revno
                # and would make the int() / long() conversions below fail
                continue
            data = line.split('\t')
            while len(data) < 9:
                data.append('')
            (timestampstr, revstr, action, pagename, ip, host, userid, extra, comment) = data

            if forcepagename: # we use this for edit-log in pagedirs (for renamed pages!)
                pagename = forcepagename

            if not action: # FIX: sometimes action is empty ...
                action = 'SAVE'

            if action in ['ATTNEW', 'ATTDRW', 'ATTDEL', ]:
                revstr = '99999999' # FIXES revno
                # use reserved value, ATT action doesn't create new rev of anything

            if (comment == '' and extra == '' and userid == missing_old) or \
               (extra == '' and userid == '' and comment == missing_old):
                # FIX omitted field bug on fake entries
                comment = 'missing edit-log entry for this revision' # more precise
                extra = ''
                userid = ''

            rev = int(revstr)
            data = [timestampstr, rev, action, pagename, ip, host, userid, extra, comment]

            entry = info.get(pagename, {})
            timestamp = long(timestampstr) # must be long for py 2.2.x
            entry[timestamp] = [99999999, data] # new revno, data
            info[pagename] = entry
    finally:
        f.close()
00095 
def gather_pagedirs(dir_from):
    """ Feed the edit-log of every page directory below dir_from/pages into
        gather_editlog().

        The directory name is passed as forced pagename, so the entries are
        filed under the name of the directory they were found in.

        @param dir_from: data directory containing the 'pages' directory
    """
    pages_root = opj(dir_from, 'pages')
    for name in listdir(pages_root):
        gather_editlog(opj(pages_root, name, 'edit-log'), name)
00104 
00105 
def generate_pages(dir_from, dir_to):
    """ Write the converted page directories below dir_to/pages.

        For every page in the global `info` this

        * creates pages/<page>/revisions and a per-page edit-log in which
          the revision creating entries (SAVE, SAVENEW, SAVE/REVERT) are
          renumbered 1, 2, 3, ... in timestamp order; all other entries
          carry the reserved revno 99999999
        * rewrites the extra field of */REVERT entries from the old to the
          new revno of the revision that was reverted to
        * copies each revision file from its old to its new revno
        * writes a 'current' file only if the original page has a readable
          one, so the DELETED status of a page is kept
        * copies the attachments directory, if there is one

        The chosen new revno is also remembered in info[page][timestamp][0].

        @param dir_from: original data directory (only read)
        @param dir_to: new data directory that gets populated
    """
    revactions = ['SAVE', 'SAVENEW', 'SAVE/REVERT', ] # these actions create revisions
    for pn in info:
        entry = info.get(pn, {})
        pagedir_from = opj(dir_from, 'pages', pn)
        # computed up front: the attachment copy at the end needs it even
        # when there is no edit-log entry for this page
        pagedir = opj(dir_to, 'pages', pn)
        tslist = list(entry.keys())
        if tslist:
            revdir = opj(pagedir, 'revisions')
            os.makedirs(revdir)
            revnew = 0
            tslist.sort()
            f = open(opj(pagedir, 'edit-log'), 'w')
            try:
                for ts in tslist:
                    data = entry[ts][1]
                    datanew = data[:]
                    rev, action, extra = data[1], data[2], data[7]
                    creates_rev = action in revactions
                    if creates_rev:
                        revnew += 1
                        revnewstr = '%08d' % revnew
                        entry[ts][0] = revnew # remember what new revno we chose
                    else: # ATTNEW,ATTDRW,ATTDEL
                        revnewstr = '99999999'
                    if action.endswith('/REVERT'):
                        # replace the old revno with the correct new revno
                        # (stays 0 if the reverted-to revision is unknown,
                        # the last matching entry wins)
                        revertrevold = int(extra)
                        revertrevnew = 0
                        for ts2 in tslist:
                            if entry[ts2][1][1] == revertrevold:
                                revertrevnew = entry[ts2][0]
                        datanew[7] = '%08d' % revertrevnew

                    datanew[1] = revnewstr
                    f.write('\t'.join(datanew)+'\n') # does make a CRLF on win32 in the file

                    if creates_rev: # we DO have a page rev for this one
                        file_from = opj(pagedir_from, 'revisions', '%08d' % rev)
                        file_to = opj(revdir, revnewstr)
                        copy_file(file_from, file_to)
            finally:
                f.close()

            # check if page exists or is deleted in orig dir
            try:
                cf = open(opj(pagedir_from, 'current'))
                try:
                    cf.read() # try to access it
                finally:
                    cf.close()
                page_exists = 1
            except (IOError, OSError):
                page_exists = 0

            # re-make correct DELETED status!
            if page_exists:
                f = open(opj(pagedir, 'current'), 'w')
                try:
                    f.write("%08d\n" % revnew) # we add a \n, so it is easier to hack in there manually
                finally:
                    f.close()

        att_from = opj(pagedir_from, 'attachments')
        if os.path.exists(att_from):
            att_to = opj(pagedir, 'attachments')
            copy_dir(att_from, att_to)
00171 
00172 
def generate_editlog(dir_from, dir_to):
    """ Write the global edit-log of the new data directory.

        All entries of all pages in the global `info` are merged into one
        dict keyed by timestamp (a later entry with the same timestamp
        replaces an earlier one) and written in timestamp order, one
        TAB separated line per entry. The revno written is the one stored
        in the gathered data, formatted as 8 digits.

        @param dir_from: not used
        @param dir_to: directory in which the 'edit-log' file gets created
    """
    merged = {}
    for per_page in info.values():
        for stamp, (revno_new, fields) in per_page.items():
            merged[stamp] = fields

    stamps = list(merged.keys())
    stamps.sort()

    out = open(opj(dir_to, 'edit-log'), 'w')
    for stamp in stamps:
        fields = merged[stamp]
        # build the output row without touching the stored list
        row = fields[:1] + ['%08d' % fields[1]] + fields[2:]
        out.write('\t'.join(row) + '\n')
    out.close()
00192 
00193 
00194 origdir = 'data.pre-mig8'
00195 
00196 # Backup original dir and create new empty dir
00197 try:
00198     os.rename('data', origdir)
00199     os.mkdir('data')
00200 except OSError:
00201     print "You need to be in the directory where your copy of the 'data' directory is located."
00202     sys.exit(1)
00203 
00204 #gather_editlog(opj(origdir, 'edit-log'))
00205 gather_pagedirs(origdir)
00206 
00207 generate_editlog(origdir, 'data')
00208 generate_pages(origdir, 'data')
00209 
00210 copy_dir(opj(origdir, 'plugin'), opj('data', 'plugin'))
00211 
00212 copy_dir(opj(origdir, 'user'), opj('data', 'user'))
00213 
00214 copy_file(opj(origdir, 'event-log'), opj('data', 'event-log'))
00215 
00216 copy_file(opj(origdir, 'intermap.txt'), opj('data', 'intermap.txt'))
00217 
00218