Back to index

moin  1.9.0~rc2
12_to_13_mig05.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003     migration from moin 1.3 < patch-221 to moin 1.3 >= patch-221
00004     We need to make versioning completely different. Problem:
00005         * old versioning used UNIX timestamps (32bits), but had collisions due
00006           to seconds resolution (on the FS, they were avoided by using floats
00007           in early moin versions, but floats suck and xmlrpc only does ints).
00008         * then we moved to usecs resolution, collision problem solved, but
00009           xmlrpc broke because it can't handle long ints. Oh well ... 8-(
00010         * So for the 3rd try, we now just enumerate versions 1,2,3,4,...
00011           This makes xmlrpc happy again (and matches better how xmlrpc was
00012           designed, as it has separate fields for timestamp and version),
00013           but we now have to keep the timestamp somewhere else. The appropriate
00014           place is of course the edit-log.
00015 
00016     So we change like this:
00017         * data/pages/PageName/backup/<UTC timestamp in usecs>
00018           -> data/pages/PageName/revisions/<revno>
00019     A page save is now done like this:
00020         * mv 'current' 'notcurrent'
00021         * if success ('current' was there):
00022             * revno = read('notcurrent')
00023             * revno++
00024             * write('notcurrent', revno)
00025             * save to revisions/<revno>
00026             * mv 'notcurrent' 'current'
00027         * else give error msg and let user retry save
00028 
00029     * data/user/<uid>.bookmark stays in usecs
00030     * data/event-log stays in usecs
00031     * data/edit-log and data/pages/PageName/edit-log stay in usecs and:
00032         * old: PageName UserIp TimeUSecs UserHost UserId Comment Action
00033         * new: TimeUSecs PageRev Action PageName UserIp UserHost UserId Extra Comment
00034         *                =======                                        =====
00035          * PageRev is identical to the filename in revisions/ directory
00036          * Extra is used for some stuff formerly put into comment field, like
00037            revert info or attach filename
00038 
00039     Steps for a successful migration:
00040 
00041         1. Stop your wiki and make a backup of old data and code
00042 
00043         2. Make a copy of the wiki's "data" directory to your working dir
00044 
00045         3. Run this script from your working dir
00046 
00047         4. If there was no error, you will find:
00048             data.pre-mig5 - the script renames your data directory copy to that name
00049             data - converted data dir
00050 
00051         5. Verify conversion results (number of pages, size of logs, attachments,
00052            number of backup copies) - everything should be reasonable before
00053            you proceed.
00054 
00055         6. Copy additional files from data.pre-mig5 to data (maybe intermaps, logs,
00056            etc.). Be aware that the file contents AND file names of wiki content
00057            may have changed, so DO NOT copy the files inside the cache/ directory,
00058            let the wiki refill it.
00059 
00060         7. Replace the data directory your wiki uses with the data directory
00061            you created by previous steps. DO NOT simply copy the converted stuff
00062            into the original or you will duplicate pages and create chaos!
00063 
00064         8. Test it - if something has gone wrong, you still have your backup.
00065 
00066 
00067     @copyright: 2004 Thomas Waldmann
00068     @license: GPL, see COPYING for details
00069 """
00070 
00071 
00072 import os.path, sys, urllib
00073 
00074 # Insert THIS moin dir first into sys path, or you would run another
00075 # version of moin!
00076 sys.path.insert(0, '../../../..')
00077 from MoinMoin import wikiutil
00078 
00079 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
00080 
# info[pagename][timestamp_usecs] = [file_from, data]
#   file_from: path of the old revision file belonging to this log entry;
#              if file_from is None, we have just a log entry, but no
#              associated file (yet)
#   data:      the log entry as a list in the new edit-log field order
# Filled by gather_editlog() and gather_pagedirs().
info = {}
# same structure as info, but only the pages selected by remove_trash()
info2 = {}
# exists[pagename] = True/False: whether the page dir has a 'text' file
# (set by gather_pagedirs())
exists = {}
# listing of the old pages directory, set by gather_pagedirs()
pagelist = []
00087 
def gather_editlog(dir_from, el_from):
    """ Gather everything that is in an edit-log into the internal data
        structures, converting each entry to the future format.

        Old line format (tab separated):
            PageName UserIp TimeUSecs UserHost UserId Comment Action
        Lines with only 6 fields carry the action in the 6th field and have
        no comment. Shorter lines are padded with empty fields; a line with
        more than 7 fields raises ValueError on unpacking.

        Every line is stored as
            info[pagename][timestamp] = [None, data]
        where data is a list in the new field order
            TimeUSecs PageRev Action PageName UserIp UserHost UserId Extra Comment
        PageRev is left empty here and the None stands for the revision
        file that is not known yet.

        @param dir_from: unused, kept for call compatibility
        @param el_from: path of the edit-log to read; a missing file is
                        silently ignored
    """
    if not os.path.exists(el_from):
        return
    logfile = open(el_from)
    try: # try/finally (not "with"), so the handle is closed on old pythons too
        for line in logfile:
            fields = line.rstrip('\n').split('\t')
            origlen = len(fields)
            while len(fields) < 7:
                fields.append('')
            (pagename, ip, timestamp, host, userid, comment, action) = fields
            if origlen == 6:
                # short line: the 6th field is the action, there is no comment
                action = comment
                comment = ''

            extra = ''
            if action == 'SAVE/REVERT': # mig4 did not convert this one
                ts = long(comment) # must be long for py 2.2.x
                if ts < 4000000000: # UNIX timestamp (secs)
                    extra = str(wikiutil.timestamp2version(ts))
                else: # usecs timestamp
                    extra = str(ts)
                # later we convert this timestamp to a revision number
                comment = ''
            if action in ['ATTNEW', 'ATTDRW', 'ATTDEL', ]:
                extra = comment # filename
                comment = '' # so we can use comments on ATT* in future

            timestamp = long(timestamp) # must be long for py 2.2.x
            data = [timestamp, '', action, pagename, ip, host, userid, extra, comment]

            entry = info.get(pagename, {})
            entry[timestamp] = [None, data]
            info[pagename] = entry
    finally:
        logfile.close()
00122 
def gather_pagedirs(dir_from, is_backupdir=0):
    """ Gather information from the page directories, i.e. the "text" and
        "backup" files (and also the page-local edit-log), and try to
        merge/synchronize it with the information gathered from the edit-log.

        For every page directory below dir_from, revision files get attached
        to the SAVE / SAVENEW / SAVE/REVERT log entries of that page:
            1. backup files whose name equals a log timestamp (exact match)
            2. the "text" file, if its mtime is within 2 seconds of the
               latest log entry
            3. remaining backup files whose name is within 2 seconds of a
               remaining log timestamp, or about one hour off (win32
               daylight saving bug)
            4. if exactly one backup file and one log entry remain, they
               are paired
        Files that still have no log entry get a faked one. Save-type log
        entries that still have no file are deleted (except the latest entry
        of a page without "text" file, which is kept as the deletion marker).

        Side effects: sets the global pagelist, updates info and exists.

        @param dir_from: the old "pages" directory
        @param is_backupdir: unused
    """
    global pagelist
    pagelist = listdir(dir_from)
    for pagename in pagelist:
        editlog_from = opj(dir_from, pagename, 'edit-log')
        gather_editlog(dir_from, editlog_from)

        entry = info.get(pagename, {})

        loglist = [] # editlog timestamps of page revisions
        for ts, data in entry.items():
            if data[1][2] in ['SAVE', 'SAVENEW', 'SAVE/REVERT', ]:
                loglist.append(ts)
        loglist.sort()
        lleftover = loglist[:] # log timestamps still without a file

        # remember the latest log entry
        if lleftover:
            llatest = lleftover[-1]
        else:
            llatest = None

        backupdir_from = opj(dir_from, pagename, 'backup')
        if os.path.exists(backupdir_from):
            backuplist = listdir(backupdir_from)
            bleftover = backuplist[:] # backup files still without a log entry
            for bfile in backuplist:
                backup_from = opj(backupdir_from, bfile)
                ts = long(bfile) # must be long for py 2.2.x
                if ts in loglist: # we have an editlog entry, exact match
                    entry[ts][0] = backup_from
                    lleftover.remove(ts)
                    bleftover.remove(bfile)

        text_from = opj(dir_from, pagename, 'text')
        found_text = False
        if os.path.exists(text_from): # we have a text file, it should match latest log entry
            exists[pagename] = True
            mtime = os.path.getmtime(text_from)
            if llatest and llatest in lleftover:
                ts = llatest
                if abs(wikiutil.timestamp2version(mtime) - ts) < 2000000: # less than 2 seconds diff
                    entry[ts][0] = text_from
                    lleftover.remove(ts)
                    found_text = True
            else: # we have no log entries left 8(
                ts = wikiutil.timestamp2version(mtime)
                data = [ts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[ts] = [text_from, data]
        else:
            # this page was maybe deleted, so we remember for later:
            exists[pagename] = False
            if llatest in lleftover: # if a page is deleted, the last log entry has no file
                entry[llatest][0] = None
                lleftover.remove(llatest)

        if os.path.exists(backupdir_from):
            backuplist = listdir(backupdir_from)
            for bfile in backuplist:
                if not bfile in bleftover:
                    continue
                backup_from = opj(backupdir_from, bfile)
                bts = long(bfile) # must be long for py 2.2.x
                for ts in lleftover:
                    tdiff = abs(bts - ts)
                    # off by about an hour: win32 daylight saving bug
                    dst_bug = 3599000000 <= tdiff <= 3601000000
                    if tdiff < 2000000 or dst_bug: # editlog, inexact match
                        entry[ts][0] = backup_from
                        lleftover.remove(ts)
                        bleftover.remove(bfile)
                        if dst_bug:
                            print("Warning: Win32 daylight saving bug encountered & fixed!")
                        # A backup file belongs to one log entry only. We
                        # also must not continue iterating the list we just
                        # modified, a 2nd match would try to remove bfile
                        # again and crash with ValueError.
                        break

            if len(bleftover) == 1 and len(lleftover) == 1: # only 1 left, must be this
                backup_from = opj(backupdir_from, bleftover[0])
                entry[lleftover[0]][0] = backup_from
                lleftover = []
                bleftover = []

            # fake some log entries
            for bfile in bleftover:
                backup_from = opj(backupdir_from, bfile)
                bts = long(bfile) # must be long py 2.2.x
                # use the timestamp of THIS backup file (bts), not a stale
                # ts left over from one of the loops above
                data = [bts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[bts] = [backup_from, data]

        # check if we still haven't matched the "text" file
        if not found_text and os.path.exists(text_from):
            if llatest in lleftover: # latest log entry still free
                entry[llatest][0] = text_from # take it. do not care about mtime of file.
                lleftover.remove(llatest)
            else: # log for "text" file is missing or latest was taken by other rev 8(
                mtime = os.path.getmtime(text_from)
                ts = wikiutil.timestamp2version(mtime) # take mtime, we have nothing better
                data = [ts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[ts] = [text_from, data]

        # delete unmatching log entries
        for ts in lleftover:
            #print "XXX Deleting leftover log entry: %r" % entry[ts]
            del entry[ts]

        info[pagename] = entry
00231 
def remove_trash(dir_from):
    """ Select the pages worth converting: copy them from info to info2.

        A page is taken over if it is listed in pagelist, its name does not
        end with 'MoinEditorBackup' and its page directory below dir_from
        has a 'text' file or a 'backup' directory. Everything else (dead
        pages, editor backups) is omitted.

        @param dir_from: the old "pages" directory
    """
    for name, revisions in info.items():
        if name not in pagelist:
            continue
        if name.endswith('MoinEditorBackup'):
            continue
        has_text = os.path.exists(opj(dir_from, name, 'text'))
        if has_text or os.path.exists(opj(dir_from, name, 'backup')):
            info2[name] = revisions
00240 
def generate_pages(dir_from, dir_to):
    """ Write the new page directories for all pages in info2.

        For every page that has log entries this creates
            <dir_to>/pages/<pagename>/revisions/<revno>  (copied old files)
            <dir_to>/pages/<pagename>/edit-log           (new format)
            <dir_to>/pages/<pagename>/current            (last revno)
        Revision numbers are 8 digit, zero padded and assigned by counting
        the log entries of the page in timestamp order. The log entry lists
        stored in info2 are updated in place (timestamp as string, revno,
        revert target as revno).

        An 'attachments' directory of the old page is copied as well.

        @param dir_from: the old data directory
        @param dir_to: the new data directory
    """
    for pagename in info2:
        entry = info2.get(pagename, {})
        # Computed for every page: the attachment copy below needs it even
        # when the page has no revisions (it used to pick up the directory
        # of the previously processed page then).
        pagedir = opj(dir_to, 'pages', pagename)
        tslist = list(entry.keys())
        if tslist:
            os.makedirs(opj(pagedir, 'revisions'))
            editlog_file = opj(pagedir, 'edit-log')
            f = open(editlog_file, 'w')
            try:
                rev = 0
                tslist.sort()
                for ts in tslist:
                    rev += 1
                    revstr = '%08d' % rev
                    file_from, data = entry[ts]
                    data[0] = str(ts)
                    data[1] = revstr
                    if data[2].endswith('/REVERT'):
                        # replace the timestamp with the revision number
                        revertts = long(data[7]) # must be long for py 2.2.x
                        try:
                            revertrev = int(entry[revertts][1][1])
                        except KeyError:
                            # never should trigger...
                            print("********* KeyError %s entry[%d][1][1] **********" % (pagename, revertts))
                            revertrev = 0
                        data[7] = '%08d' % revertrev
                    f.write('\t'.join(data)+'\n')
                    if file_from is not None:
                        file_to = opj(pagedir, 'revisions', revstr)
                        copy_file(file_from, file_to)
            finally:
                f.close()

            # 'current' holds the number of the latest revision
            curr_file = opj(pagedir, 'current')
            f = open(curr_file, 'w')
            try:
                f.write(revstr)
            finally:
                f.close()

        att_from = opj(dir_from, 'pages', pagename, 'attachments')
        if os.path.exists(att_from):
            if not os.path.exists(pagedir):
                # page without any revision: no page dir was created above
                os.makedirs(pagedir)
            att_to = opj(pagedir, 'attachments')
            copy_dir(att_from, att_to)
00283 
00284 
def generate_editlog(dir_from, dir_to):
    """ Write the global edit-log (new format) for all pages in info2.

        The entries of all pages are merged and written to
        <dir_to>/edit-log, ordered by timestamp (entries of different pages
        with the same timestamp are ordered by page name).

        @param dir_from: unused, kept for call compatibility
        @param dir_to: the new data directory
    """
    # Collect (ts, pagename, data) instead of using a dict keyed by ts only:
    # different pages can have entries with the same timestamp and none of
    # them may get lost.
    lines = []
    for pagename in info2:
        entry = info2.get(pagename, {})
        for ts in entry:
            file_from, data = entry[ts]
            lines.append((ts, pagename, data))
    lines.sort()

    editlog_file = opj(dir_to, 'edit-log')
    f = open(editlog_file, 'w')
    try:
        for ts, pagename, data in lines:
            # str() makes us independent of generate_pages() having already
            # converted the timestamp field to a string
            f.write('\t'.join([str(field) for field in data])+'\n')
    finally:
        f.close()
00302 
00303 
origdir = 'data.pre-mig5'

# Move the working copy of the data dir aside and start with an empty one
try:
    os.rename('data', origdir)
    os.mkdir('data')
except OSError:
    print("You need to be in the directory where your copy of the 'data' directory is located.")
    sys.exit(1)

# 1. read the global edit-log, then the page dirs (incl. their local logs)
gather_editlog(origdir, opj(origdir, 'edit-log'))
old_pages = opj(origdir, 'pages')
gather_pagedirs(old_pages)

# 2. drop dead pages and editor backups
remove_trash(old_pages)

# 3. write the converted pages and the new global edit-log
generate_pages(origdir, 'data')
generate_editlog(origdir, 'data')

# 4. take over everything that does not need a conversion
for dirname in ('plugin', 'user'):
    copy_dir(opj(origdir, dirname), opj('data', dirname))

for fname in ('event-log', 'intermap.txt'):
    copy_file(opj(origdir, fname), opj('data', fname))
00330 
00331