Back to index

moin  1.9.0~rc2
_conv160.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - migration from 1.5.8 to 1.6.0 (creole link style)
00004 
00005     What it does:
00006 
00007     a) reverse underscore == blank stuff in pagenames (introducing this was a fault)
00008 
00009                    pagename            quoted pagename
00010        -----------------------------------------------------
00011        old         MainPage/Sub_Page   MainPage(2f)Sub_Page
00012        new         MainPage/Sub Page   MainPage(2f)Sub(20)Page    or
00013        new         MainPage/Sub_Page   MainPage(2f)Sub_Page       (user has to decide by editing rename1.txt)
00014 
00015 
00016                    markup
00017        ----------------------------------------------------
00018        old         MoinMoin:MainPage/Sub_Page      ../Sub_Page2
00019        new         [[MoinMoin:MainPage/Sub Page]]  [[../Sub Page2]]
00020 
00021 
00022     b) decode url encoded chars in attachment names (and quote the whole fname):
00023 
00024                    markup
00025        ----------------------------------------------------
00026        old         attachment:file%20with%20blanks.txt
00027        new         [[attachment:file with blanks.txt]]
00028 
00029     c) users: move bookmarks from separate files into user profile
00030     d) users: generate new name[] for lists and name{} for dicts
00031 
00032     e) kill all */MoinEditorBackup pages (replaced by drafts functionality)
00033 
00034     @copyright: 2007 by Thomas Waldmann
00035     @license: GNU GPL, see COPYING for details.
00036 """
00037 
00038 import os.path
00039 import re
00040 import time
00041 import codecs, urllib, glob
00042 
00043 from MoinMoin import config, wikiutil
00044 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
00045 
00046 import mimetypes # this MUST be after wikiutil import!
00047 
00048 from _conv160_wiki import convert_wiki
00049 
# Conversion mode switch used by Page.write() and EditLog.write():
# - True: all existing revisions are copied unconverted and, for pages that
#   are not deleted, one additional revision with the converted markup is
#   written (plus a matching edit-log entry).
# - False: no additional revision is written, the current revision is
#   converted while it is copied.
create_rev = True # create a <new> rev with the converted content of <new-1> rev?
00051 
def markup_converter(request, pagename, text, renames):
    """ Return <text> of page <pagename> converted to the new markup.

        The <renames> dict is handed to the wiki converter so links to renamed
        pages / attachments get fixed. XML pages and pages that declare a
        format other than 'wiki' are returned unchanged.
    """
    # xml content is not touched here (would be done with xslt processor)
    if text.startswith('<?xml'):
        return text

    instructions, _body = wikiutil.get_processing_instructions(text)
    for name, value in instructions:
        if name != 'format':
            continue
        if value != 'wiki':
            # not a wiki markup page, nothing to convert
            return text

    return convert_wiki(request, pagename, text, renames)
00068 
00069 
00070 class EventLog:
00071     def __init__(self, request, fname):
00072         self.request = request
00073         self.fname = fname
00074         self.data = None
00075         self.renames = {}
00076 
00077     def read(self):
00078         """ read complete event-log from disk """
00079         data = []
00080         try:
00081             lineno = 0
00082             f = file(self.fname, 'r')
00083             for line in f:
00084                 lineno += 1
00085                 line = line.replace('\r', '').replace('\n', '')
00086                 if not line.strip(): # skip empty lines
00087                     continue
00088                 fields = line.split('\t')
00089                 try:
00090                     timestamp, action, kvpairs = fields[:3]
00091                     timestamp = int(timestamp)
00092                     kvdict = wikiutil.parseQueryString(kvpairs)
00093                     data.append((timestamp, action, kvdict))
00094                 except ValueError, err:
00095                     # corrupt event log line, log error and skip it
00096                     print "Error: invalid event log (%s) line %d, err: %s, SKIPPING THIS LINE!" % (self.fname, lineno, str(err))
00097             f.close()
00098         except IOError, err:
00099             # no event-log
00100             pass
00101         self.data = data
00102 
00103     def write(self, fname):
00104         """ write complete event-log to disk """
00105         if self.data:
00106             f = file(fname, 'w')
00107             for timestamp, action, kvdict in self.data:
00108                 pagename = kvdict.get('pagename')
00109                 if pagename and ('PAGE', pagename) in self.renames:
00110                     kvdict['pagename'] = self.renames[('PAGE', pagename)]
00111                 kvpairs = wikiutil.makeQueryString(kvdict)
00112                 fields = str(timestamp), action, kvpairs
00113                 line = '\t'.join(fields) + '\n'
00114                 f.write(line)
00115             f.close()
00116 
00117     def copy(self, destfname, renames):
00118         self.renames = renames
00119         self.read()
00120         self.write(destfname)
00121 
00122 
00123 class EditLog:
00124     def __init__(self, request, fname):
00125         self.request = request
00126         self.fname = fname
00127         self.data = None
00128         self.renames = {}
00129 
00130     def read(self):
00131         """ read complete edit-log from disk """
00132         data = {}
00133         try:
00134             lineno = 0
00135             f = file(self.fname, 'r')
00136             for line in f:
00137                 lineno += 1
00138                 line = line.replace('\r', '').replace('\n', '')
00139                 if not line.strip(): # skip empty lines
00140                     continue
00141                 fields = line.split('\t') + [''] * 9
00142                 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
00143                 try:
00144                     timestamp = int(timestamp)
00145                     rev = int(rev)
00146                 except ValueError, err:
00147                     print "Error: %r has a damaged timestamp or revision number in log line %d [%s] - skipping this entry" % (
00148                         self.fname, lineno, str(err))
00149                     continue # ignore this line, do not terminate - to find all those errors in one go
00150                 pagename = wikiutil.unquoteWikiname(pagename)
00151                 data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
00152             f.close()
00153         except IOError, err:
00154             # no edit-log
00155             pass
00156         self.data = data
00157 
00158     def write(self, fname, deleted=False):
00159         """ write complete edit-log to disk """
00160         if self.data:
00161             editlog = self.data.items()
00162             editlog.sort()
00163             f = file(fname, "w")
00164             max_rev = 0
00165             for key, fields in editlog:
00166                 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
00167                 if action.startswith('ATT'):
00168                     try:
00169                         fname = urllib.unquote(extra).decode('utf-8')
00170                     except UnicodeDecodeError:
00171                         fname = urllib.unquote(extra).decode('iso-8859-1')
00172                     if ('FILE', pagename, fname) in self.renames:
00173                         fname = self.renames[('FILE', pagename, fname)]
00174                     extra = urllib.quote(fname.encode('utf-8'))
00175                 if ('PAGE', pagename) in self.renames:
00176                     pagename = self.renames[('PAGE', pagename)]
00177                 timestamp = str(timestamp)
00178                 if rev != 99999999:
00179                     max_rev = max(rev, max_rev)
00180                 revstr = '%08d' % rev
00181                 pagename = wikiutil.quoteWikinameFS(pagename)
00182                 fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
00183                 log_str = '\t'.join(fields) + '\n'
00184                 f.write(log_str)
00185             if create_rev and not deleted:
00186                 timestamp = str(wikiutil.timestamp2version(time.time()))
00187                 revstr = '%08d' % (max_rev + 1)
00188                 action = 'SAVE'
00189                 ip = '127.0.0.1'
00190                 hostname = 'localhost'
00191                 userid = ''
00192                 extra = ''
00193                 comment = "converted to 1.6 markup"
00194                 fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
00195                 log_str = '\t'.join(fields) + '\n'
00196                 f.write(log_str)
00197             f.close()
00198 
00199     def copy(self, destfname, renames, deleted=False):
00200         self.renames = renames
00201         self.read()
00202         self.write(destfname, deleted)
00203 
00204 
class PageRev:
    """ a single revision of a page """
    def __init__(self, request, pagename, rev_dir, rev):
        self.request = request
        self.pagename = pagename
        self.rev_dir = rev_dir
        self.rev = rev
        # info for renaming pages/attachments, replaced by copy(). Having it
        # here makes write(..., convert=True) usable without calling copy().
        self.renames = {}

    def read(self):
        """ read the revision file from self.rev_dir, return its content as unicode """
        fname = opj(self.rev_dir, '%08d' % self.rev)
        f = open(fname, "rb")
        try: # make sure the file gets closed, whatever happens
            data = f.read()
        finally:
            f.close()
        data = data.decode(config.charset)
        return data

    def write(self, data, rev_dir, convert, rev=None):
        """ write unicode <data> as a revision file to <rev_dir>

            @param data: page content (unicode)
            @param rev_dir: directory the revision file is written to
            @param convert: if true, run data through markup_converter first
            @param rev: revision number to write, default: self.rev
        """
        if rev is None:
            rev = self.rev
        if convert:
            data = markup_converter(self.request, self.pagename, data, self.renames)
        fname = opj(rev_dir, '%08d' % rev)
        data = data.encode(config.charset)
        f = open(fname, "wb")
        try: # make sure the file gets closed, whatever happens
            f.write(data)
        finally:
            f.close()

    def copy(self, rev_dir, renames, convert=False, new_rev=None):
        """ read this revision and write it to <rev_dir>

            Optionally converts the markup (<convert>) and / or stores it
            under another revision number (<new_rev>).
        """
        self.renames = renames
        data = self.read()
        self.write(data, rev_dir, convert, new_rev)
00236 
00237 
class Attachment:
    """ one attachment file of a page """
    def __init__(self, request, attach_dir, attfile):
        self.request = request
        # name is the utf-8 decoded filename, undecodable bytes get replaced
        self.name = attfile.decode('utf-8', 'replace')
        # path points to the original file (built from the name as found on disk)
        self.path = opj(attach_dir, attfile)

    def copy(self, attach_dir):
        """ copy the file at self.path into <attach_dir>, named after self.name (utf-8 encoded) """
        target = opj(attach_dir, self.name.encode('utf-8'))
        copy_file(self.path, target)
00250 
00251 
00252 class Page:
00253     """ represents a page with all related data """
00254     def __init__(self, request, pages_dir, qpagename):
00255         self.request = request
00256         self.name = wikiutil.unquoteWikiname(qpagename)
00257         self.name_old = self.name # renaming: still original name when self.name has the new name
00258         self.page_dir = opj(pages_dir, qpagename)
00259         self.current = None # int current
00260         self.editlog = None # dict (see read_editlog)
00261         self.revlist = None # list of ints (page text revisions)
00262         self.revisions = None # dict int: pagerev obj
00263         self.attachments = None # dict of unicode fname: full path
00264         self.renames = {} # info for renaming pages/attachments
00265 
00266     def read(self):
00267         """ read a page, including revisions, log, attachments from disk """
00268         page_dir = self.page_dir
00269         # read current file
00270         current_fname = opj(page_dir, 'current')
00271         if os.path.exists(current_fname):
00272             current_file = file(current_fname, "r")
00273             current_rev = current_file.read()
00274             current_file.close()
00275             try:
00276                 self.current = int(current_rev)
00277             except ValueError:
00278                 print "Error: invalid current file %s, SKIPPING THIS PAGE!" % current_fname
00279                 return
00280         # read edit-log
00281         editlog_fname = opj(page_dir, 'edit-log')
00282         if os.path.exists(editlog_fname):
00283             self.editlog = EditLog(self.request, editlog_fname)
00284         # read page revisions
00285         rev_dir = opj(page_dir, 'revisions')
00286         if os.path.exists(rev_dir):
00287             revlist = listdir(rev_dir)
00288             revlist = [int(rev) for rev in revlist]
00289             revlist.sort()
00290             self.revlist = revlist
00291             self.revisions = {}
00292             for rev in revlist:
00293                 self.revisions[rev] = PageRev(self.request, self.name_old, rev_dir, rev)
00294         # set deleted status
00295         self.is_deleted = not self.revisions or self.current not in self.revisions
00296         # read attachment filenames
00297         attach_dir = opj(page_dir, 'attachments')
00298         if os.path.exists(attach_dir):
00299             self.attachments = {}
00300             attlist = listdir(attach_dir)
00301             for attfile in attlist:
00302                 a = Attachment(self.request, attach_dir, attfile)
00303                 self.attachments[a.name] = a
00304 
00305     def write(self, pages_dir):
00306         """ write a page, including revisions, log, attachments to disk """
00307         if ('PAGE', self.name) in self.renames:
00308             name_new = self.renames[('PAGE', self.name)]
00309             if name_new != self.name:
00310                 print "Renaming page %r -> %r" % (self.name, name_new)
00311                 self.name_old = self.name
00312                 self.name = name_new
00313         qpagename = wikiutil.quoteWikinameFS(self.name)
00314         page_dir = opj(pages_dir, qpagename)
00315         os.makedirs(page_dir)
00316         # write current file
00317         current = self.current
00318         if current is not None:
00319             if create_rev and not self.is_deleted:
00320                 current += 1
00321             current_fname = opj(page_dir, 'current')
00322             current_file = file(current_fname, "w")
00323             current_str = '%08d\n' % current
00324             current_file.write(current_str)
00325             current_file.close()
00326         # copy edit-log
00327         if self.editlog is not None:
00328             editlog_fname = opj(page_dir, 'edit-log')
00329             self.editlog.copy(editlog_fname, self.renames, deleted=self.is_deleted)
00330         # copy page revisions
00331         if self.revisions is not None:
00332             rev_dir = opj(page_dir, 'revisions')
00333             os.makedirs(rev_dir)
00334             for rev in self.revlist:
00335                 if create_rev:
00336                     self.revisions[rev].copy(rev_dir, self.renames)
00337                 else:
00338                     if int(rev) == self.current:
00339                         self.revisions[rev].copy(rev_dir, self.renames, convert=True)
00340                     else:
00341                         self.revisions[rev].copy(rev_dir, self.renames)
00342             if create_rev and not self.is_deleted:
00343                 self.revisions[rev].copy(rev_dir, self.renames, convert=True, new_rev=rev+1)
00344 
00345         # copy attachments
00346         if self.attachments is not None:
00347             attach_dir = opj(page_dir, 'attachments')
00348             os.makedirs(attach_dir)
00349             for fn, att in self.attachments.items():
00350                 # we have to check for renames here because we need the (old) pagename, too:
00351                 if ('FILE', self.name_old, fn) in self.renames:
00352                     fn_new = self.renames[('FILE', self.name_old, fn)]
00353                     if fn_new != fn:
00354                         print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new)
00355                         att.name = fn_new
00356                 att.copy(attach_dir)
00357 
00358     def copy(self, pages_dir, renames):
00359         self.renames = renames
00360         self.read()
00361         self.write(pages_dir)
00362 
00363 
00364 class User:
00365     """ represents a user with all related data """
00366     def __init__(self, request, users_dir, uid):
00367         self.request = request
00368         self.uid = uid
00369         self.users_dir = users_dir
00370         self.profile = None
00371         self.bookmarks = None
00372 
00373     def read(self):
00374         """ read profile and bookmarks data from disk """
00375         self.profile = {}
00376         fname = opj(self.users_dir, self.uid)
00377         # read user profile
00378         f = codecs.open(fname, 'r', config.charset)
00379         for line in f:
00380             line = line.replace(u'\r', '').replace(u'\n', '')
00381             if not line.strip() or line.startswith(u'#'): # skip empty or comment lines
00382                 continue
00383             try:
00384                 key, value = line.split(u'=', 1)
00385             except Exception, err:
00386                 print "Error: User reader can not parse line %r from profile %r (%s)" % (line, fname, str(err))
00387                 continue
00388             self.profile[key] = value
00389         f.close()
00390         # read bookmarks
00391         self.bookmarks = {}
00392         fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
00393         for fname in glob.glob(fname_pattern):
00394             f = file(fname, "r")
00395             bookmark = f.read()
00396             f.close()
00397             wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
00398             self.bookmarks[wiki] = int(bookmark)
00399         # don't care about trail
00400 
00401     def write(self, users_dir):
00402         """ write profile and bookmarks data to disk """
00403         fname = opj(users_dir, self.uid)
00404         f = codecs.open(fname, 'w', config.charset)
00405         for key, value in self.profile.items():
00406             if key in (u'subscribed_pages', u'quicklinks'):
00407                 pages = value.split(u'\t')
00408                 for i in range(len(pages)):
00409                     pagename = pages[i]
00410                     try:
00411                         interwiki, pagename = pagename.split(u':', 1)
00412                     except:
00413                         interwiki, pagename = u'Self', pagename
00414                     if interwiki == u'Self' or interwiki == self.request.cfg.interwikiname:
00415                         if ('PAGE', pagename) in self.renames:
00416                             pagename = self.renames[('PAGE', pagename)]
00417                             pages[i] = u'%s:%s' % (interwiki, pagename)
00418                 key += '[]' # we have lists here
00419                 value = u'\t'.join(pages)
00420                 f.write(u"%s=%s\n" % (key, value))
00421             else:
00422                 f.write(u"%s=%s\n" % (key, value))
00423         bookmark_entries = [u'%s:%s' % item for item in self.bookmarks.items()]
00424         key = u"bookmarks{}"
00425         value = u'\t'.join(bookmark_entries)
00426         f.write(u"%s=%s\n" % (key, value))
00427         f.close()
00428         # don't care about trail
00429 
00430     def copy(self, users_dir, renames):
00431         self.renames = renames
00432         self.read()
00433         self.write(users_dir)
00434 
00435 
class DataConverter(object):
    """ converts a complete data dir in 2 passes

        pass1 creates complete.txt and rename1.txt in the source data dir,
        pass2 reads rename2.txt from there and writes the destination data dir.
    """
    def __init__(self, request, src_data_dir, dest_data_dir):
        self.request = request
        self.sdata = src_data_dir
        self.ddata = dest_data_dir
        self.pages = {} # dict pagename: Page obj
        self.users = {} # dict uid: User obj
        self.complete = {} # all pages / attachments found by pass1
        self.renames = {} # info for renaming pages/attachments
        self.complete_fname = opj(self.sdata, 'complete.txt')
        self.rename_fname1 = opj(self.sdata, 'rename1.txt')
        self.rename_fname2 = opj(self.sdata, 'rename2.txt')

    def pass1(self):
        """ First create the rename list - the user has to review/edit it as
            we can't decide about page/attachment names automatically.
        """
        self.read_src()
        # pages
        for pn, p in self.pages.items():
            p.read()
            if not p.revisions:
                continue # we don't care for pages with no revisions (trash)
            if pn.endswith('/MoinEditorBackup'):
                continue # we don't care for old editor backups
            self.complete[('PAGE', pn)] = None
            if "_" in pn:
                # log all pagenames with underscores
                self.renames[('PAGE', pn)] = None
            if p.attachments is not None:
                for fn in p.attachments:
                    try:
                        fn_str = fn.encode('ascii')
                    except UnicodeEncodeError:
                        log = True # this file maybe has a strange representation in wiki markup
                    else:
                        # pure ascii filenames are no problem, except if they
                        # have blanks or % chars (they need quoting)
                        log = ' ' in fn_str or '%' in fn_str
                    self.complete[('FILE', pn, fn)] = None
                    if log:
                        # log all strange attachment filenames
                        self.renames[('FILE', pn, fn)] = None
        self.save_list(self.complete_fname, self.complete)
        self.save_list(self.rename_fname1, self.renames)

    LIST_FIELDSEP = u'|' # in case | makes trouble, one can use \t tab char

    def save_list(self, fname, what):
        """ write the keys of dict <what> to the utf-8 encoded file <fname>

            Every line has the format PAGE|pagename|pagename or
            FILE|pagename|filename|filename (the last field is the one to
            edit for renaming). Lines are sorted by pagename, then filename.
        """
        what_sorted = what.keys()
        # make sure we have 3-tuples:
        what_sorted = [(k + (None, ))[:3] for k in what_sorted]
        # we only have python 2.3, thus no cmp keyword for the sort() call,
        # thus we need to do it the more complicated way:
        what_sorted = [(pn, fn, rtype) for rtype, pn, fn in what_sorted] # shuffle
        what_sorted.sort() # sort
        what_sorted = [(rtype, pn, fn) for pn, fn, rtype in what_sorted] # shuffle
        f = codecs.open(fname, 'w', 'utf-8')
        try: # make sure the file gets closed, whatever happens
            for rtype, pn, fn in what_sorted:
                if rtype == 'PAGE':
                    line = (rtype, pn, pn)
                elif rtype == 'FILE':
                    line = (rtype, pn, fn, fn)
                else:
                    continue # unknown type, do not write a stale line for it
                line = self.LIST_FIELDSEP.join(line)
                f.write(line + u'\n')
        finally:
            f.close()

    def load_list(self, fname, what):
        """ read the utf-8 encoded list file <fname> into dict <what>

            Adds what[('PAGE', pagename)] = new pagename and
            what[('FILE', pagename, filename)] = new filename entries.
            Missing fields give None, empty or unknown lines are ignored.
        """
        f = codecs.open(fname, 'r', 'utf-8')
        try: # make sure the file gets closed, whatever happens
            for line in f:
                line = line.rstrip()
                if not line:
                    continue
                t = line.split(self.LIST_FIELDSEP)
                rtype, p1, p2, p3 = (t + [None]*3)[:4]
                if rtype == u'PAGE':
                    what[(str(rtype), p1)] = p2
                elif rtype == u'FILE':
                    what[(str(rtype), p1, p2)] = p3
        finally:
            f.close()

    def pass2(self):
        """ Second, read the (user edited) rename list and do the renamings everywhere. """
        self.read_src()
        #self.load_list(self.complete_fname, self.complete)
        self.load_list(self.rename_fname2, self.renames)
        self.write_dest()

    def read_src(self):
        """ create the Page, User and log objects for the source data dir

            Only the objects are created here, their data is read later.
        """
        # create Page objects in memory
        pages_dir = opj(self.sdata, 'pages')
        pagelist = listdir(pages_dir)
        for qpagename in pagelist:
            p = Page(self.request, pages_dir, qpagename)
            self.pages[p.name] = p

        # create User objects in memory
        users_dir = opj(self.sdata, 'user')
        # only files named like 123.456 or 123.456.789 are user profiles
        user_re = re.compile(r'^\d+\.\d+(\.\d+)?$')
        userlist = listdir(users_dir)
        userlist = [f for f in userlist if user_re.match(f)]
        for userid in userlist:
            u = User(self.request, users_dir, userid)
            self.users[u.uid] = u

        # create log objects in memory
        self.editlog = EditLog(self.request, opj(self.sdata, 'edit-log'))
        self.eventlog = EventLog(self.request, opj(self.sdata, 'event-log'))

    def write_dest(self):
        """ write pages, users and logs to the destination data dir, applying self.renames """
        self.init_dest()
        # copy pages
        pages_dir = opj(self.ddata, 'pages')
        for pn, page in self.pages.items():
            if pn.endswith('/MoinEditorBackup'):
                continue # we don't care for old editor backups
            page.copy(pages_dir, self.renames)

        # copy users
        users_dir = opj(self.ddata, 'user')
        for user in self.users.values():
            user.copy(users_dir, self.renames)

        # copy logs
        self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
        self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)

    def init_dest(self):
        """ create the destination data dir, copy plugin dir and intermap.txt to it """
        try:
            os.makedirs(self.ddata)
        except OSError:
            # maybe it already exists - if it is not usable, we fail below
            pass
        os.makedirs(opj(self.ddata, 'pages'))
        os.makedirs(opj(self.ddata, 'user'))
        copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
        copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))
00573 
00574