Back to index

moin  1.9.0~rc2
SyncPages.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - SyncPages action
00004 
00005     This action allows you to synchronise pages of two wikis.
00006 
00007     @copyright: 2006 MoinMoin:AlexanderSchremmer
00008     @license: GNU GPL, see COPYING for details.
00009 """
00010 
00011 import re
00012 import traceback
00013 import StringIO # not relevant for speed, so we do not need cStringIO
00014 
00015 
00016 from MoinMoin import wikiutil
00017 from MoinMoin.packages import unpackLine, packLine
00018 from MoinMoin.PageEditor import PageEditor, conflict_markers
00019 from MoinMoin.Page import Page
00020 from MoinMoin.wikisync import TagStore, UnsupportedWikiException, SyncPage, NotAllowedException
00021 from MoinMoin.wikisync import MoinLocalWiki, MoinRemoteWiki, UP, DOWN, BOTH, MIMETYPE_MOIN
00022 from MoinMoin.support.python_compatibility import set
00023 from MoinMoin.util.bdiff import decompress, patch, compress, textdiff
00024 from MoinMoin.util import diff3, rpc_aggregator
00025 
00026 
# Module-wide switch: when true, extra diagnostic entries are added to the
# sync log and the remote wiki object is created with verbose=True.
debug = False


# Translates the textual "direction" option of the parameter page into the
# direction constants imported from MoinMoin.wikisync.
directions_map = dict(up=UP, down=DOWN, both=BOTH)
00032 
00033 
class ActionStatus(Exception):
    """ Raised to abort the action early.

        The exception arguments are the message text and the message type;
        render() hands them unchanged to the theme's add_msg().
    """
00036 
00037 
00038 class ActionClass(object):
00039     INFO, WARN, ERROR = zip(range(3), ("", "<!>", "/!\\")) # used for logging
00040 
00041     def __init__(self, pagename, request):
00042         self.request = request
00043         self.pagename = pagename
00044         self.page = PageEditor(request, pagename)
00045         self.status = []
00046         self.rollback = set()
00047 
00048     def log_status(self, level, message=u"", substitutions=(), raw_suffix=u""):
00049         """ Appends the message with a given importance level to the internal log. """
00050         if isinstance(message, str):
00051             message = message.decode("utf-8")
00052         if isinstance(raw_suffix, str):
00053             raw_suffix = raw_suffix.decode("utf-8")
00054         self.status.append((level, message, substitutions, raw_suffix))
00055 
00056     def register_rollback(self, func):
00057         self.rollback.add(func)
00058 
00059     def remove_rollback(self, func):
00060         self.rollback.remove(func)
00061 
00062     def call_rollback_funcs(self):
00063         _ = lambda x: x
00064 
00065         for func in self.rollback:
00066             try:
00067                 page_name = func()
00068                 self.log_status(self.INFO, _("Rolled back changes to the page %s."), (page_name, ))
00069             except Exception:
00070                 temp_file = StringIO.StringIO()
00071                 traceback.print_exc(file=temp_file)
00072                 self.log_status(self.ERROR, _("Exception while calling rollback function:"), raw_suffix=temp_file.getvalue())
00073 
00074     def generate_log_table(self):
00075         """ Transforms self.status into a user readable table. """
00076         table_line = u"|| %(smiley)s || %(message)s%(raw_suffix)s ||"
00077         table = []
00078 
00079         for line in self.status:
00080             level, message, substitutions, raw_suffix = line
00081             if message:
00082                 if substitutions:
00083                     macro_args = [message] + list(substitutions)
00084                     message = u"<<GetText2(|%s)>>" % (packLine(macro_args), )
00085                 else:
00086                     message = u"<<GetText(%s)>>" % (message, )
00087             else:
00088                 message = u""
00089             table.append(table_line % {"smiley": level[1],
00090                                        "message": message,
00091                                        "raw_suffix": raw_suffix.replace("\n", "<<BR>>")})
00092 
00093         return "\n".join(table)
00094 
00095     def parse_page(self):
00096         """ Parses the parameter page and returns the read arguments. """
00097         options = {
00098             "remotePrefix": "",
00099             "localPrefix": "",
00100             "remoteWiki": "",
00101             "pageMatch": None,
00102             "pageList": None,
00103             "groupList": None,
00104             "direction": "foo", # is defaulted below
00105             "user": None, # XXX should be refactored into a password agent or OpenID like solution
00106             "password": None,
00107         }
00108 
00109         options.update(self.request.dicts[self.pagename])
00110 
00111         # Convert page and group list strings to lists
00112         if options["pageList"] is not None:
00113             options["pageList"] = unpackLine(options["pageList"], ",")
00114         if options["groupList"] is not None:
00115             options["groupList"] = unpackLine(options["groupList"], ",")
00116 
00117         options["direction"] = directions_map.get(options["direction"].lower(), BOTH)
00118 
00119         return options
00120 
00121     def fix_params(self, params):
00122         """ Does some fixup on the parameters. """
00123         # Load the password
00124         if "password" in self.request.values:
00125             params["password"] = self.request.values["password"]
00126 
00127         # merge the pageList case into the pageMatch case
00128         if params["pageList"] is not None:
00129             params["pageMatch"] = u'|'.join([r'^%s$' % re.escape(name)
00130                                              for name in params["pageList"]])
00131 
00132         if params["pageMatch"] is not None:
00133             params["pageMatch"] = re.compile(params["pageMatch"], re.U)
00134 
00135         # we do not support matching or listing pages if there is a group of pages
00136         if params["groupList"]:
00137             params["pageMatch"] = None
00138             params["pageList"] = None
00139 
00140         return params
00141 
00142     def show_password_form(self):
00143         _ = self.request.getText
00144         d = {"message": _(r"Please enter your password of your account at the remote wiki below. <<BR>> /!\ You should trust both wikis because the password could be read by the particular administrators.", wiki=True),
00145              "passwordlabel": _("Password"),
00146              "submit": _("Login"),
00147              "cancel": _("Cancel"),
00148         }
00149         html_form = """
00150 %(message)s
00151 <form method="post">
00152 <div>
00153 <input type="hidden" name="action" value="SyncPages">
00154 <label for="iPassword" style="font-weight: bold;">%(passwordlabel)s:</label>
00155 <input type="password" name="password" id="iPassword" size="20">
00156 </div>
00157 <div style="margin-top:1em; margin-bottom:1em;">
00158 <div style="float:left">
00159 <input type="submit" value="%(submit)s">
00160 </div>
00161 <div style="margin-left: 10em; margin-right: 10em;">
00162 <input type="submit" value="%(cancel)s" name="cancel">
00163 </div>
00164 </div>
00165 </form>
00166 """ % d
00167         self.request.theme.add_msg(html_form, "dialog")
00168         self.page.send_page()
00169 
00170     def render(self):
00171         """ Render action
00172 
00173         This action returns a status message.
00174         """
00175         _ = self.request.getText
00176 
00177         params = self.fix_params(self.parse_page())
00178 
00179         try:
00180             if "cancel" in self.request.values:
00181                 raise ActionStatus(_("Operation was canceled."), "error")
00182 
00183             if params["direction"] == UP:
00184                 raise ActionStatus(_("The only supported directions are BOTH and DOWN."), "error")
00185 
00186             if not self.request.cfg.interwikiname:
00187                 raise ActionStatus(_("Please set an interwikiname in your wikiconfig (see HelpOnConfiguration) to be able to use this action.", wiki=True), "error")
00188 
00189             if not params["remoteWiki"]:
00190                 raise ActionStatus(_("Incorrect parameters. Please supply at least the ''remoteWiki'' parameter. Refer to HelpOnSynchronisation for help.", wiki=True), "error")
00191 
00192             local = MoinLocalWiki(self.request, params["localPrefix"], params["pageList"])
00193             try:
00194                 remote = MoinRemoteWiki(self.request, params["remoteWiki"], params["remotePrefix"], params["pageList"], params["user"], params["password"], verbose=debug)
00195             except (UnsupportedWikiException, NotAllowedException), (msg, ):
00196                 raise ActionStatus(msg, "error")
00197 
00198             if not remote.valid:
00199                 raise ActionStatus(_("The ''remoteWiki'' is unknown.", wiki=True), "error")
00200             # if only the username is supplied, we ask for the password
00201             if params["user"] and not params["password"]:
00202                 return self.show_password_form()
00203         except ActionStatus, e:
00204             self.request.theme.add_msg(*e.args)
00205         else:
00206             try:
00207                 try:
00208                     self.sync(params, local, remote)
00209                 except Exception, e:
00210                     temp_file = StringIO.StringIO()
00211                     traceback.print_exc(file=temp_file)
00212                     self.log_status(self.ERROR, _("A severe error occurred:"), raw_suffix=temp_file.getvalue())
00213                     raise
00214                 else:
00215                     self.request.theme.add_msg(u"%s" % (_("Synchronisation finished. Look below for the status messages."), ), "info")
00216             finally:
00217                 self.call_rollback_funcs()
00218                 # XXX aquire readlock on self.page
00219                 self.page.saveText(self.page.get_raw_body() + "\n\n" + self.generate_log_table(), 0)
00220                 # XXX release readlock on self.page
00221 
00222                 remote.delete_auth_token()
00223 
00224         return self.page.send_page()
00225 
00226     def sync(self, params, local, remote):
00227         """ This method does the synchronisation work.
00228             Currently, it handles nearly all cases.
00229             The major missing part is rename handling.
00230             There are a few other cases left that have to be implemented:
00231                 Wiki A    | Wiki B   | Remark
00232                 ----------+----------+------------------------------
00233                 exists    | non-     | Now the wiki knows that the page was renamed.
00234                 with tags | existing | There should be an RPC method that asks
00235                           |          | for the new name (which could be recorded
00236                           |          | on page rename). Then the page is
00237                           |          | renamed in Wiki A as well and the sync
00238                           |          | is done normally.
00239                           |          | Every wiki retains a dict that maps
00240                           |          | (IWID, oldname) => newname and that is
00241                           |          | updated on every rename. oldname refers
00242                           |          | to the pagename known by the old wiki (can be
00243                           |          | gathered from tags).
00244                 ----------+----------+-------------------------------
00245                 exists    | any case | Try a rename search first, then
00246                           |          | do a sync without considering tags
00247                 with tags | with non | to ensure data integrity.
00248                           | matching | Hmm, how do we detect this
00249                           | tags     | case if the unmatching tags are only
00250                           |          | on the remote side?
00251                 ----------+----------+-------------------------------
00252         """
00253         _ = lambda x: x # we will translate it later
00254 
00255         direction = params["direction"]
00256         if direction == BOTH:
00257             match_direction = direction
00258         else:
00259             match_direction = None
00260 
00261         local_full_iwid = packLine([local.get_iwid(), local.get_interwiki_name()])
00262         remote_full_iwid = remote.iwid_full
00263 
00264         self.log_status(self.INFO, _("Synchronisation started -"), raw_suffix=" <<DateTime(%s)>>" % self.page._get_local_timestamp())
00265 
00266         l_pages = local.get_pages()
00267         r_pages = remote.get_pages(exclude_non_writable=direction != DOWN)
00268 
00269         if params["groupList"]:
00270             pages_from_groupList = set(local.getGroupItems(params["groupList"]))
00271             r_pages = SyncPage.filter(r_pages, pages_from_groupList.__contains__)
00272             l_pages = SyncPage.filter(l_pages, pages_from_groupList.__contains__)
00273 
00274         m_pages = [elem.add_missing_pagename(local, remote) for elem in SyncPage.merge(l_pages, r_pages)]
00275 
00276         self.log_status(self.INFO, _("Got a list of %s local and %s remote pages. This results in %s pages to process."),
00277                         (str(len(l_pages)), str(len(r_pages)), str(len(m_pages))))
00278 
00279         if params["pageMatch"]:
00280             m_pages = SyncPage.filter(m_pages, params["pageMatch"].match)
00281             self.log_status(self.INFO, _("After filtering: %s pages"), (str(len(m_pages)), ))
00282 
00283         class handle_page(rpc_aggregator.RPCYielder):
00284             def run(yielder, sp):
00285                 # XXX add locking, acquire read-lock on sp
00286                 if debug:
00287                     self.log_status(ActionClass.INFO, raw_suffix="Processing %r" % sp)
00288 
00289                 local_pagename = sp.local_name
00290                 if not self.request.user.may.write(local_pagename):
00291                     self.log_status(ActionClass.WARN, _("Skipped page %s because of no write access to local page."), (local_pagename, ))
00292                     return
00293 
00294                 current_page = PageEditor(self.request, local_pagename) # YYY direct access
00295                 comment = u"Local Merge - %r" % (remote.get_interwiki_name() or remote.get_iwid())
00296 
00297                 tags = TagStore(current_page)
00298 
00299                 matching_tags = tags.fetch(iwid_full=remote.iwid_full, direction=match_direction)
00300                 matching_tags.sort()
00301                 if debug:
00302                     self.log_status(ActionClass.INFO, raw_suffix="Tags: %r <<BR>> All: %r" % (matching_tags, tags.tags))
00303 
00304                 # some default values for non matching tags
00305                 normalised_name = None
00306                 remote_rev = None
00307                 local_rev = sp.local_rev # merge against the newest version
00308                 old_contents = ""
00309 
00310                 if matching_tags:
00311                     newest_tag = matching_tags[-1]
00312 
00313                     local_change = newest_tag.current_rev != sp.local_rev
00314                     remote_change = newest_tag.remote_rev != sp.remote_rev
00315 
00316                     # handle some cases where we cannot continue for this page
00317                     if not remote_change and (direction == DOWN or not local_change):
00318                         return # no changes done, next page
00319                     if sp.local_deleted and sp.remote_deleted:
00320                         return
00321                     if sp.remote_deleted and not local_change:
00322                         msg = local.delete_page(sp.local_name, comment)
00323                         if not msg:
00324                             self.log_status(ActionClass.INFO, _("Deleted page %s locally."), (sp.name, ))
00325                         else:
00326                             self.log_status(ActionClass.ERROR, _("Error while deleting page %s locally:"), (sp.name, ), msg)
00327                         return
00328                     if sp.local_deleted and not remote_change:
00329                         if direction == DOWN:
00330                             return
00331                         yield remote.delete_page_pre(sp.remote_name, sp.remote_rev, local_full_iwid)
00332                         msg = remote.delete_page_post(yielder.fetch_result())
00333                         if not msg:
00334                             self.log_status(ActionClass.INFO, _("Deleted page %s remotely."), (sp.name, ))
00335                         else:
00336                             self.log_status(ActionClass.ERROR, _("Error while deleting page %s remotely:"), (sp.name, ), msg)
00337                         return
00338                     if sp.local_mime_type != MIMETYPE_MOIN and not (local_change ^ remote_change):
00339                         self.log_status(ActionClass.WARN, _("The item %s cannot be merged automatically but was changed in both wikis. Please delete it in one of both wikis and try again."), (sp.name, ))
00340                         return
00341                     if sp.local_mime_type != sp.remote_mime_type:
00342                         self.log_status(ActionClass.WARN, _("The item %s has different mime types in both wikis and cannot be merged. Please delete it in one of both wikis or unify the mime type, and try again."), (sp.name, ))
00343                         return
00344                     if newest_tag.normalised_name != sp.name:
00345                         self.log_status(ActionClass.WARN, _("The item %s was renamed locally. This is not implemented yet. Therefore the full synchronisation history is lost for this page."), (sp.name, )) # XXX implement renames
00346                     else:
00347                         normalised_name = newest_tag.normalised_name
00348                         local_rev = newest_tag.current_rev
00349                         remote_rev = newest_tag.remote_rev
00350                         old_contents = Page(self.request, local_pagename, rev=newest_tag.current_rev).get_raw_body_str() # YYY direct access
00351                 else:
00352                     if (sp.local_deleted and not sp.remote_rev) or (
00353                         sp.remote_deleted and not sp.local_rev):
00354                         return
00355 
00356                 self.log_status(ActionClass.INFO, _("Synchronising page %s with remote page %s ..."), (local_pagename, sp.remote_name))
00357 
00358                 if direction == DOWN:
00359                     remote_rev = None # always fetch the full page, ignore remote conflict check
00360                     patch_base_contents = ""
00361                 else:
00362                     patch_base_contents = old_contents
00363 
00364                 # retrieve remote contents diff
00365                 if remote_rev != sp.remote_rev:
00366                     if sp.remote_deleted: # ignore remote changes
00367                         current_remote_rev = sp.remote_rev
00368                         is_remote_conflict = False
00369                         diff = None
00370                         self.log_status(ActionClass.WARN, _("The page %s was deleted remotely but changed locally."), (sp.name, ))
00371                     else:
00372                         yield remote.get_diff_pre(sp.remote_name, remote_rev, None, normalised_name)
00373                         diff_result = remote.get_diff_post(yielder.fetch_result())
00374                         if diff_result is None:
00375                             self.log_status(ActionClass.ERROR, _("The page %s could not be synced. The remote page was renamed. This is not supported yet. You may want to delete one of the pages to get it synced."), (sp.remote_name, ))
00376                             return
00377                         is_remote_conflict = diff_result["conflict"]
00378                         assert diff_result["diffversion"] == 1
00379                         diff = diff_result["diff"]
00380                         current_remote_rev = diff_result["current"]
00381                 else:
00382                     current_remote_rev = remote_rev
00383                     if sp.local_mime_type == MIMETYPE_MOIN:
00384                         is_remote_conflict = wikiutil.containsConflictMarker(old_contents.decode("utf-8"))
00385                     else:
00386                         is_remote_conflict = NotImplemented
00387                     diff = None
00388 
00389                 # do not sync if the conflict is remote and local, or if it is local
00390                 # and the page has never been synchronised
00391                 if (sp.local_mime_type == MIMETYPE_MOIN and wikiutil.containsConflictMarker(current_page.get_raw_body()) # YYY direct access
00392                     and (remote_rev is None or is_remote_conflict)):
00393                     self.log_status(ActionClass.WARN, _("Skipped page %s because of a locally or remotely unresolved conflict."), (local_pagename, ))
00394                     return
00395 
00396                 if remote_rev is None and direction == BOTH:
00397                     self.log_status(ActionClass.INFO, _("This is the first synchronisation between the local and the remote wiki for the page %s."), (sp.name, ))
00398 
00399                 # calculate remote page contents from diff
00400                 if sp.remote_deleted:
00401                     remote_contents = ""
00402                 elif diff is None:
00403                     remote_contents = old_contents
00404                 else:
00405                     remote_contents = patch(patch_base_contents, decompress(diff))
00406 
00407                 if diff is None: # only a local change
00408                     if debug:
00409                         self.log_status(ActionClass.INFO, raw_suffix="Only local changes for %r" % sp.name)
00410                     merged_text_raw = current_page.get_raw_body_str()
00411                     if sp.local_mime_type == MIMETYPE_MOIN:
00412                         merged_text = merged_text_raw.decode("utf-8")
00413                 elif local_rev == sp.local_rev:
00414                     if debug:
00415                         self.log_status(ActionClass.INFO, raw_suffix="Only remote changes for %r" % sp.name)
00416                     merged_text_raw = remote_contents
00417                     if sp.local_mime_type == MIMETYPE_MOIN:
00418                         merged_text = merged_text_raw.decode("utf-8")
00419                 else:
00420                     # this is guaranteed by a check above
00421                     assert sp.local_mime_type == MIMETYPE_MOIN
00422                     remote_contents_unicode = remote_contents.decode("utf-8")
00423                     # here, the actual 3-way merge happens
00424                     merged_text = diff3.text_merge(old_contents.decode("utf-8"), remote_contents_unicode, current_page.get_raw_body(), 1, *conflict_markers) # YYY direct access
00425                     if debug:
00426                         self.log_status(ActionClass.INFO, raw_suffix="Merging %r, %r and %r into %r" % (old_contents.decode("utf-8"), remote_contents_unicode, current_page.get_raw_body(), merged_text))
00427                     merged_text_raw = merged_text.encode("utf-8")
00428 
00429                 # generate binary diff
00430                 diff = textdiff(remote_contents, merged_text_raw)
00431                 if debug:
00432                     self.log_status(ActionClass.INFO, raw_suffix="Diff against %r" % remote_contents)
00433 
00434                 # XXX upgrade to write lock
00435                 try:
00436                     local_change_done = True
00437                     current_page.saveText(merged_text, sp.local_rev or 0, comment=comment) # YYY direct access
00438                 except PageEditor.Unchanged:
00439                     local_change_done = False
00440                 except PageEditor.EditConflict:
00441                     local_change_done = False
00442                     assert False, "You stumbled on a problem with the current storage system - I cannot lock pages"
00443 
00444                 new_local_rev = current_page.get_real_rev() # YYY direct access
00445 
00446                 def rollback_local_change(): # YYY direct local access
00447                     comment = u"Wikisync rollback"
00448                     rev = new_local_rev - 1
00449                     revstr = '%08d' % rev
00450                     oldpg = Page(self.request, sp.local_name, rev=rev)
00451                     pg = PageEditor(self.request, sp.local_name)
00452                     if not oldpg.exists():
00453                         pg.deletePage(comment)
00454                     else:
00455                         try:
00456                             savemsg = pg.saveText(oldpg.get_raw_body(), 0, comment=comment, extra=revstr, action="SAVE/REVERT")
00457                         except PageEditor.Unchanged:
00458                             pass
00459                     return sp.local_name
00460 
00461                 if local_change_done:
00462                     self.register_rollback(rollback_local_change)
00463 
00464                 if direction == BOTH:
00465                     yield remote.merge_diff_pre(sp.remote_name, compress(diff), new_local_rev, current_remote_rev, current_remote_rev, local_full_iwid, sp.name)
00466                     try:
00467                         very_current_remote_rev = remote.merge_diff_post(yielder.fetch_result())
00468                     except NotAllowedException:
00469                         self.log_status(ActionClass.ERROR, _("The page %s could not be merged because you are not allowed to modify the page in the remote wiki."), (sp.name, ))
00470                         return
00471                 else:
00472                     very_current_remote_rev = current_remote_rev
00473 
00474 
00475                 if local_change_done:
00476                     self.remove_rollback(rollback_local_change)
00477 
00478                 # this is needed at least for direction both and cgi sync to standalone for immutable pages on both
00479                 # servers. It is not needed for the opposite direction
00480                 try:
00481                     tags.add(remote_wiki=remote_full_iwid, remote_rev=very_current_remote_rev, current_rev=new_local_rev, direction=direction, normalised_name=sp.name)
00482                 except:
00483                     self.log_status(ActionClass.ERROR, _("The page %s could not be merged because you are not allowed to modify the page in the remote wiki."), (sp.name, ))
00484                     return
00485 
00486                 if sp.local_mime_type != MIMETYPE_MOIN or not wikiutil.containsConflictMarker(merged_text):
00487                     self.log_status(ActionClass.INFO, _("Page %s successfully merged."), (sp.name, ))
00488                 elif is_remote_conflict:
00489                     self.log_status(ActionClass.WARN, _("Page %s contains conflicts that were introduced on the remote side."), (sp.name, ))
00490                 else:
00491                     self.log_status(ActionClass.WARN, _("Page %s merged with conflicts."), (sp.name, ))
00492 
00493                 # XXX release lock
00494 
00495         rpc_aggregator.scheduler(remote.create_multicall_object, handle_page, m_pages, 8, remote.prepare_multicall)
00496 
00497 
def execute(pagename, request):
    """ Action entry point: runs the SyncPages action for the given page. """
    action = ActionClass(pagename, request)
    action.render()
00500