Back to index

moin  1.9.0~rc2
bdiff.py
Go to the documentation of this file.
00001 """
00002     MoinMoin - Binary patching and diffing
00003 
00004     @copyright: 2005 Matt Mackall <mpm@selenic.com>,
00005                 2006 MoinMoin:AlexanderSchremmer
00006 
00007     Algorithm taken from mercurial's mdiff.py
00008 
00009     @license: GNU GPL, see COPYING for details.
00010 """
00011 
00012 import zlib, difflib, struct
00013 
# Header that precedes every hunk of a binary diff: three big-endian
# 4-byte signed integers holding the start offset in the old text, the
# end offset in the old text and the length of the replacement data
# that follows the header.
BDIFF_PATT = ">lll"
# Size in bytes of one packed hunk header.
BDIFF_PATT_SIZE = struct.calcsize(BDIFF_PATT)
00016 
def compress(text, level=None):
    """ Compresses a string with zlib.

        @param text: the (binary) string to compress
        @param level: optional zlib compression level (0 = no compression,
                      1 = fastest ... 9 = smallest output); when omitted,
                      zlib's own default level is used
        @return: the compressed data, readable with decompress()
    """
    if level is None:
        # do not pass a level at all, so zlib applies its default
        return zlib.compress(text)
    return zlib.compress(text, level)
00019 
def decompress(bin):
    """ Restores data that was packed with zlib.

        @param bin: zlib-compressed data, e.g. the result of compress()
        @return: the original, uncompressed string
    """
    unpacked = zlib.decompress(bin)
    return unpacked
00022 
def diff(a, b):
    """ Generates a binary diff that turns a into b.

        Both arguments are either plain (binary) strings or sequences of
        such strings. Passing sequences (e.g. lists of lines) might give
        better results for text files.

        The result is a concatenation of hunks. Each hunk is a header
        packed with BDIFF_PATT (start offset in a, end offset in a, length
        of the new data) followed by the new data itself. An empty string
        is returned if there is nothing to change.

        @param a: old content
        @param b: new content
        @return: the binary diff, usable with patch() and patchtext()
    """
    # Empty string of the type struct.pack() produces (str on Python 2,
    # bytes on Python 3), so headers and data can always be concatenated.
    empty = struct.pack("")
    flat_type = type(empty)

    def flatten(chunk):
        # a plain string (or a slice of one) is already in its final form,
        # a sequence of strings has to be joined
        if isinstance(chunk, flat_type):
            return chunk
        return empty.join(chunk)

    if not a:
        # nothing to match against: all of b is a single insertion
        s = flatten(b)
        if not s:
            return s
        return struct.pack(BDIFF_PATT, 0, 0, len(s)) + s

    # p[i] is the offset of element i of a within the flattened old content
    if isinstance(a, flat_type):
        # every element of a plain string is exactly one unit long
        p = list(range(len(a) + 1))
    else:
        p = [0]
        for item in a:
            p.append(p[-1] + len(item))

    hunks = []
    la = lb = 0  # end of the previous matching block in a and in b

    for am, bm, size in difflib.SequenceMatcher(None, a, b).get_matching_blocks():
        # everything between two matching blocks was removed and/or replaced
        s = flatten(b[lb:bm])
        if am > la or s:
            hunks.append(struct.pack(BDIFF_PATT, p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return empty.join(hunks)
00045 
def textdiff(a, b):
    """ Line-based variant of diff() for text content.

        Both strings are split into lines (line endings are kept) before
        they are handed to diff(). Binary content works as well.
    """
    old_lines = a.splitlines(True)
    new_lines = b.splitlines(True)
    return diff(old_lines, new_lines)
00049 
def patchtext(bin):
    """ Returns the new hunks that are contained in a binary diff.

        @param bin: binary diff as generated by diff()
        @return: the concatenated new data of all hunks, of the same
                 string type as bin
    """
    pos = 0
    end = len(bin)
    chunks = []
    while pos < end:
        # only the data length is needed, the offsets into the old text
        # are irrelevant here
        _start, _stop, length = struct.unpack(BDIFF_PATT, bin[pos:pos + BDIFF_PATT_SIZE])
        pos += BDIFF_PATT_SIZE
        chunks.append(bin[pos:pos + length])
        pos += length
    # join with an empty string of bin's own type, so that binary strings
    # work no matter how the interpreter represents them
    return bin[:0].join(chunks)
00060 
def patch(a, bin):
    """ Patches the string a with the binary patch bin.

        @param a: the old content as one string
        @param bin: binary diff as generated by diff()
        @return: the new content
    """
    last = pos = 0  # end of the last replaced range in a / read position in bin
    end = len(bin)
    pieces = []

    while pos < end:
        start, stop, length = struct.unpack(BDIFF_PATT, bin[pos:pos + BDIFF_PATT_SIZE])
        pos += BDIFF_PATT_SIZE
        # keep the unchanged part of a in front of this hunk ...
        pieces.append(a[last:start])
        # ... then insert the new data in place of a[start:stop]
        pieces.append(bin[pos:pos + length])
        pos += length
        last = stop
    # unchanged remainder after the last hunk
    pieces.append(a[last:])

    # join with an empty string of bin's own type, so that binary strings
    # work no matter how the interpreter represents them
    return bin[:0].join(pieces)
00077 
def test():
    """ Manual round-trip check.

        Reads the files test.1 and test.2 from the current directory, diffs
        them line by line and prints: the new hunks of the diff, the raw
        diff, whether patching test.1 with the diff yields test.2, and the
        sizes of the plain and the compressed diff.
    """
    def read(name):
        # binary mode: the diff works on the exact file contents
        f = open(name, "rb")
        try:
            return f.read()
        finally:
            f.close()

    old = read("test.1")
    new = read("test.2")

    d = diff(old.splitlines(True), new.splitlines(True))
    z = compress(d)
    print(repr(patchtext(d)))
    print(repr(d))
    # joining the kept-ends lines would just give the file contents again,
    # so compare against the unsplit data directly
    print(new == patch(old, d))
    print("%d %d" % (len(d), len(z)))
00093