""" A delta encoder/decoder based on Mercurial's binary diff/patch code. ATTRIBUTION: Contains source fragements written by Matt Mackall. Copyright (C) 2009 Darrell Karbott This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks """ # For names in pillaged Mercurial code. # pylint: disable-msg=C0103, W0141 import zlib from mercurial import mdiff from binaryrep import NULL_SHA ############################################################ # ATTRIBUTION: Pillaged from Mercurial revlog.py by Matt Mackall # Then hacked, so bugs are mine. _compress = zlib.compress _decompress = zlib.decompress def compress(text): """ generate a possibly-compressed representation of text """ if not text: return ("", text) l = len(text) bin = None if l < 44: # Is this Mercurial specific or a zlib overhead thing? pass elif l > 1000000: # zlib makes an internal copy, thus doubling memory usage for # large files, so lets do this in pieces z = zlib.compressobj() p = [] pos = 0 while pos < l: pos2 = pos + 2**20 p.append(z.compress(text[pos:pos2])) pos = pos2 p.append(z.flush()) if sum(map(len, p)) < l: bin = "".join(p) else: bin = _compress(text) if bin is None or len(bin) > l: if text[0] == '\0': return ("", text) return ('u', text) return ("", bin) def decompress(bin): """ decompress the given input """ if not bin: return bin t = bin[0] if t == '\0': return bin if t == 'x': return _decompress(bin) if t == 'u': return bin[1:] raise Exception("unknown compression type %r" % t) # _ is a function defined in i18n.py to call i18n.gettext. #raise RevlogError(_("unknown compression type %r") % t) ############################################################ # REDFLAG: wants_stream ENOTIMPL, who closes stream? # Returns raw patch data if if it's not set # returns a readable stream if wants_stream is True, otherwise the raw data # def example_get_data_func(history_link, wants_stream=False): # pass class DeltaCoder: """ Wrapper around the delta compression/decompression implementation used by the Mercurial Revlog. See revlog.py, mdiff.py, mpatch.c, bdiff.c in Mercurial codebase. """ def __init__(self): self.get_data_func = lambda x:None self.tmp_file_mgr = None # Define an ABC? What would the runtime overhead be? # Subclass might need tmp_file_mgr or get_data_func. # pylint: disable-msg=R0201 def make_full_insert(self, new_file, out_file_name, disable_compression=False): """ Make a blob readable by apply_deltas containing the entire file. """ in_file = open(new_file, 'rb') raw_new = None try: raw_new = in_file.read() finally: in_file.close() if disable_compression: values = ('u', raw_new) else: values = compress(raw_new) out_file = open(out_file_name, 'wb') try: if values[0]: out_file.write(values[0]) out_file.write(values[1]) finally: out_file.close() return NULL_SHA # Writes a new delta blob into out_files # Returns parent sha1. # Can truncate history by returning NULL_SHA def make_delta(self, history_chain, old_file, new_file, out_file_name): """ Make a new binary change blob and write it into out_file_name. """ if len(history_chain) == 0: #print "DOING FULL INSERT" return self.make_full_insert(new_file, out_file_name) #print "MAKING DELTA" in_file = open(new_file, 'rb') raw_new = None try: raw_new = in_file.read() finally: in_file.close() parent = NULL_SHA in_old = open(old_file, 'rb') try: raw_old = in_old.read() values = compress(mdiff.textdiff(raw_old, raw_new)) parent = history_chain[0][0] out_file = open(out_file_name, 'wb') try: if values[0]: out_file.write(values[0]) out_file.write(values[1]) finally: out_file.close() finally: in_old.close() return parent # All text and patches kept in RAM. # Rebuilds the file by applying all the deltas in the history chain. def apply_deltas(self, history_chain, out_file_name): """ Rebuild a file from a series of patches and write it into out_file_name. """ assert len(history_chain) > 0 deltas = [] text = None index = 0 while index < len(history_chain): link = history_chain[index] if link[2] == NULL_SHA: text = link[3] if text is None: text = self.get_data_func(link[0]) break delta = link[3] if delta is None: delta = self.get_data_func(link[0]) assert not delta is None deltas.append(delta) index += 1 assert not text is None text = decompress(text) if len(deltas) == 0: raw = text else: for index in range(0, len(deltas)): deltas[index] = decompress(deltas[index]) deltas.reverse() # iterate in reverse? raw = mdiff.patches(text, deltas) text = None out_file = open(out_file_name, "wb") try: out_file.write(raw) finally: out_file.close()