""" A class to keep track of history links stored in a set of files. Copyright (C) 2009 Darrell Karbott This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks """ from binaryrep import read_link, str_sha class LinkMap(dict): """ A history link hash addressable index of the history links in a set of block files. """ def __init__(self): dict.__init__(self) self.files = [] def read(self, file_list, keep_data=False): """ Read the index from a collection of block files. """ counts = [0 for dummy in range(0, len(file_list))] age = 0 # Hmmmm for index, name in enumerate(file_list): in_stream = open(name, 'rb') raised = True try: latest_age, count = self.read_from_stream(in_stream, index, keep_data) age = max(age, latest_age) counts[index] = count raised = False finally: if raised or keep_data: in_stream.close() else: self.files.append(in_stream) return age, tuple(counts) def read_from_stream(self, in_stream, index, keep_data=False): """ Read links from a stream. """ age = 0 count = 0 while True: link = read_link(in_stream, keep_data, in_stream.tell(), index) if link is None: break age = max(age, link[1]) prev = list(self.get(link[0], [])) link = list(link) # REDFLAG: ??? tuple -> list -> tuple prev.append(tuple(link)) self[link[0]] = tuple(prev) count += 1 return age, count # SLOW, get rid of list copy? # fixups is a old_index -> new index map # Omit from fixups == delete def _update_block_ordinals(self, fixups): """ INTERNAL: Implementation helper for update_blocks(). """ for sha_hash in self.keys(): prev = self.get(sha_hash) updated = [] for link in prev: assert link[0] == sha_hash if not link[5] in fixups: continue # Dropped block link = list(link) link[5] = fixups[link[5]] updated.append(tuple(link)) if len(updated) > 0: self[sha_hash] = tuple(updated) else: del self[sha_hash] # Fixes ordinals in referenced links # Drops omited blocks # Closes and re-opens all file streams. # Loads links from the streams in new_indices. def update_blocks(self, fixups, file_list, new_indices, keep_data=False): """ Update the index to track addition, deletion and reordering of the underlying block files. """ assert len(self.files) == 0 # must be closed. self._update_block_ordinals(fixups) self.files = [] age = 0 raised = True try: for index, name in enumerate(file_list): self.files.append(open(name, 'rb')) if not index in new_indices: continue # Need to read links out of the new file. latest_age, dummy = self.read_from_stream(self.files[index], index, keep_data) age = max(age, latest_age) raised = False return age finally: if raised: self.close() def close(self): """ Close the index. """ for in_file in self.files: in_file.close() self.files = [] def get_link(self, link_sha, need_data=False): """ Get a history link by its sha1 hash. """ links = self.get(link_sha, None) if links is None: raise IOError("Unresolved link: " + str_sha(link_sha)) assert len(links) > 0 # REDFLAG: Fully think through. # The same link can exist in multiple files. link = links[0] if (not need_data) or (not link[3] is None): return link index = link[5] self.files[index].seek(link[4]) ret = read_link(self.files[index], True) if ret is None: raise IOError("Couldn't read blob from disk.") assert ret[0] == link[0] assert ret[1] == link[1] assert ret[2] == link[2] assert not ret[3] is None assert ret[0] == link_sha return ret def raw_block_read(link_map, ordinal): """ Read a single block file. """ table = {} in_stream = link_map.files[ordinal] in_stream.seek(0) while True: start_pos = in_stream.tell() link = read_link(in_stream, False, start_pos, ordinal) # read_link() never returns None except for eof, right? # Otherwise we'd only do a partial read... if link is None: break entry = table.get(link[0], []) entry.append(link) table[link[0]] = entry return table def links_by_block(link_map): """ INTERNAL: Implementation helper function for verify_link_map(). """ tables = [{} for dummy in range(0, len(link_map.files))] for links in link_map.values(): assert len(links) > 0 for link in links: ordinal = link[5] assert ordinal >= 0 and ordinal < len(link_map.files) entry = tables[ordinal].get(link[0], []) entry.append(link) tables[ordinal][link[0]] = entry return tables def verify_link_map(link_map): """ Debugging function to verify the integrity of a LinkMap instance. """ assert len(link_map.files) > 0 count = 0 by_block = links_by_block(link_map) for ordinal in range(0, len(link_map.files)): raw_shas = raw_block_read(link_map, ordinal) # Hashes read from the raw file are the same as # the ones that the LinkMap thinks should be in the file. assert frozenset(raw_shas.keys()) == frozenset(by_block[ordinal].keys()) # Now check values. for link_sha in raw_shas: assert (frozenset(raw_shas[link_sha]) == frozenset(by_block[ordinal][link_sha])) count += 1 return count