""" Classes to address files stored in a WORMBlockArchive by
    human readable name.

    ATTRIBUTION: Contains source fragments written by Matt Mackall.

    Copyright (C) 2009 Darrell Karbott

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public
    License as published by the Free Software Foundation; either
    version 2.0 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

    Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks
"""

import os
import shutil

from binaryrep import NULL_SHA, manifest_from_file, \
     manifest_to_file, get_file_sha, check_shas, str_sha

from archive import UpToDateException


def is_printable_ascii(value):
    """ Return True if value is non-empty and all of its characters
        are printable ASCII (0x20 through 0x7e, space included),
        False otherwise.

        An empty value returns False instead of raising, so the callers
        in this module, which use this as a validity gate for names
        and paths, reject it. """
    if not value:
        return False
    # Hmmm... allow spaces
    return all(0x20 <= ord(char) <= 0x7e for char in value)


#----------------------------------------------------------#

# Hmmmm... this feels horrifically overdesigned, but I need a way
# to decouple the data that you can insert into a manifest from
# the manifest implementation.
class IManifestEntry:
    """ Abstract base class for things that can be referenced
        from a FileManifest. """
    def __init__(self):
        pass

    def get_name(self):
        """ Returns the name to insert this entry under in the manifest. """
        #raise NotImplementedError()
        pass

    def make_file(self):
        """ Returns the full path to the data to insert.
            May create a temp file which it can clean up in release().
        """
        #raise NotImplementedError()
        pass

    # MUST call this.
    def release(self):
        """ Cleanup method called when the instance is no longer in use. """
        #raise NotImplementedError()
        pass


class FileManifest:
    """ An index which maps human readable names to files in an archive.

        name_map maps name -> (file sha1, patch chain head sha1).
        stored_sha is the history sha the manifest was created with
        or last written under by update(). """
    def __init__(self, name_map=None, history_sha=NULL_SHA):
        check_shas([history_sha, ])
        if name_map is None:
            name_map = {}

        # name ->  (file sha1, patch chain head sha1)
        self.name_map = name_map
        # Hmmmm... convenient, but it ties the manifest to an archive.
        self.stored_sha = history_sha

    @classmethod
    def from_archive(cls, archive, history_sha):
        """ Create a FileManifest from a file in the archive.

            The manifest file is extracted to a temp file which is
            always removed before returning. """
        check_shas([history_sha, ])
        tmp_name = archive.blocks.tmps.make_temp_file()
        try:
            archive.get_file(history_sha, tmp_name)
            # Hmmmm... age... put back in manifest?
            name_map = manifest_from_file(tmp_name)
            return cls(name_map, history_sha)
        finally:
            archive.blocks.tmps.remove_temp_file(tmp_name)

    # hmmmm... not to_archive, would expect that to be an instance member.
    @classmethod
    def write_manifest(cls, archive, name_map, history_sha):
        """ Helper, writes updated manifest to archive.

            Serializes name_map to a temp file and writes it as a new
            delta on top of history_sha.

            Returns link, i.e. whatever archive.write_new_delta() returns. """
        check_shas([history_sha, ])
        # Add manifest
        tmp_file_name = archive.blocks.tmps.make_temp_file()
        try:
            manifest_to_file(tmp_file_name, name_map)
            return archive.write_new_delta(history_sha, tmp_file_name)
        finally:
            archive.blocks.tmps.remove_temp_file(tmp_file_name)

    def make_file_sha_map(self):
        """ INTERNAL: Make a file_sha -> (file_sha, patch_sha) map
            from name_map.

            If several names share a file sha, one of their pairs wins. """
        file_sha_map = {}
        for pair in self.name_map.values():
            file_sha_map[pair[0]] = pair
        return file_sha_map

    # Doesn't change manifest or archive.
    def write_changes(self, archive, entry_infos, prev_manifest_sha=NULL_SHA):
        """ INTERNAL: Helper function for update().

            Writes the changes required to add the IManifestEntries
            in entry_infos to an archive.

            Raises UpToDateException if there are no changes.
            Raises IOError if an entry name is not printable ASCII.

            Returns an (updated_name_map, manifest_sha) tuple. """
        check_shas([prev_manifest_sha, ])

        file_sha_map = self.make_file_sha_map()
        new_name_map = {}
        updated = False

        for info in entry_infos:
            full_path = info.make_file()
            try:
                name = info.get_name()
                if not is_printable_ascii(name):
                    raise IOError("Non-ASCII name: %s" % repr(name))
                hash_info = self.name_map.get(name, None)
                file_sha = get_file_sha(full_path)
                if hash_info is None:
                    updated = True
                    if file_sha in file_sha_map:
                        # Renamed: reuse the existing patch chain.
                        new_name_map[name] = file_sha_map[file_sha]
                    else:
                        # REDFLAG: We lose history for files which are
                        #          renamed and modified.
                        # Created (or renamed and modified)
                        link = archive.write_new_delta(NULL_SHA, full_path)
                        new_name_map[name] = (file_sha, link[0])
                else:
                    if hash_info[0] == file_sha:
                        # Exists in manifest and is unmodified.
                        new_name_map[name] = hash_info
                        continue

                    # Modified: extend the existing patch chain.
                    updated = True
                    link = archive.write_new_delta(hash_info[1], full_path)
                    new_name_map[name] = (file_sha, link[0])

                # delete == orphaned history, NOP
            finally:
                info.release()

        if not updated:
            # No content changed; the only remaining possible change is
            # a deletion, which shows up as a different set of names.
            if (frozenset(new_name_map.keys()) ==
                frozenset(self.name_map.keys())):
                raise UpToDateException("The file manifest is up to date.")

        # Add updated manifest
        link = FileManifest.write_manifest(archive, new_name_map,
                                           prev_manifest_sha)

        return (new_name_map, link[0])

    # Only works if fully committed!
    def all_shas(self, archive):
        """ Return the SHA1 hashes of all history links required to store
            the files referenced by the manifest.

            The result is a set holding the patch chain head sha of every
            entry, the manifest's own stored_sha, and the first field of
            every link archive.blocks.get_history() returns for those. """
        # name_map values are (file_sha, link_sha) pairs; we want the links.
        shas = set([entry[1] for entry in self.name_map.values()])
        shas.add(self.stored_sha)
        history_shas = set([])
        for value in shas:
            # update() mutates in place; union() would discard the result.
            history_shas.update([link[0] for link in
                                 archive.blocks.get_history(value)])
        return shas.union(history_shas)

    # Changes both the manifest and the archive.
    # other_head_shas is for other files in the archive not
    # handled by this manifest.
    def update(self, archive, entry_infos, other_head_shas=None,
               truncate_manifest_history=False):
        """ Update the manifest with the changes in entry infos and
            write the changes and the updated manifest into the archive.

            If truncate_manifest_history is True, the new manifest is
            written on top of NULL_SHA instead of the stored manifest sha.

            The archive update is abandoned if anything raises, including
            the UpToDateException raised by write_changes(). On success
            stored_sha and name_map are replaced with the new values. """
        if other_head_shas is None:
            other_head_shas = set([])

        check_shas(other_head_shas)

        archive.start_update()
        raised = True
        try:
            prev_sha = self.stored_sha
            if truncate_manifest_history:
                prev_sha = NULL_SHA

            new_names, root_sha = self.write_changes(archive,
                                                     entry_infos,
                                                     prev_sha)

            # History for all files except recently modified ones.
            old_shas = set([])

            new_shas = archive.uncommited_shas()

            for value in new_names.values():
                if value[1] in new_shas:
                    # Adding history for new values is handled by
                    # commit_update().
                    continue

                # We need to explicitly add history for the files which
                # still exist in the manifest but didn't change.
                for link in archive.blocks.get_history(value[1]):
                    old_shas.add(link[0])

            all_shas = archive.referenced_shas(
                old_shas.union(other_head_shas))

            archive.commit_update(all_shas)
            self.stored_sha = root_sha
            self.name_map = new_names
            raised = False
        finally:
            if raised:
                archive.abandon_update()


def verify_manifest(archive, manifest, brief=False):
    """ Debugging function to verify the integrity of a manifest.

        Extracts every entry to a temp file and compares its sha with
        the one recorded in the manifest. Progress is printed unless
        brief is True; mismatches are always printed.

        Fails an assertion if any entry doesn't verify. """
    failures = 0
    for name in manifest.name_map:
        tmp = archive.blocks.tmps.make_temp_file()
        try:
            file_sha, link_sha = manifest.name_map[name]
            if not brief:
                print("Verifying: %s %s => %s)" % (name,
                                                    str_sha(file_sha),
                                                    str_sha(link_sha)))
            archive.get_file(link_sha, tmp)
            history = archive.blocks.get_history(link_sha)
            if not brief:
                print("History: " + " ".join([str_sha(link[0])
                                              for link in history]))

            retrieved_sha = get_file_sha(tmp)
            if retrieved_sha != file_sha:
                print("Expected: %s, but got %s." % (str_sha(file_sha),
                                                     str_sha(retrieved_sha)))
                failures += 1
            else:
                if not brief:
                    print("Ok. Read %i bytes." % os.path.getsize(tmp))
        finally:
            # Don't leak the temp file when retrieval raises.
            archive.blocks.tmps.remove_temp_file(tmp)

    if failures > 0:
        print("%i entries failed to verify!" % failures)
        assert False


def fix_backwards_slashes(name):
    """ Helper to fix backwards slashes in windows file names.

        Only rewrites when the local path separator is a backslash;
        on other platforms name is returned unchanged. """
    if os.sep != '\\' or name.find('\\') == -1:
        return name

    return '/'.join(name.split('\\'))


class PathEntry(IManifestEntry):
    """ IManifestEntry implementation for a path to a file on the
        local filesystem. """
    def __init__(self, full_path, name):
        IManifestEntry.__init__(self)
        self.full_path = full_path
        self.name = fix_backwards_slashes(name)

    def get_name(self):
        """ IManifestEntry implementation. """
        return self.name

    def make_file(self):
        """ IManifestEntry implementation. """
        return self.full_path

    # make_file(), release() are NOPs


# skips empty directories
# LATER: updates w/o sending all data?
# only send files which have changes since
# a local sha1 list file has changed, just send sha1s of others.
# LATER: add accept_regex?
def entries_from_dir(start_dir, recurse, ignore_regex=None, include_dirs=False):
    """ An iterator which yields PathEntries for files in a directory.

        Names are the file paths relative to start_dir.
        Directory entries whose name matches ignore_regex are skipped.
        Subdirectories are only visited when recurse is True. If
        include_dirs is also True each visited subdirectory is yielded
        as a PathEntry with an empty name. """
    stack = [start_dir]
    while len(stack) > 0:
        current_dir = stack.pop()
        names = os.listdir(current_dir)
        for name in names:
            if not ignore_regex is None and ignore_regex.match(name):
                continue
            full_path = os.path.join(current_dir, name)
            if os.path.isdir(full_path) and recurse:
                if include_dirs:
                    # Hack so that I can delete unreferenced dirs
                    # in manifest_to_dir
                    yield PathEntry(full_path, '')
                stack.append(full_path)
            if os.path.isfile(full_path):
                # Strip start_dir and any leading separators.
                name = full_path[len(start_dir):].lstrip(os.sep)
                if len(name) > 0:
                    yield PathEntry(full_path, name)


def find_dirs(name_map, target_dir):
    """ INTERNAL: Helper function used by manifest_to_dir().

        Returns the set of directories, rooted at target_dir, which
        contain the files named in name_map. """
    dirs = set([])
    for file_name in name_map:
        dir_name = os.path.dirname(os.path.join(target_dir, file_name))
        if not dir_name:
            continue # Hmmm
        if dir_name == os.sep:
            continue # Hmmm
        dirs.add(dir_name)

    return dirs


def read_local_dir(manifest, target_dir, dirs, ignore_regex):
    """ INTERNAL: Helper function used by manifest_to_dir().

        Walks target_dir and returns an
        (overwrite, remove, local_dirs, extant) tuple where:
        overwrite  -- names which exist locally and in the manifest.
        remove     -- name -> path for local files not in the manifest.
        local_dirs -- local directories, minus any which are a string
                      prefix of a directory in dirs.
        extant     -- every name seen locally ('' for directories). """
    # Read local directory state.
    overwrite = set([])
    remove = {} # name -> path
    local_dirs = set([])
    extant = set([])
    for entry in entries_from_dir(target_dir, True, ignore_regex, True):
        try:
            name = entry.get_name()
            extant.add(name)
            full_path = entry.make_file()
            if name == '':
                # Because we told entries_from_dir to return directories.
                local_dirs.add(full_path)
                continue

            local_dirs.add(os.path.dirname(full_path))
            if name in manifest.name_map:
                overwrite.add(name)
            else: # skip directory entries
                remove[name] = full_path
        finally:
            # Also runs on the directory 'continue' path above.
            entry.release()

    # O(N*M) hmmm....
    # Remove non-leaf subdirectories.
    for stored_dir in dirs:
        for local_dir in local_dirs.copy():
            if stored_dir.startswith(local_dir):
                local_dirs.remove(local_dir)

    return (overwrite, remove, local_dirs, extant)


# Hmmm... wackamole code.
# REDFLAG: Other ways to make sleazy path references.
def validate_path(base_dir, full_path):
    """ Catch references to directories above base_dir.

        Raises IOError if full_path is a unicode string, is not printable
        ASCII, or doesn't resolve to a location strictly below base_dir. """
    base_dir = os.path.abspath(base_dir)

    if isinstance(full_path, unicode):
        raise IOError("Unicode path name: %s" % repr(full_path))
    if not is_printable_ascii(full_path):
        raise IOError("Non-ASCII path name: %s" % repr(full_path))

    full_path = os.path.abspath(full_path)

    # Compare against base_dir plus a trailing separator so that a
    # sibling like '/base_evil/x' can't pass for something under '/base'.
    prefix = base_dir
    if not prefix.endswith(os.sep):
        prefix += os.sep

    if not (len(full_path) > len(base_dir) and
            full_path.startswith(prefix)):
        raise IOError("Hinky path in manifest: %s" % full_path)


# No error handling or cleanup.
# Doubt this will work on Windows, must handle backwards path sep.
def manifest_to_dir(archive, manifest, target_dir, ignore_regex=None,
                    dry_run=False):
    """ Update files in a local directory by extracting files in a manifest.

        Returns a (create, overwrite, remove, remove_dirs) tuple of sets
        describing the changes. If dry_run is True the tuple is returned
        without touching the file system.

        Raises IOError, via validate_path(), for any path which doesn't
        lie below target_dir.

        WARNING. NOT WELL TESTED. POTENTIALLY DANGEROUS.
        PROBABLY BROKEN ON WINDOWS. """
    dirs = find_dirs(manifest.name_map, target_dir)

    overwrite, remove, local_dirs, extant = \
               read_local_dir(manifest, target_dir, dirs, ignore_regex)

    remove_dirs = local_dirs - dirs
    create = set(manifest.name_map.keys()) - extant
    if dry_run:
        return (create, overwrite, set(remove.keys()), remove_dirs)

    # Remove files
    for victim in remove.values():
        if os.path.exists(victim):
            validate_path(target_dir, victim)
            os.remove(victim)

    # Remove directories
    for victim in remove_dirs:
        if os.path.exists(victim):
            # REDFLAG: I saw this fail silently once
            validate_path(target_dir, victim)
            shutil.rmtree(victim)
            assert not os.path.exists(victim)

    # Make directories that exist in manifest, but not locally.
    for dir_name in dirs:
        if not os.path.exists(dir_name):
            validate_path(target_dir, dir_name)
            os.makedirs(dir_name)

    # Copy files out of the archive, onto the local file system.
    for file_name in manifest.name_map:
        out_path = os.path.join(target_dir, file_name)
        validate_path(target_dir, out_path)
        archive.get_file(manifest.name_map[file_name][1], out_path)

    return (create, overwrite, set(remove.keys()), remove_dirs)


class RawDataTupleEntry(IManifestEntry):
    """ IManifestEntry implementation for a (name, raw_data) tuple.

        make_file() writes the raw data to a temp file obtained from
        tmps; release() removes it again. """
    def __init__(self, tmps, raw_tuple):
        IManifestEntry.__init__(self)
        self.tmps = tmps
        self.raw_tuple = raw_tuple
        self.full_path = None

    def get_name(self):
        """ IManifestEntry implementation. """
        return self.raw_tuple[0]

    def make_file(self):
        """ IManifestEntry implementation.

            Must be called at most once per instance. """
        assert self.full_path is None
        self.full_path = self.tmps.make_temp_file()
        out_file = open(self.full_path, 'wb')
        try:
            out_file.write(self.raw_tuple[1])
        finally:
            out_file.close()

        return self.full_path

    # MUST call this.
    def release(self):
        """ IManifestEntry implementation. """
        if not self.full_path is None:
            self.tmps.remove_temp_file(self.full_path)
            self.full_path = None

        # REDFLAG: Does this really help garbage collection or just CC?
        self.raw_tuple = None
        self.tmps = None


def entries_from_seq(tmps, sequence):
    """ An iterator which yields RawDataTupleEntries from a sequence of
        (name, raw_data) tuples.

        REQUIRES: sequence not modified while iterating.
    """
    for value in sequence:
        yield RawDataTupleEntry(tmps, value)