""" Classes to address files stored in a WORMBlockArchive by
    human readable name.

    ATTRIBUTION: Contains source fragements written by Matt Mackall.

    Copyright (C) 2009 Darrell Karbott

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public
    License as published by the Free Software Foundation; either
    version 2.0 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

    Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks
"""

import os
import shutil

from binaryrep import NULL_SHA, manifest_from_file, \
     manifest_to_file, get_file_sha, check_shas, str_sha

from archive import UpToDateException

def is_printable_ascii(value):
    """ Return True if all the characters in value are printable
        ASCII, False otherwise. """
    value = [ord(c) for c in value]
    # Hmmm... allow spaces
    return max(value) <= 0x7e and min(value) >= 0x20

#----------------------------------------------------------#

# Hmmmm... this feels horrifically overdesigned, but I need a way
# to decouple the data that you can insert into a manifest from
# the manifest implementation.
class IManifestEntry:
    """ Abstract base class for things that can be referenced
        from a FileManifest. """
    def __init__(self):
        pass

    def get_name(self):
        """ Returns the name to insert this entry under in the manifest. """
        #raise NotImplementedError()
        pass

    def make_file(self):
        """ Returns the full path to the data to insert.
            May create a temp file which it can clean up in release().
        """
        #raise NotImplementedError()
        pass

    # MUST call this.
    def release(self):
        """ Cleanup method called when the instance is no longer in use. """
        #raise NotImplementedError()
        pass

class FileManifest:
    """ An index which maps human readable names to files in an archive. """
    def __init__(self, name_map=None, history_sha=NULL_SHA):
        check_shas([history_sha, ])
        if name_map == None:
            name_map = {}

        # name ->  (file sha1, patch chain head sha1)
        self.name_map = name_map
        # Hmmmm... convenient, but it ties the manifest to an archive.
        self.stored_sha = history_sha

    @classmethod
    def from_archive(cls, archive, history_sha):
        """ Create a FileManifest from a file in the archive. """
        check_shas([history_sha, ])
        tmp_name = archive.blocks.tmps.make_temp_file()
        try:
            archive.get_file(history_sha, tmp_name)
            # Hmmmm... age... put back in manifest?
            name_map = manifest_from_file(tmp_name)
            return FileManifest(name_map, history_sha)
        finally:
            archive.blocks.tmps.remove_temp_file(tmp_name)

    # hmmmm... not to_archive, would expect that to be an instance member.
    @classmethod
    def write_manifest(cls, archive, name_map, history_sha):
        """ Helper, writes updated manifest to archive.
            Returns link.
        """
        check_shas([history_sha, ])
        # Add manifest
        tmp_file_name = archive.blocks.tmps.make_temp_file()
        try:
            manifest_to_file(tmp_file_name, name_map)
            return archive.write_new_delta(history_sha, tmp_file_name)
        finally:
            archive.blocks.tmps.remove_temp_file(tmp_file_name)

    def make_file_sha_map(self):
        """ INTERNAL: Make a file_sha -> (file_sha, patch_sha) map
            from name_map. """
        file_sha_map =  {}
        for name in self.name_map:
            pair = self.name_map[name]
            file_sha_map[pair[0]] = pair
        return file_sha_map

    # Doesn't change manifest or archive.
    def write_changes(self, archive, entry_infos, prev_manifest_sha=NULL_SHA):
        """ INTERNAL: Helper function for update().

            Writes the changes required to add the IManifestEntries
            in entries_infos to an archive.

            Raises UpToDateException if there are no changes.

            Return an (updated_name_map, manifest_sha) tuple. """

        check_shas([prev_manifest_sha, ])

        file_sha_map =  self.make_file_sha_map()
        new_name_map = {}
        updated = False

        for info in entry_infos:
            full_path = info.make_file()
            try:
                name = info.get_name()
                if not is_printable_ascii(name):
                    raise IOError("Non-ASCII name: %s" % repr(name))
                hash_info = self.name_map.get(name, None)
                file_sha = get_file_sha(full_path)
                if hash_info is None:
                    updated = True
                    if file_sha in file_sha_map:
                        # Renamed
                        new_name_map[name] = file_sha_map[file_sha]
                    else:
                        # REDFLAG: We lose history for files which are renamed
                        #          and modified.
                        # Created (or renamed and modified)
                        link = archive.write_new_delta(NULL_SHA, full_path)
                        new_name_map[name] = (file_sha, link[0])
                else:
                    if self.name_map[name][0] == file_sha:
                        # Exists in manifest and is unmodified.
                        new_name_map[name] = self.name_map[name]
                        continue

                    # Modified
                    updated = True
                    link = archive.write_new_delta(self.name_map[name][1],
                                                   full_path)
                    new_name_map[name] = (file_sha, link[0])

                # delete == ophaned history, NOP
            finally:
                info.release()

        if not updated:
            if (frozenset(new_name_map.keys()) ==
                frozenset(self.name_map.keys())):
                raise UpToDateException("The file manifest is up to date.")

        # Add updated manifest
        link = FileManifest.write_manifest(archive, new_name_map,
                                           prev_manifest_sha)

        return (new_name_map, link[0])

    # Only works if fully committed!
    def all_shas(self, archive):
        """ Return the SHA1 hashes of all history links required to store
            the files referenced by the manifest. """
        shas = [entry[1] for entry in self.name_map]
        shas.add(self.stored_sha)
        history_shas = set([])
        for value in shas:
            history_shas.union(set([link[0] for link in
                                    archive.blocks.get_history(value)]))
        return shas.union(history_shas)

    # Changes both the manifest and the archive.
    # other_head_shas is for other files in the archive not
    # handled by this manifest.
    def update(self, archive, entry_infos, other_head_shas=None,
               truncate_manifest_history=False):
        """ Update the manifest with the changes in entry infos and
            write the changes and the updated manifest into the archive. """
        if other_head_shas is None:
            other_head_shas = set([])

        check_shas(other_head_shas)

        archive.start_update()
        raised = True
        try:
            prev_sha = self.stored_sha
            if truncate_manifest_history:
                prev_sha = NULL_SHA

            new_names, root_sha = self.write_changes(archive,
                                                     entry_infos,
                                                     prev_sha)

            # History for all files except recently modified ones.
            old_shas = set([])

            new_shas = archive.uncommited_shas()

            for value in new_names.values():
                if value[1] in new_shas:
                    # Adding history for new values is handled by
                    # commit_update().
                    continue

                # We need to explictly add history for the files which
                # still exist in the manifest but didn't change.
                for link in (archive.blocks.get_history(value[1])):
                    old_shas.add(link[0])

            all_shas = archive.referenced_shas(old_shas.
                                               union(other_head_shas))

            archive.commit_update(all_shas)
            self.stored_sha = root_sha
            self.name_map = new_names
            raised = False
        finally:
            if raised:
                archive.abandon_update()


def verify_manifest(archive, manifest, brief=False):
    """ Debugging function to verify the integrity of a manifest. """
    failures = 0
    for name in manifest.name_map:
        tmp = archive.blocks.tmps.make_temp_file()
        file_sha, link_sha = manifest.name_map[name]
        if not brief:
            print "Verifying: %s  %s => %s)" % (name,
                                              str_sha(file_sha),
                                              str_sha(link_sha))
        archive.get_file(link_sha, tmp)
        history = archive.blocks.get_history(link_sha)
        if not brief:
            print "History: " + " ".join([str_sha(link[0])
                                          for link in history])

        retrieved_sha = get_file_sha(tmp)
        if retrieved_sha != file_sha:
            print "Expected: %s, but got %s." % (str_sha(file_sha),
                                                 str_sha(retrieved_sha))
            failures += 1
        else:
            if not brief:
                print "Ok. Read %i bytes." % os.path.getsize(tmp)

        archive.blocks.tmps.remove_temp_file(tmp)

    if failures > 0:
        print "%i entries failed to verify!" % failures
        assert False

def fix_backwards_slashes(name):
    """ Helper to fix backwards slashes in windows file names. """
    if os.sep != '\\' or name.find('\\') == -1:
        return name

    return '/'.join(name.split('\\'))

class PathEntry(IManifestEntry):
    """ IManifestEntry implementation for a path to a file on the
        local filesystem. """
    def __init__(self, full_path, name):
        IManifestEntry.__init__(self)
        self.full_path = full_path
        self.name = fix_backwards_slashes(name)

    def get_name(self):
        """ IManifestEntry implementation. """
        return self.name

    def make_file(self):
        """ IManifestEntry implementation. """
        return self.full_path


    # make_file(), release() are NOPs


# skips empty directories
# LATER: updates w/o sending all data?
# only send files which have changes since
# a local sha1 list file has changed, just send sha1s of others.
# LATER: add accept_regex?
def entries_from_dir(start_dir, recurse, ignore_regex=None, include_dirs=False):
    """ An iterator which yields FileManifestEntries for
        files in a directory. """
    stack = [start_dir]
    while len(stack) > 0:
        current_dir = stack.pop()
        names = os.listdir(current_dir)
        for name in names:
            if not ignore_regex is None and ignore_regex.match(name):
                continue
            full_path = os.path.join(current_dir, name)
            if os.path.isdir(full_path) and recurse:
                if include_dirs:
                    # Hack so that I can delete unreferenced dirs
                    # in manifest_to_dir
                    yield PathEntry(full_path, '')
                stack.append(full_path)
            if os.path.isfile(full_path):
                name = full_path[len(start_dir):]
                while len(name) > 0 and name.startswith(os.sep):
                    name = name[1:]
                if len(name) > 0:
                    yield PathEntry(full_path, name)

def find_dirs(name_map, target_dir):
    """ INTERNAL: Helper function used by manifest_to_dir(). """

    dirs = set([])
    for file_name in name_map:
        dir_name = os.path.dirname(os.path.join(target_dir, file_name))
        if not dir_name:
            continue # Hmmm
        if dir_name == os.sep:
            continue # Hmmm
        dirs.add(dir_name)

    return dirs

def read_local_dir(manifest, target_dir, dirs, ignore_regex):
    """ INTERNAL: Helper function used by manifest_to_dir(). """
    # Read local directory state.
    overwrite = set([])
    remove = {} # name -> path
    local_dirs = set([])
    extant = set([])
    for entry in entries_from_dir(target_dir, True, ignore_regex, True):
        name = entry.get_name()
        extant.add(name)
        full_path = entry.make_file()
        if name == '':
            # Because we told entries_from_dir to return directories.
            local_dirs.add(full_path)
            continue

        local_dirs.add(os.path.dirname(full_path))
        if name in manifest.name_map:
            overwrite.add(name)
        else: # skip directory entries
            remove[name] = entry.make_file()
        entry.release()

    # O(N*M) hmmm....
    # Remove non-leaf subdirectories.
    for stored_dir in dirs:
        for local_dir in local_dirs.copy():
            if stored_dir.startswith(local_dir):
                local_dirs.remove(local_dir)


    return (overwrite, remove, local_dirs, extant)

# Hmmm... wackamole code.
# REDFLAG: Other ways to make sleazy path references.
def validate_path(base_dir, full_path):
    """ Catch references to direcories above base_dir. """
    base_dir = os.path.abspath(base_dir)

    if type(full_path) is unicode:
        raise IOError("Unicode path name: %s" % repr(full_path))
    if not is_printable_ascii(full_path):
        raise IOError("Non-ASCII path name: %s" % repr(full_path))

    full_path = os.path.abspath(full_path)

    if not (len(full_path) > len(base_dir) and
            full_path.startswith(base_dir)):
        raise IOError("Hinky path in manifest: %s" % full_path)

# No error handling or cleanup.
# Doubt this will work on Windows, must handle backwards path sep.
def manifest_to_dir(archive, manifest, target_dir, ignore_regex=None,
                    dry_run=False):

    """ Update files in a local directory by extracting files in a manifest.

        WARNING. NOT WELL TESTED. POTENTIALLY DANGEROUS.
        PROBABLY BROKEN ON WINDOWS. """

    dirs = find_dirs(manifest.name_map, target_dir)

    overwrite, remove, local_dirs, extant = \
               read_local_dir(manifest, target_dir, dirs, ignore_regex)

    remove_dirs = local_dirs - dirs
    create = set(manifest.name_map.keys()) - extant
    if dry_run:
        return (create, overwrite, set(remove.keys()), remove_dirs)

    # Remove files
    for victim in remove.values():
        if os.path.exists(victim):
            validate_path(target_dir, victim)
            os.remove(victim)

    # Remove directories
    for victim in (remove_dirs):
        if os.path.exists(victim):
             # REDFLAG: I saw this fail silently once
            validate_path(target_dir, victim)
            shutil.rmtree(victim)
            assert not os.path.exists(victim)

    # Make directories that exist in manifest, but not locally.
    for dir_name in dirs:
        if not os.path.exists(dir_name):
            validate_path(target_dir, dir_name)
            os.makedirs(dir_name)

    # Copy files out of the archive, onto the local file system.
    for file_name in manifest.name_map:
        validate_path(target_dir, os.path.join(target_dir, file_name))
        archive.get_file(manifest.name_map[file_name][1],
                         os.path.join(target_dir, file_name))

    return (create, overwrite, set(remove.keys()), remove_dirs)

class RawDataTupleEntry(IManifestEntry):
    """ IManifestEntry implementation for a path to a file on the
        local filesystem. """
    def __init__(self, tmps, raw_tuple):
        IManifestEntry.__init__(self)
        self.tmps = tmps
        self.raw_tuple = raw_tuple
        self.full_path = None
    def get_name(self):
        """ IManifestEntry implementation. """
        return self.raw_tuple[0]

    def make_file(self):
        """ IManifestEntry implementation. """
        assert self.full_path is None
        self.full_path = self.tmps.make_temp_file()
        out_file = open(self.full_path, 'wb')
        try:
            out_file.write(self.raw_tuple[1])
        finally:
            out_file.close()

        return self.full_path

    # MUST call this.
    def release(self):
        """ IManifestEntry implementation. """
        if not self.full_path is None:
            self.tmps.remove_temp_file(self.full_path)
            self.full_path = None

        # REDFLAG: Does this really help garbage collection or just CC?
        self.raw_tuple = None
        self.tmps = None

def entries_from_seq(tmps, sequence):
    """ An iterator which yields FileManifestEntries from a sequence of
        (name, raw_data) tuples.

        REQUIRES: sequence not modified while iterating.
    """
    for value in sequence:
        yield RawDataTupleEntry(tmps, value)