""" Infocalypse Freenet hg repo update graph.

    Copyright (C) 2009 Darrell Karbott

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public
    License as published by the Free Software Foundation; either
    version 2.0 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

    Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks
"""

# REDFLAG: Document how dealing with missing indices works...
# REDFLAG: Document how pruning works.
# REDFLAG: Remove unused crap from this file
# REDFLAG: push MAX_PATH_LEN into graph class -> max_cannonical_len
# REDFLAG: stash version map info in the graph?
# REDFLAG: DOCUMENT version sorting assumptions/requirements
import copy
import random

from binascii import hexlify
from mercurial import commands

# Index for an empty repo.
FIRST_INDEX = -1
# The 40-hex-digit id of Mercurial's null revision.
NULL_REV = '0000000000000000000000000000000000000000'
# Placeholder CHK values for edges whose inserts haven't completed yet.
PENDING_INSERT = 'pending'
PENDING_INSERT1 = 'pending1'

# Values greater than 4 won't work without fixing the implementation
# of canonical_paths().
MAX_PATH_LEN = 4

INSERT_NORMAL = 1 # Don't transform inserted data.
INSERT_PADDED = 2 # Add one trailing byte.
INSERT_SALTED_METADATA = 3 # Salt Freenet splitfile metadata.
INSERT_HUGE = 4 # Full re-insert with alternate metadata.

# The size of Freenet data blocks.
FREENET_BLOCK_LEN = 32 * 1024

# Hmmm... arbitrary.
MAX_REDUNDANT_LENGTH = 128 * 1024

# HACK: Approximate multi-level splitfile boundary
MAX_METADATA_HACK_LEN = 7 * 1024 * 1024

############################################################
# Mercurial helper functions.
def hex_version(repo, version = 'tip', offset=0):
    """ Returns the 40 digit hex changeset id for a changeset in repo.

        If offset is non-zero, returns the id of the changeset at
        rev() + offset instead. """
    ctx = repo.changectx(version)
    assert ctx is not None
    if offset:
        ctx = repo.changectx(ctx.rev() + offset)
        assert ctx is not None
    return hexlify(ctx.node())

def has_version(repo, version):
    """ Returns True if repo already contains the changeset version,
        False otherwise. """
    try:
        # Is there a faster way?
        repo.changectx(version)
    except Exception:
        # Deliberately broad: Mercurial 1.2 moved RepoError /
        # revlog.LookupError so we can't catch them by name here.
        # Using Exception (not a bare except) so KeyboardInterrupt
        # and SystemExit still propagate.
        return False
    return True

def pull_bundle(repo, ui_, bundle_file):
    """ Pull an hg bundle file into repo.

        bundle_file must be an absolute path.
    """
    # Historical note: older code had to chdir into repo.root and run
    # commands.pull because of an hg quirk
    # (see http://marc.info/?l=mercurial&m=118362491521186&w=2).
    # commands.unbundle works without the cwd dance.
    # REDFLAG: djk20090319, is this still an issue?
    commands.unbundle(ui_, repo, bundle_file, rev=[],
                      force=None, update=None)

############################################################

def edges_containing(graph, index):
    """ INTERNAL: Returns a list of edges containing index in order of
        ascending 'canonicalness'.

        The best edge sorts last so callers can pop() it cheaply.
    """
    # Key-based sort replaces the old cmp function (cmp= is slower and
    # was removed in Python 3).  Ordering:
    #   1) ascending final index   == most recent last
    #   2) descending initial index == most redundant last
    #   3) descending redundancy ordinal == most 'canonical' last
    edges = graph.contain(index)
    edges.sort(key=lambda edge: (edge[1], -edge[0], -edge[2]))
    return edges

def tail(list_value):
    """ Returns the last element of a sequence.

        Raises IndexError if the sequence is empty. """
    # Idiomatic negative indexing instead of list_value[len(list_value) - 1].
    return list_value[-1]

def canonical_path_itr(graph, from_index, to_index, max_search_len):
    """ A generator which returns a sequence of canonical paths in
        descending order of 'canonicalness'.

        Yields lists of edge triples; each list spans the index range
        from from_index to (at least) to_index.  Shorter paths are
        yielded before longer ones, up to max_search_len steps.
        Yields nothing if no path exists within max_search_len. """

    returned = set([])
    min_search_len = -1
    # Iterative deepening: redo the depth-first traversal, allowing
    # progressively longer paths, until max_search_len is exceeded.
    while min_search_len <= max_search_len:
        visited = set([]) # Retraverse for each length! REDFLAG: Do better?
        # Stack of candidate-edge lists; the best edge in each list is
        # last (see edges_containing()), so it can be popped cheaply.
        steps = [edges_containing(graph, from_index), ]
        current_search_len = max_search_len
        while len(steps) > 0:
            while len(tail(steps)) > 0:
                #candidate = [tail(paths) for paths in steps]
                #print "candidate: ", candidate
                #print "end: ", tail(tail(steps))
                if tail(tail(steps))[1] >= to_index:
                    # The edge at the bottom of every list.
                    value = [tail(step) for step in steps]
                    #print "HIT:"
                    #print value

                    if min_search_len == -1:
                        min_search_len = len(steps)

                    current_search_len = max(len(steps), min_search_len)
                    # Paths are deduped by their string representation.
                    tag = str(value)
                    if not tag in returned:
                        returned.add(tag)

                        # Shorter paths should already be in returned.
                        assert len(value) >= min_search_len
                        assert len(value) <= max_search_len
                        yield value
                    tail(steps).pop()
                elif len(steps) < current_search_len:
                    tag = str([tail(step) for step in steps])
                    if not tag in visited:
                        # Follow the path one more step.
                        visited.add(tag)
                        steps.append(edges_containing(graph,
                                                      tail(tail(steps))[1] + 1))
                    else:
                        tail(steps).pop()
                else:
                    # Abandon the path because it's too long.
                    tail(steps).pop()

            # Get rid of the empty list
            assert len(tail(steps)) == 0
            steps.pop()
        if min_search_len == -1:
            #print "No such path."
            return
        min_search_len += 1

    #print "exiting"
    # Done iterating.

def get_changes(repo, version_map, versions):
    """ INTERNAL: Helper function used by UpdateGraph.update()
        to determine which changes need to be added.

        Returns a (base_revs, new_heads) tuple.
        Raises UpToDate if there is nothing new to add. """
    if versions is None:
        # Default to every head in the local repository.
        versions = [hexlify(head) for head in repo.heads()]
    else:
        versions = list(versions) # Copy so we don't mutate the caller's value.
        # Normalize all versions to 40 digit hex strings.
        for index, version in enumerate(versions):
            versions[index] = hex_version(repo, version)

    if NULL_REV in versions:
        versions.remove(NULL_REV)

    new_heads = []
    for version in versions:
        if version not in version_map:
            new_heads.append(version)

    if len(new_heads) == 0:
        if len(versions) > 0:
            versions.sort()
            raise UpToDate("Already in repo: " + ' '.join([ver[:12] for
                                                           ver in versions]))
        else:
            raise UpToDate("Empty repository. Nothing to add.")

    # Only the FIRST_INDEX sentinel entry exists, so everything is
    # based on the null revision.
    if len(version_map) == 1:
        return ((NULL_REV,), new_heads)

    # Determine base revs.
    base_revs = set([])
    traversed = set([])
    for head in new_heads:
        find_latest_bases(repo, head, version_map, traversed, base_revs)

    return (base_revs, new_heads)

############################################################

def block_cost(length):
    """ Return the number of Freenet blocks required to store
        data of length, length. """
    # Explicit floor division: keeps the result an integer and keeps
    # working if true division is ever enabled for this module.
    blocks = length // FREENET_BLOCK_LEN
    if (length % FREENET_BLOCK_LEN) != 0:
        blocks += 1
    return blocks

class UpdateGraphException(Exception):
    """ Base class for UpdateGraph exceptions. """
    def __init__(self, msg):
        super(UpdateGraphException, self).__init__(msg)

class UpToDate(UpdateGraphException):
    """ Exception thrown to indicate that an update failed because
        the graph already contains the specified local changes. """
    def __init__(self, msg):
        super(UpToDate, self).__init__(msg)

class UpdateGraph:
    """ A digraph representing an Infocalypse Freenet hg repository.

        Vertices ('indices') encapsulate the parameters needed to
        bundle a collection of changes (base revs -> head revs).
        Edges are hg bundle files, stored in Freenet as CHKs, carrying
        the changesets between two indices. """

    def __init__(self):
        # Vertices in the update digraph.
        #
        # An indice is an encapsulation of the parameters that you
        # need to bundle a collection of changes.
        #
        # index_ordinal -> ((base_revs, ), (tip_revs, ))
        self.index_table = {FIRST_INDEX:((), (NULL_REV,))}

        # These are edges in the update digraph.
        # There can be multiple redundant edges.
        #
        # This is what is actually stored in Freenet.
        # Edges contain changesets for the indices from
        # start_index + 1 to end_index, but not for start_index.
        # (start_index, end_index) -> (length, chk@, chk@,  ...)
        self.edge_table = {}

        # Ordinal of the most recently added index (FIRST_INDEX when empty).
        self.latest_index = -1

    def clone(self):
        """ Return a deep copy of the graph. """
        return copy.deepcopy(self) # REDFLAG: correct

    # Contains the end_index changesets but not the start index changesets.
    def add_edge(self, index_pair, length_chk_pair):
        """ Add a new edge to the graph.

            If an edge for index_pair already exists, the CHK is appended
            as a redundant copy; its length must match the existing one.

            Returns the new edge triple:
            (start_index, end_index, chk_ordinal). """
        assert len(index_pair) == 2
        assert len(length_chk_pair) == 2
        assert index_pair[0] <= index_pair[1]

        edge_info = self.edge_table.get(index_pair)
        if edge_info is None:
            edge_info = tuple(length_chk_pair)
        else:
            if length_chk_pair[0] != edge_info[0]:
                raise ValueError("Redundant edge doesn't have same length.")
            edge_info = list(edge_info)
            edge_info.append(length_chk_pair[1])
            edge_info = tuple(edge_info)

        self.edge_table[index_pair] = edge_info
        return (index_pair[0], index_pair[1],
                len(self.edge_table[index_pair]) - 2)

    ############################################################
    # Helper functions used when inserting / requesting
    # the edge CHKs.
    ############################################################
    def has_chk(self, edge_triple):
        """ Return True if the graph has a CHK for the edge,
            false otherwise. """
        chk = self.edge_table.get(edge_triple[:2])[1:][edge_triple[2]]
        return chk.startswith('CHK@') # Hmmm... False for pending???

    def get_chk(self, edge_triple):
        """ Return the CHK for an edge. """
        return self.edge_table[edge_triple[:2]][1:][edge_triple[2]]

    def get_length(self, edge_triple):
        """ Return the length of the hg bundle file for an edge. """
        return self.edge_table.get(edge_triple[:2])[0]

    def is_redundant(self, edge_triple):
        """ Return True if there is more than one CHK for the
            edge, False otherwise. """
        return len(self.edge_table[edge_triple[:2]]) > 2

    # REDFLAG: fix signature to take an edge triplet?
    # Hmmm... too much paranoia. just assert?
    def set_chk(self, index_pair, ordinal, length, chk):
        """ Set the CHK for an edge.

            Raises UpdateGraphException if the edge or ordinal doesn't
            exist, or if length doesn't match the stored edge length.
            Prints a warning if a non-pending CHK is being replaced. """
        edge_info = self.edge_table.get(index_pair)
        if edge_info is None:
            raise UpdateGraphException("No such edge: %s" % str(index_pair))
        edge_list = list(edge_info)
        if len(edge_list) < ordinal + 2:
            raise UpdateGraphException("No such chk ordinal: [%s]:%i"
                                       % (str(index_pair), ordinal))
        if edge_list[0] != length:
            raise UpdateGraphException("Length mismatch: [%s]:%i"
                                       % (str(index_pair), ordinal))
        if not edge_list[ordinal + 1].startswith(PENDING_INSERT):
            print "set_chk -- replacing a non pending chk (%i, %i, %i)?" % \
                  (index_pair[0], index_pair[1], ordinal)
            if edge_list[ordinal + 1] == chk:
                print "Values are same."
            else:
                print "Values are different:"
                print "old:", edge_list[ordinal + 1]
                print "new:", chk
        edge_list[ordinal + 1] = chk
        self.edge_table[index_pair] = tuple(edge_list)

# def insert_type_(self, edge_triple):
#         """ Return the kind of insert required to insert the CHK
#             for the edge.

#             INSERT_NORMAL -> No modification to the bundle file.
#             INSERT_PADDED -> Add one trailing pad byte.
#             INSERT_SALTED_METADATA -> Copy and salt the Freenet
#             split file metadata for the normal insert. """
#         edge_info = self.edge_table[edge_triple[:2]]
#         #print "insert_type -- ", edge_triple, entry
#         if edge_info[edge_triple[2] + 1] == PENDING_INSERT:
#             return INSERT_NORMAL
#         if edge_info[edge_triple[2] + 1] != PENDING_INSERT1:
#             raise ValueError("CHK already set?")
#         if edge_info[0] <= FREENET_BLOCK_LEN:
#             return INSERT_PADDED
#         return INSERT_SALTED_METADATA

    def insert_type(self, edge_triple):
        """ Return the kind of insert required to insert the CHK
            for the edge.

            INSERT_NORMAL -> No modification to the bundle file.
            INSERT_PADDED -> Add one trailing pad byte.
            INSERT_SALTED_METADATA -> Copy and salt the Freenet
            split file metadata for the normal insert.
            INSERT_HUGE -> Full re-insert of data that's too big
            for metadata salting.
            """

        # Ordinal 0 is always the unmodified primary insert.
        if edge_triple[2] == 0:
            return INSERT_NORMAL

        assert edge_triple[2] == 1

        length = self.edge_table[edge_triple[:2]][0]

        # REDFLAG: MUST DEAL WITH ==32k case, djk20080425 -- I think this is ok
        # NOTE(review): this uses <= but insert_length() uses < for the
        # same boundary; confirm the length == FREENET_BLOCK_LEN case.
        if length <= FREENET_BLOCK_LEN:
            # Made redundant path by padding.
            return  INSERT_PADDED

        if length <= MAX_METADATA_HACK_LEN:
            return INSERT_SALTED_METADATA

        print "insert_type -- called for edge that's too big to salt???"
        print edge_triple
        return INSERT_HUGE

    def insert_length(self, step):
        """ Returns the actual length of the data inserted into
            Freenet for the edge. """
        length = self.edge_table.get(step[:2])[0]
        if step[2] == 0:
            # No hacks on primary insert.
            return length
        # NOTE(review): strict < here vs <= in insert_type() -- verify
        # which is intended when length == FREENET_BLOCK_LEN.
        if length < FREENET_BLOCK_LEN:
            # Made redundant path by padding.
            return  length + 1

        # Salted the metadata. Data length unaffected.
        return length

    ############################################################

    def add_index(self, base_revs, new_heads):
        """ Add changes to the graph.

            Returns the new index ordinal.
            Side effect: sorts base_revs and new_heads in place. """
        assert not NULL_REV in new_heads
        assert len(base_revs) > 0
        assert len(new_heads) > 0
        base_revs.sort()
        new_heads.sort()
        if self.latest_index != FIRST_INDEX and NULL_REV in base_revs:
            print "add_index -- base=null in base_revs. Really do that?"
        self.latest_index += 1
        self.index_table[self.latest_index] = (tuple(base_revs),
                                               tuple(new_heads))
        return self.latest_index

    # REDFLAG: really no need for ui? if so, remove arg
    # Index and edges to insert
    # Returns index triples with new edges that need to be inserted.
    def update(self, repo, dummy, versions, cache):
        """ Update the graph to include versions up to version
            in repo.

            This may add multiple edges for redundancy.

            Returns the new edges.

            dummy is unused (historically a ui object).
            cache must support make_redundant_bundle() and make_bundle().

            The client code is responsible for setting their CHKs!"""

        version_map = build_version_table(self, repo)

        base_revs, new_heads = get_changes(repo, version_map, versions)

        # IMPORTANT: Order matters. Must be after find_latest_bases above.
        # Update the map. REDFLAG: required?
        #for version in new_heads:
        #    version_map[version] = self.latest_index + 1

        index = self.add_index(list(base_revs), new_heads)
        new_edges = []

        #print "ADDED INDEX: ", index
        #print self.index_table
        # Insert index w/ rollup if possible.
        first_bundle = cache.make_redundant_bundle(self, version_map, index)

        new_edges.append(self.add_edge(first_bundle[2],
                                       (first_bundle[0], PENDING_INSERT)))
        #print "ADDED EDGE: ", new_edges[-1]
        bundle = None
        try:
            canonical_path = self.canonical_path(index, MAX_PATH_LEN)
            assert len(canonical_path) <= MAX_PATH_LEN
        except UpdateGraphException:
            # We need to compress the path.
            short_cut = self._compress_canonical_path(index, MAX_PATH_LEN + 1)

            bundle = cache.make_bundle(self, version_map, short_cut)

            new_edges.append(self.add_edge(bundle[2],
                                           (bundle[0], PENDING_INSERT)))
            # MAX_PATH_LEN + 1 search can be very slow.
            canonical_path = self.canonical_path(index, MAX_PATH_LEN + 1)

            assert len(canonical_path) <= MAX_PATH_LEN

        if bundle == None:
            if (first_bundle[0] <= FREENET_BLOCK_LEN and
                first_bundle[2][0] < index - 1):
                # This gives us redundancy at the cost of one 32K block.

                bundle = cache.make_bundle(self,
                                           version_map,
                                           (first_bundle[2][0] + 1,
                                            index))
                new_edges.append(self.add_edge(bundle[2],
                                               (bundle[0],
                                                PENDING_INSERT)))
            elif first_bundle[0] <= MAX_METADATA_HACK_LEN:
                # Request insert of a redundant copy of exactly the same
                # bundle.
                bundle = first_bundle[:]
                new_edges.append(self.add_edge(bundle[2], (bundle[0],
                                                           PENDING_INSERT1)))
            else:
                print "update -- Bundle too big to salt! CHK: %i" \
                      % first_bundle[0]

        new_edges = new_edges + self._add_canonical_path_redundancy()

        return new_edges

    def get_top_key_edges(self):
        """ Returns the ordered list of edges that should be
            included in the top key.

            Up to two cheap 'immediate update' edges first, then the
            canonical path edges, most recent first. """
        self.rep_invariant()

        edges = []

        #print "LATEST_INDEX: ", self.latest_index

        paths = self.enumerate_update_paths(self.latest_index,
                                             self.latest_index, 1)
        #print paths

        paths.sort(self._cmp_block_cost)
        #dump_paths(self, paths, "Paths sorted by block cost")

        if len(paths) > 0:
            # Path with the most changes in the least blocks.
            edges.append(paths[0][0])
            del paths[0]

        if len(paths) > 0:
            # REDFLAG: == 32k case for padding crosses block boundry...
            if (block_cost(self.path_cost([edges[0], ])) ==
                block_cost(self.path_cost(paths[0]))):
                # One more at the same cost if there is one.
                edges.append(paths[0][0])
                del paths[0]

        # The canonical path
        path = list(self.canonical_path(self.latest_index, MAX_PATH_LEN))

        path.reverse() # most recent first.
        for step in path:
            if not step in edges:
                edges.append(step)

        # 2 possibly redundant immediate update keys, and MAX_PATH_LEN
        # canonical path keys. Actually at least one of the canonical keys
        # should already be in the immediate updates.
        assert len(edges) < 4 + MAX_PATH_LEN
        return edges

    def enumerate_update_paths(self, containing_start, to_end, max_len,
                                partial_path=()):

        """ INTERNAL: Returns a list of paths from the start index to the end
            index.

            Recursive depth-first enumeration, bounded by max_len steps. """

        if max_len <= 0:
            return []
        ret = []

        candidates = self.contain(containing_start)
        #print "CANDIDATES: ", candidates
        for candidate in candidates:
            if candidate[1] >= to_end:
                ret.append(partial_path + (candidate,))
            else:
                ret += self.enumerate_update_paths(candidate[1] + 1, to_end,
                                                   max_len - 1,
                                                   partial_path
                                                   + (candidate,))
        return ret

    # REQUIRES: Using the same index mappings!
    def copy_path(self, from_graph, path):
        """ Copy a path from one graph to another.

            Returns True if any edge was actually copied. """
        copied = False
        for step in path:
            pair = step[:2]
            if not pair in self.edge_table:
                copied = True
                self.edge_table[pair] = (
                    from_graph.edge_table[pair][:]) # Deep copy
                for index in pair:
                    if index not in self.index_table:
                        self.index_table[index] = (
                            from_graph.index_table[index][:]) # Deep copy
        return copied

    def canonical_path(self, to_index, max_search_len):
        """ Returns shortest preferred path from no updates
            to latest_index.

            This is what you would use to bootstrap from hg rev -1.

            Raises UpdateGraphException if no path exists within
            max_search_len steps. """
        try:
            return canonical_path_itr(self, 0, to_index, max_search_len).next()
        except StopIteration:
            raise UpdateGraphException("No such path: %s"
                                       % str((0, to_index)))

    def path_cost(self, path, blocks=False):
        """ The sum of the lengths of the hg bundles required to update
            using the path.

            If blocks is True, returns the cost in Freenet blocks
            instead of bytes. """

        value = 0
        for step in path:
            if blocks:
                value += block_cost(self.edge_table[step[:2]][0])
            else:
                value += self.edge_table[step[:2]][0]

        return value

    # Returns ((start_index, end_index, chk_list_ordinal), ...)
    def contain(self, contains_index):
        """ Returns a list of edge triples which contain contains_index.

            An edge (start, end) contains index iff start < index <= end;
            one triple is returned per redundant CHK. """
        ret = []
        for pair in self.edge_table:
            if pair[0] >= contains_index:
                continue
            if pair[1] < contains_index:
                continue
            for index in range(0, len(self.edge_table[pair]) - 1):
                ret.append(pair + (index,))
        return ret

    def cmp_recency(self, path_a, path_b):
        """ INTERNAL: A comparison function for sorting single edge paths
            by recency. """
        # Only for steps
        assert len(path_a) == 1
        assert len(path_b) == 1

         # Only steps in the paths.
        step_a = path_a[0]
        step_b = path_b[0]

        if step_a[1] == step_b[1]:
            if step_a[2] == step_b[2]:
                # Ascending Length. TRICKY: because of padding hacks.
                return (self.insert_length(step_a)
                        - self.insert_length(step_b))
            # Ascending redundancy. i.e. "most canonical" first
            return step_a[2] - step_b[2]

        # descending initial update. i.e. Most recent first.
        return step_b[1] - step_a[1]

    def _cmp_block_cost(self, path_a, path_b):
        """ INTERNAL: A comparison function for sorting single edge paths
            in order of ascending order of block count. """
        assert len(path_a) == 1
        assert len(path_b) == 1

        cost_a = self.insert_length(path_a[0])
        cost_b = self.insert_length(path_b[0])

        # Actually block cost - 1, but that's ok.
        block_cost_a = cost_a / FREENET_BLOCK_LEN
        block_cost_b = cost_b / FREENET_BLOCK_LEN

        if block_cost_a == block_cost_b:
            mod_a = cost_a % FREENET_BLOCK_LEN
            mod_b = cost_b % FREENET_BLOCK_LEN
            if mod_a == mod_b:
                # Ascending order of redundancy ordinal.
                return int(path_a[0][2] - path_b[0][2])

            # Descending order of length (for same block size)
            return int(mod_b - mod_a)

        # Ascending order of length in blocks
        return int(block_cost_a - block_cost_b)

    # REDFLAG: Can the edge already exists?
    # Only makes sense for latest index. get rid of latest_index argument?

    # enforce constraint that sum of costs head must be less
    # than the cost for the prev step. power law???

    # REQURIES: len(canonical_path(latest_index)) > 1
    def _compress_canonical_path(self, to_index, max_search_len=10):
        """ Return an index tuple for a new shortcut path that would
            reduces the canonical path length by at least one, favoring
            accumulation of hg bundle size  at the start of the path. """


        shortest_known = self.canonical_path(to_index, max_search_len)
        #print "SHORTEST_KNOWN: ", shortest_known
        assert len(shortest_known) > 1

        if len(shortest_known) == 2:
            # We only have one move.
            return (shortest_known[0][0], shortest_known[-1][1])

        # REDFLAG: Shouldn't this be using block cost?
        for index in range(1, len(shortest_known)):
            prev_cost = self.path_cost((shortest_known[index - 1],))
            if self.path_cost(shortest_known[index:]) > prev_cost:
                return (shortest_known[index - 1][0], shortest_known[-1][1])
        return (shortest_known[-2][0], shortest_known[-1][1])

    def _add_canonical_path_redundancy(self):
        """ Adds redundant edges for steps on the canonical path.

            Returns the new edges.
        """
        ret = []
        path = self.canonical_path(self.latest_index, MAX_PATH_LEN)
        for index, step in enumerate(path):
            if index == MAX_PATH_LEN - 1:
                # Don't try to add redundancy to the last (latest) step
                break
            entries = self.edge_table[step[:2]]
            if len(entries) > 2:
                # Already redundant
                continue
            assert step[2] == 0
            assert entries[1] != PENDING_INSERT1
            if entries[0] <= FREENET_BLOCK_LEN:
                #print "_add_canonical_path_redundancy -- too small: ", \
                # str(step)
                continue
            if entries[0] > MAX_METADATA_HACK_LEN:
                #print "_add_canonical_path_redundancy -- too big: ", str(step)
                continue
            edge = self.add_edge(step[:2], (entries[0], PENDING_INSERT1))
            #print "_add_canonical_path_redundancy -- added edge: ", str(edge)
            ret.append(edge)
        return ret

    def rep_invariant(self, repo=None, full=True):
        """ Debugging function to check invariants.

            With a repo, also verifies the version map against it;
            full additionally checks that every repo rev is mapped. """
        max_index = -1
        min_index = -1
        for index in self.index_table.keys():
            max_index = max(index, max_index)
            min_index = min(index, min_index)
        assert self.latest_index == max_index
        assert min_index == FIRST_INDEX

        assert self.index_table[FIRST_INDEX][0] == ()
        assert self.index_table[FIRST_INDEX][1] == (NULL_REV, )
        for index in range(0, self.latest_index + 1):
            # Indices must be contiguous.
            assert index in self.index_table

            # Each index except for the empty graph sentinel
            # must have at least one base and head rev.
            assert len(self.index_table[index][0]) > 0
            assert len(self.index_table[index][1]) > 0

        # All edges must be resolvable.
        for edge in self.edge_table.keys():
            assert edge[0] in self.index_table
            assert edge[1] in self.index_table
            assert edge[0] < edge[1]


        if repo is None:
            return

        # Slow
        version_map = build_version_table(self, repo)

        values = set(version_map.values())
        values = list(values)
        values.sort()
        assert values[-1] == max_index
        assert values[0] == FIRST_INDEX
        # Indices contiguous
        assert values == range(FIRST_INDEX, max_index + 1)

        if full:
            # Verify that version map is complete.
            copied = version_map.copy()
            for rev in range(-1, repo['tip'].rev() + 1):
                version = hex_version(repo, rev)
                assert version in copied
                del copied[version]

            assert len(copied) == 0

        every_head = set([])
        for index in range(FIRST_INDEX, max_index + 1):
            versions = set([])
            # NOTE(review): [0] + [0] concatenates the base revs with
            # themselves -- heads ([1]) are never checked here.  Looks
            # like it should be [0] + [1]; confirm before changing.
            for version in (self.index_table[index][0]
                            + self.index_table[index][0]):
                assert version in version_map
                if version in versions:
                    continue

                assert has_version(repo, version)
                versions.add(version)

            # Base versions must have a lower index.
            for version in self.index_table[index][0]:
                assert version_map[version] < index

            # Heads must have index == index.
            for version in self.index_table[index][1]:
                assert version_map[version] == index
                # Each head should appear in one and only one index.
                assert not version in every_head
                every_head.add(version)

# REDFLAG: O(n), has_index().
def latest_index(graph, repo):
    """ Returns the index of the latest hg version in the graph
        that exists in repo. """
    graph.rep_invariant()
    # Walk from newest to oldest, returning the first index all of
    # whose heads are present in the local repository.
    for index in range(graph.latest_index, FIRST_INDEX - 1, -1):
        if index not in graph.index_table:
            continue
        # BUG: Dog slow for big repos? cache index -> heads map
        if all(has_version(repo, head) for head in get_heads(graph, index)):
            return index

    return FIRST_INDEX

def chk_to_edge_triple_map(graph):
    """ Returns a CHK -> edge triple map.

        Asserts that no CHK appears on more than one edge. """
    mapping = {}
    for pair, entry in graph.edge_table.items():
        # entry is (length, chk, chk, ...); skip the length.
        for ordinal, chk in enumerate(entry[1:]):
            assert mapping.get(chk) is None
            mapping[chk] = (pair[0], pair[1], ordinal)
    return mapping

def break_edges(graph, kill_probability, skip_chks):
    """ Testing function breaks edges by replacing the CHKs with a known
        bad one. """
    bad_chk = ('CHK@badroutingkeyB55JblbGup0yNSpoDJgVPnL8E5WXoc,'
               +'KZ6azHOwEm4ga6dLy6UfbdSzVhJEz3OvIbSS4o5BMKU,AAIC--8')
    for edge in graph.edge_table:
        edge_info = graph.edge_table[edge[:2]]
        length = edge_info[0]
        chks = edge_info[1:]
        for index in range(0, len(chks)):
            if graph.get_chk((edge[0], edge[1], index)) in skip_chks:
                # Hack to skip pending requests.
                print "break_edges -- skipped: ", (edge[0], edge[1], index)
                continue
            if random.random() < kill_probability:
                graph.set_chk(edge, index, length, bad_chk)

def pretty_index(index):
    """ Format an index value for output.

        The sentinel index for an empty repo renders as '.'. """
    return "." if index == FIRST_INDEX else str(index)

def dump_path(graph, path):
    """ Debugging function to print a path. """
    if len(path) == 0:
        print "EMPTY PATH!"
        return

    print "(%s)-->[%s] cost=%0.2f" % (pretty_index(path[0][0]),
                                      pretty_index(path[-1][1]),
                                      graph.path_cost(path, True))
    for step in path:
        cost = graph.get_length(step)
        print "   (%s) -- (%0.2f, %i) --> [%s]" % (pretty_index(step[0]),
                                                cost,
                                                step[2],
                                                pretty_index(step[1]))
def dump_paths(graph, paths, msg):
    """ Debugging function to dump a list of paths. """
    print  "--- %s ---" % msg
    for path in paths:
        dump_path(graph, path)
    print "---"

def print_list(msg, values):
    """ INTERNAL: Helper function. """
    if msg:
        print msg
    for value in values:
        print "   ", value
    if len(values) == 0:
        print
# REDFLAG: is it a version_map or a version_table? decide and fix all names
# REDFLAG: Scales to what? 10k nodes?
# Returns version -> index mapping
# REQUIRES: Every version is in an index!
def build_version_table(graph, repo):
    """ INTERNAL: Build a version -> index ordinal map for all changesets
        in the graph.

        Returns a dict mapping 40 digit hex changeset ids to the index
        that introduced them; NULL_REV maps to -1 (the empty repo).
        REQUIRES: every version reachable from the graph's heads belongs
        to some index. """
    table = {NULL_REV:-1}
    for index in range(0, graph.latest_index + 1):
        assert index in graph.index_table
        dummy, heads = graph.index_table[index]
        for head in heads:
            # The first index that mentions a version wins; later
            # indices never overwrite an existing entry.
            # (Removed a dead 'assert not head in table' which merely
            # restated this guard condition.)
            if head not in table:
                table[head] = index

            # Ancestors not yet seen are attributed to this index.
            for ancestor in repo[head].ancestors():
                version = hexlify(ancestor.node())
                if version not in table:
                    table[version] = index
    return table

# Find most recent ancestors for version which are already in
# the version map. REDFLAG: fix. don't make reference to time

def _check_base(repo, version, version_map, traversed, base_revs):
    """ INTERNAL: Check for a given base,
    if it's in traversed (do nothing)
    or version map (add to base_revs).
    If not one of these, return the parents."""
    if version in traversed:
        return
    traversed.add(version)
    if version in version_map:
        #print "   find_latest_bases -- adding: ", version[:12]
        base_revs.add(version)
        return
    parents = [hexlify(parent.node()) for parent in repo[version].parents()]
    return parents

def find_latest_bases(repo, version, version_map, traversed, base_revs):
    """ INTERNAL: Add latest known base revs for version to base_revs.

        Breadth-first walk towards ancestors, stopping at the first
        versions already present in version_map. """
    assert version_map != {NULL_REV:FIRST_INDEX}
    frontier = _check_base(repo, version, version_map, traversed, base_revs)
    while frontier:
        next_frontier = []
        for candidate in frontier:
            expanded = _check_base(repo, candidate, version_map,
                                   traversed, base_revs)
            if expanded is not None:
                next_frontier.extend(expanded)
        frontier = next_frontier


# REDFLAG: correct?  I can't come up with a counter example.
def get_heads(graph, to_index=None):
    """ Returns the 40 digit hex changeset ids of the heads.

        A head is any version listed as a head of some index up to
        to_index (default: the latest index) that never appears as a
        base.  Result is a sorted tuple. """
    if to_index is None:
        to_index = graph.latest_index

    bases = set()
    heads = set()
    for index in range(FIRST_INDEX, to_index + 1):
        index_bases, index_heads = graph.index_table[index]
        bases.update(index_bases)
        heads.update(index_heads)
    return tuple(sorted(heads - bases))

# ASSUMPTIONS:
# 0) heads which don't appear in bases are tip heads. True?

# INVARIANTS:
# o every changeset must exist "in" one and only one index
#   -> contiguousness
# o the parent revs for all the changesets in every index
#   must exist in a previous index (though not necessarily the
#   immediate predecessor)
# o indices referenced by edges must exist
# o latest index must be set correctly
# o indices must be contiguous
# o FIRST_INDEX in index_table