""" Functions for manipulating UpdateGraphs. Copyright (C) 2009 Darrell Karbott This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks """ from binascii import hexlify from graph import FIRST_INDEX, MAX_PATH_LEN, UpdateGraph, \ UpdateGraphException, canonical_path_itr, edges_containing, INSERT_HUGE, \ INSERT_NORMAL, MAX_METADATA_HACK_LEN ############################################################ # Doesn't dump FIRST_INDEX entry. def graph_to_string(graph): """ Returns a human readable representation of the graph. """ lines = [] # Indices indices = graph.index_table.keys() indices.sort() for index in indices: if index == FIRST_INDEX: continue entry = graph.index_table[index] # Example: # I:0:aaaaaaaaaaaa:|:bbbbbbbbbbbb:cccccccccccc lines.append(':'.join(('I', str(index), ':'.join(entry[0]), '|', ':'.join(entry[1])))) # Edges index_pairs = graph.edge_table.keys() # MUST sort so you get the same CHK for the same graph instance. index_pairs.sort() for index_pair in index_pairs: edge_info = graph.edge_table[index_pair] as_str = ':'.join(edge_info[1:]) if as_str != '': as_str = ':' + as_str lines.append("E:%i:%i:%i%s" % (index_pair[0], index_pair[1], edge_info[0], as_str)) return '\n'.join(lines) + '\n' def parse_graph(text): """ Returns a graph parsed from text. text must be in the format used by graph_to_string(). Lines starting with '#' are ignored. """ graph = UpdateGraph() lines = text.split('\n') for line in lines: fields = line.split(':') if fields[0] == 'I': fields.pop(0) try: if len(fields) == 0: raise ValueError("HACK") index = int(fields.pop(0)) except ValueError: raise ValueError("Syntax error reading index") try: divider = fields.index('|') except ValueError: raise ValueError("Syntax error reading index %i" % index) parents = fields[:divider] heads = fields[divider + 1:] if index in graph.index_table: print "OVERWRITING INDEX: " , index if len(parents) < 1: raise ValueError("index %i has no parent revs" % index) if len(heads) < 1: raise ValueError("index %i has no head revs" % index) graph.index_table[index] = (tuple(parents), tuple(heads)) elif fields[0] == 'E': #print fields if len(fields) < 5: raise ValueError("Exception parsing edge values.") index_pair = (int(fields[1]), int(fields[2])) length = int(fields[3]) chk_list = [] for chk in fields[4:]: chk_list.append(chk) graph.edge_table[index_pair] = tuple([length, ] + chk_list) #else: # print "SKIPPED LINE:" # print line indices = graph.index_table.keys() if len(indices) == 0: raise ValueError("No indices?") indices.sort() graph.latest_index = indices[-1] graph.rep_invariant() return graph def parse_v100_graph(text): """ Returns a graph parsed from text in old format. text must be in the format used by graph_to_string(). Lines starting with '#' are ignored. 
""" graph = UpdateGraph() lines = text.split('\n') for line in lines: fields = line.split(':') if fields[0] == 'I': if len(fields) != 4: raise ValueError("Exception parsing index values.") index = int(fields[1]) if index in graph.index_table: print "OVERWRITING INDEX: " , index if len(tuple(fields[2:])) != 2: raise ValueError("Error parsing index value: %i" % index) versions = tuple(fields[2:]) graph.index_table[index] = ((versions[0], ), (versions[1], )) elif fields[0] == 'E': #print fields if len(fields) < 5: raise ValueError("Exception parsing edge values.") index_pair = (int(fields[1]), int(fields[2])) length = int(fields[3]) chk_list = [] for chk in fields[4:]: chk_list.append(chk) graph.edge_table[index_pair] = tuple([length, ] + chk_list) #else: # print "SKIPPED LINE:" # print line indices = graph.index_table.keys() if len(indices) == 0: raise ValueError("No indices?") indices.sort() graph.latest_index = indices[-1] graph.rep_invariant() return graph ############################################################ def should_add_head(repo, version_table, head, to_index): """ INTERNAL: Helper function used by get_rollup_bounds. """ children = [hexlify(child.node()) for child in repo[head].children()] for child in children: # Versions that we don't know about, don't count. # REDFLAG: Think this through if not child in version_table: #print "head: %s %s not in version table. IGNORING" \ # % (head[:12], child[:12]) continue # REDFLAG: Check. This graph stuff makes me crazy. child_index = version_table[child] if child_index <= to_index: # Has a child in or under the index we are rolling up to. # Not a head! #print "should_add_head -- returned False, %i:%s" % # (child_index, child[:12]) return False #print "should_add_head -- returned True" return True # TRICKY: # You need the repository changeset DAG in order to determine # the base revs, because new changes might have branched from # a change in the middle of a previous index which doesn't # appear explictly in the graph. # def get_rollup_bounds(graph, repo, from_index, to_index, version_table): """ Return a ((parent_revs, ), (head_revs,) tuple required to create a bundle with all the changes [from_index, to_index] (inclusive). """ assert from_index <= to_index assert to_index > FIRST_INDEX #print "get_rollup_bounds -- ", from_index, to_index #print "version_table:", len(version_table) new_bases = set([]) new_heads = set([]) for index in range(from_index, to_index + 1): bases, heads = graph.index_table[index] for base in bases: #print " considering base ", base[:12], version_table[base] if version_table[base] < from_index: #print " adding base ", base[:12], version_table[base] new_bases.add(base) for head in heads: if should_add_head(repo, version_table, head, to_index): new_heads.add(head) new_bases = list(new_bases) new_bases.sort() new_heads = list(new_heads) new_heads.sort() #print "get_rollup_bounds -- returning" #print " bases: ", new_bases #print " heads: ", new_heads assert len(new_bases) > 0 assert len(new_heads) > 0 return (tuple(new_bases), tuple(new_heads)) # call this from subgraph def coalesce_indices(original_graph, graph, repo, version_table): """ INTERNAL: Coalesce changes so that indices (and changes) are contiguous. """ original_graph.rep_invariant() # graph invariants are broken ! assert FIRST_INDEX in graph.index_table assert len(graph.index_table) > 1 # Roll up info in missing indices into existing ones. 
# Call this from subgraph().
def coalesce_indices(original_graph, graph, repo, version_table):
    """ INTERNAL: Coalesce changes so that indices (and changes)
        are contiguous. """
    original_graph.rep_invariant()
    # The invariants of graph are broken at this point!
    assert FIRST_INDEX in graph.index_table
    assert len(graph.index_table) > 1

    # Roll up info in missing indices into the existing ones.
    lacuna = False
    prev_index = graph.latest_index
    for index in range(graph.latest_index - 1, FIRST_INDEX - 1, -1):
        # REDFLAG: There was a bad bug here. Better testing?
        if index in graph.index_table:
            if lacuna:
                # Roll all changes in the missing indices up into
                # the next index which does exist.
                graph.index_table[prev_index] = (
                    get_rollup_bounds(original_graph,
                                      repo,
                                      index + 1,
                                      prev_index,
                                      version_table))
                lacuna = False
            prev_index = index
        else:
            lacuna = True
    # Fires on a trailing lacuna... or an empty graph.
    assert lacuna == False

    # Make indices contiguous.
    indices = graph.index_table.keys()
    indices.sort()

    assert indices[0] == FIRST_INDEX
    assert FIRST_INDEX == -1

    # e.g. surviving indices (-1, 2, 5) are renumbered (-1, 0, 1).
    fixups = {}
    for ordinal, value in enumerate(indices):
        fixups[value] = ordinal - 1

    new_indices = {}
    for old_index in indices:
        # Ok not to copy, the value is immutable (a tuple).
        new_indices[fixups[old_index]] = graph.index_table[old_index]

    new_edges = {}
    for edge in graph.edge_table:
        # Deep copy? Nothing else has a ref to the values.
        new_edges[(fixups[edge[0]], fixups[edge[1]])] = graph.edge_table[edge]

    graph.index_table.clear()
    graph.edge_table.clear()
    graph.index_table.update(new_indices)
    graph.edge_table.update(new_edges)
    graph.latest_index = max(graph.index_table.keys())

    original_graph.rep_invariant()
    graph.rep_invariant()

def subgraph(graph, repo, version_table, containing_paths):
    """ Returns a subgraph which contains the vertices and edges
        in containing_paths. """
    graph.rep_invariant()
    small_graph = UpdateGraph()
    max_index = -1

    # Copy edges and indices.
    for path in containing_paths:
        for step in path:
            pair = step[:2]
            # REDFLAG: Copies ALL redundant paths.
            small_graph.edge_table[pair] = graph.edge_table[pair][:]
            for index in pair:
                if index not in small_graph.index_table:
                    # No need to deep copy, index info is
                    # immutable (a tuple).
                    small_graph.index_table[index] = (
                        graph.index_table[index])
                max_index = max(max_index, index)

    small_graph.latest_index = max_index

    # Fix contiguousness.
    coalesce_indices(graph, small_graph, repo, version_table)

    # Invariants should be fixed.
    small_graph.rep_invariant()
    graph.rep_invariant()

    return small_graph

# REDFLAG: TERMINATE when all edges in the graph have been yielded?
def important_edge_itr(graph, known_paths):
    """ INTERNAL: A generator which yields a sequence of edges in
        order of descending criticality. """
    known_edges = set([])
    for path in known_paths:
        for edge in path:
            known_edges.add(edge)

    # First, edges which are in the canonical path.
    index = graph.latest_index
    canonical_paths = canonical_path_itr(graph, 0, index, MAX_PATH_LEN)
    for path in canonical_paths:
        for edge in path:
            if edge in known_edges:
                continue
            known_edges.add(edge)
            yield edge

    if index == 0:
        return

    # Then add bootstrap paths back to previous indices.
    # This favors older edges.
    for upper_index in range(index - 1, FIRST_INDEX, -1):
        canonical_paths = canonical_path_itr(graph, 0, upper_index,
                                             MAX_PATH_LEN)
        for path in canonical_paths:
            for edge in path:
                if edge in known_edges:
                    continue
                known_edges.add(edge)
                yield edge

    return
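# A sketch of consuming the generator: collect the most critical
# edges up to some limit. minimal_graph() below does the real,
# size-bounded version of this; 'budget' here is hypothetical:
#
#     paths = [[edge, ] for edge in graph.get_top_key_edges()]
#     for edge in important_edge_itr(graph, paths):
#         paths.append([edge, ])
#         if len(paths) >= budget:
#             break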
# Really slow.
def minimal_graph(graph, repo, version_table, max_size=32*1024,
                  formatter_func=graph_to_string):
    """ Returns a subgraph that can be formatted to <= max_size
        bytes with formatter_func. """
    length = len(formatter_func(graph))
    if length <= max_size:
        # Hmmm... must clone for consistent semantics.
        return graph.clone()

    index = graph.latest_index
    assert index > FIRST_INDEX

    # All the edges that would be included in the top key.
    # This includes the canonical bootstrap path and the
    # two cheapest updates from the previous index.
    paths = [[edge, ] for edge in graph.get_top_key_edges()]

    minimal = subgraph(graph, repo, version_table, paths)
    length = len(formatter_func(minimal))
    if length > max_size:
        raise UpdateGraphException("Too big with only required paths "
                                   "(%i > %i)" % (length, max_size))

    prev_minimal = minimal.clone()

    # Add edges in order of descending criticality until the
    # formatted graph no longer fits.
    for edge in important_edge_itr(graph, paths):
        paths.append([edge, ])
        minimal = subgraph(graph, repo, version_table, paths)
        length = len(formatter_func(minimal))
        if length > max_size:
            return prev_minimal
        prev_minimal = minimal.clone()

    return prev_minimal

# REDFLAG: TODO: Find other places where this func should be used.
def find_alternate_edges(graph, edges):
    """ Find alternate redundant edges that aren't already in edges. """
    alternate_edges = []
    for edge in edges:
        if graph.is_redundant(edge):
            alternate_edge = (edge[0], edge[1], int(not edge[2]))
            if not alternate_edge in edges:
                alternate_edges.append(alternate_edge)

    return alternate_edges

def find_redundant_edges(graph, current_edges, skip_huge=True):
    """ Find the edges you would need to add to make current_edges
        fully redundant.

        Returns an (edge_list, failed_indices) tuple with the
        alternate edges and the indices which only appear in a
        single edge. """

    def increment_counts(counts, for_edge):
        """ INTERNAL: Increment the count for every index contained
            by for_edge. """
        for index in range(for_edge[0] + 1, for_edge[1] + 1):
            counts[index] += 1

    # Zero redundancy counts for all indices.
    redundancy = [0, ] * (graph.latest_index + 1)
    edges = set(current_edges)
    for edge in edges:
        increment_counts(redundancy, edge)

    failed_indices = []
    redundant_edges = []
    for index in range(0, len(redundancy)):
        if redundancy[index] < 2:
            candidates = edges_containing(graph, index)
            while len(candidates) and redundancy[index] < 2:
                candidate = candidates.pop()
                if candidate in edges:
                    continue
                if skip_huge and graph.insert_type(candidate) == INSERT_HUGE:
                    continue
                redundant_edges.append(candidate)
                # Track accepted candidates so the same edge can't be
                # counted twice for a later index.
                edges.add(candidate)
                increment_counts(redundancy, candidate)
            if redundancy[index] < 2:
                failed_indices.append(index)

    return (redundant_edges, failed_indices)

def get_huge_top_key_edges(graph, extant=False):
    """ Get the list of edges in the top key edges (and their
        alternates) which are too big to salt.

        If extant is True, return existing edges.
        If extant is False, return edges which could be added. """
    ret = []
    edges = graph.get_top_key_edges()
    edges += find_redundant_edges(graph, edges, False)[0]
    for edge in edges:
        if graph.get_length(edge) > MAX_METADATA_HACK_LEN:
            if edge[2] == 1:
                # The huge edge already exists.
                assert graph.insert_type(edge) == INSERT_HUGE
                if extant and (not edge in ret):
                    ret.append(edge)
            else:
                assert edge[2] == 0
                assert graph.insert_type(edge) == INSERT_NORMAL
                alternate = (edge[0], edge[1], 1)
                if graph.is_redundant(edge):
                    # The huge alternate already exists.
                    assert graph.insert_type(alternate) == INSERT_HUGE
                    if extant and (not alternate in ret):
                        ret.append(alternate)
                else:
                    # The huge alternate could be added.
                    if (not extant) and (not alternate in ret):
                        ret.append(alternate)

    return ret
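# A sketch of checking redundancy for the top key, assuming a
# populated graph (nothing below is prescriptive):
#
#     top = graph.get_top_key_edges()
#     extra, failed = find_redundant_edges(graph, top)
#     # Inserting the CHKs for the 'extra' edges would make every
#     # index doubly covered; 'failed' lists the indices for which
#     # no second edge exists.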