infocalypse

(djk)
2009-04-28: Added --level for fine grain control over what fn-reinsert inserts.

Added --level for fine grain control over what fn-reinsert inserts. --level: 1 - re-inserts the top key(s). 2 - re-inserts the top key(s), graph(s) and the most recent update. 3 - re-inserts the top key(s), graph(s) and all keys required to bootstrap the repo. This is the default level. 4 - adds redundancy for big (>7Mb) updates. 5 - re-inserts existing redundant big updates.

diff --git a/infocalypse/__init__.py b/infocalypse/__init__.py
--- a/infocalypse/__init__.py
+++ b/infocalypse/__init__.py
@@ -85,13 +85,38 @@ The request uri -> dir mapping is saved 
 the first pull, so you can ommit the --uri
 argument for subsequent fn-pull invocations.
 
+
+RE-INSERTING AND "SPONSORING" REPOS:
+
 hg fn-reinsert
 
 will re-insert the bundles for the repository
-that was last pulled into the directory.  If
-you have the insert uri the top level key(s)
-will also be re-inserted.
+that was last pulled into the directory.
 
+The exact behavior is determined by the
+level argument.
+
+level:
+1 - re-inserts the top key(s)
+2 - re-inserts the top key(s), graph(s) and
+    the most recent update.
+3 - re-inserts the top key(s), graph(s) and
+    all keys required to bootstrap the repo.
+    This is the default level.
+4 - adds redundancy for big (>7Mb) updates.
+5 - re-inserts existing redundant big updates.
+
+Levels 1 and 4 require that you have the private
+key for the repository. For other levels, the
+top key insert is skipped if you don't have
+the private key.
+
+WARNING:
+DO NOT use fn-reinsert if you're concerned about
+correlation attacks. The risk is on the order
+of re-inserting a freesite, but may be
+worse if you use redundant
+(i.e. USK@<line noise>/name.R1/0) top keys.
 
 HINTS:
 The -q, -v and --debug verbosity options are
@@ -196,14 +221,25 @@ def infocalypse_reinsert(ui_, repo, **op
                      "Do a fn-pull from a repository USK and try again.\n")
             return
 
+    level = opts['level']
+    if level < 1 or level > 5:
+        ui_.warn("level must be 1,2,3,4 or 5.\n")
+        return
+
     insert_uri = stored_cfg.get_dir_insert_uri(repo.root)
     if not insert_uri:
+        if level == 1 or level == 4:
+            ui_.warn(("You can't re-insert at level %i without the "
+                     + "insert URI.\n") % level)
+            return
+
         ui_.status("No insert URI. Will skip re-insert "
                    +"of top key.\n")
         insert_uri = None
 
     params['INSERT_URI'] = insert_uri
     params['REQUEST_URI'] = request_uri
+    params['REINSERT_LEVEL'] = level
     execute_reinsert(ui_, repo, params, stored_cfg)
 
 def infocalypse_pull(ui_, repo, **opts):
@@ -295,7 +331,8 @@ cmdtable = {
                 "[options]"),
 
     "fn-reinsert": (infocalypse_reinsert,
-                    [('', 'uri', '', 'request URI')]
+                    [('', 'uri', '', 'request URI'),
+                     ('', 'level', 3, 'how much to re-insert')]
                     + FCP_OPTS
                     + NOSEARCH_OPT,
                     "[options]"),
diff --git a/infocalypse/graph.py b/infocalypse/graph.py
--- a/infocalypse/graph.py
+++ b/infocalypse/graph.py
@@ -45,6 +45,7 @@ MAX_PATH_LEN = 4
 INSERT_NORMAL = 1 # Don't transform inserted data.
 INSERT_PADDED = 2 # Add one trailing byte.
 INSERT_SALTED_METADATA = 3 # Salt Freenet splitfile metadata.
+INSERT_HUGE = 4 # Full re-insert with alternate metadata.
 
 # The size of Freenet data blocks.
 FREENET_BLOCK_LEN = 32 * 1024
@@ -377,7 +378,10 @@ class UpdateGraph:
             INSERT_NORMAL -> No modification to the bundle file.
             INSERT_PADDED -> Add one trailing pad byte.
             INSERT_SALTED_METADATA -> Copy and salt the Freenet
-            split file metadata for the normal insert. """
+            split file metadata for the normal insert.
+            INSERT_HUGE -> Full re-insert of data that's too big
+            for metadata salting.
+            """
 
         if edge_triple[2] == 0:
             return INSERT_NORMAL
@@ -394,9 +398,9 @@ class UpdateGraph:
         if length <= MAX_METADATA_HACK_LEN:
             return INSERT_SALTED_METADATA
 
-        print "insert_type called for edge that's too big to salt???"
+        print "insert_type -- called for edge that's too big to salt???"
         print edge_triple
-        assert False
+        return INSERT_HUGE
 
     def insert_length(self, step):
         """ Returns the actual length of the data inserted into
@@ -949,6 +953,33 @@ def get_heads(graph, to_index=None):
     heads.sort()
     return tuple(heads)
 
+def get_huge_top_key_edges(graph, extant=False):
+    """ Get the alternate (index 1) edges for the top key edges that
+        are too big to salt, i.e. longer than MAX_METADATA_HACK_LEN.
+
+        If extant is True, return existing edges.
+        If extant is False, return edges that could be added. """
+    ret = []
+    edges = graph.get_top_key_edges()
+    for edge in edges:
+        if graph.get_length(edge) > MAX_METADATA_HACK_LEN:
+            if edge[2] == 1:
+                assert graph.insert_type(edge) == INSERT_HUGE
+                if extant and edge not in ret: # edge IS the alternate here.
+                    ret.append(edge)
+            else:
+                assert edge[2] == 0
+                assert graph.insert_type(edge) == INSERT_NORMAL
+                alternate = (edge[0], edge[1], 1)
+                if graph.is_redundant(edge):
+                    assert graph.insert_type(alternate) == INSERT_HUGE
+                    if extant and alternate not in ret:
+                        ret.append(alternate)
+                else:
+                    if (not extant) and alternate not in ret:
+                        ret.append(alternate)
+
+    return ret
 # ASSUMPTIONS:
 # 0) head which don't appear in bases are tip heads. True?
 
diff --git a/infocalypse/infcmds.py b/infocalypse/infcmds.py
--- a/infocalypse/infcmds.py
+++ b/infocalypse/infcmds.py
@@ -559,6 +559,14 @@ def usks_equal(usk_a, usk_b):
     return (get_usk_for_usk_version(usk_a, 0)
             == get_usk_for_usk_version(usk_b, 0))
 
+LEVEL_MSGS = { # Status line execute_reinsert prints per REINSERT_LEVEL.
+    1:"Re-inserting top key(s) and graph(s).",
+    2:"Re-inserting top key(s) if possible, graph(s), latest update.",
+    3:"Re-inserting top key(s) if possible, graph(s), all bootstrap CHKs.",
+    4:"Inserting redundant keys for > 7Mb updates.",
+    5:"Re-inserting redundant updates > 7Mb.",
+    }
+
 def execute_reinsert(ui_, repo, params, stored_cfg):
     """ Run the reinsert command. """
     update_sm = None
@@ -582,9 +590,11 @@ def execute_reinsert(ui_, repo, params, 
             'REQUEST_URI']),
                                              params['REQUEST_URI']))
 
+        ui_.status(LEVEL_MSGS[params['REINSERT_LEVEL']] + '\n')
         update_sm.start_reinserting(params['REQUEST_URI'],
                                     params['INSERT_URI'],
-                                    is_keypair)
+                                    is_keypair,
+                                    params['REINSERT_LEVEL'])
 
         run_until_quiescent(update_sm, params['POLL_SECS'])
 
diff --git a/infocalypse/insertingbundles.py b/infocalypse/insertingbundles.py
--- a/infocalypse/insertingbundles.py
+++ b/infocalypse/insertingbundles.py
@@ -20,8 +20,9 @@
     Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks
 """
 
-from graph import UpToDate, INSERT_SALTED_METADATA, \
-     FREENET_BLOCK_LEN, build_version_table, get_heads
+from graph import UpToDate, INSERT_SALTED_METADATA, INSERT_HUGE, \
+     FREENET_BLOCK_LEN, build_version_table, get_heads, \
+     PENDING_INSERT1, get_huge_top_key_edges
 from graphutil import graph_to_string
 from bundlecache import BundleException
 
@@ -68,12 +69,21 @@ class InsertingBundles(RequestQueueState
             self.parent.ctx.ui_.status("--- Initial Graph ---\n")
             self.parent.ctx.ui_.status(graph_to_string(graph) +'\n')
 
-
         latest_revs = get_heads(graph)
 
         self.parent.ctx.ui_.status("Latest heads(s) in Freenet: %s\n"
                                  % ' '.join([ver[:12] for ver in latest_revs]))
 
+        if self.parent.ctx.get('REINSERT', 0) == 1:
+            self.parent.ctx.ui_.status("No bundles to reinsert.\n")
+            # REDFLAG: Think this through. Crappy code, but expedient.
+            # Hmmmm.... need version table to build minimal graph
+            self.parent.ctx.version_table = build_version_table(graph,
+                                                                self.parent.ctx.
+                                                                repo)
+            self.parent.transition(INSERTING_GRAPH)
+            return
+
         if not self.parent.ctx.has_versions(latest_revs):
             self.parent.ctx.ui_.warn("The local repository isn't up "
                                      + "to date.\n"
@@ -105,8 +115,7 @@ class InsertingBundles(RequestQueueState
         #dump_top_key_tuple((('CHK@', 'CHK@'),
         #                    get_top_key_updates(graph)))
 
-        if len(self.new_edges) == 0:
-            raise Exception("Up to date")
+        self._check_new_edges("Up to date")
 
         self.parent.ctx.graph = graph
 
@@ -224,6 +233,12 @@ class InsertingBundles(RequestQueueState
                               graph.get_length(edge),
                               chk1)
             else:
+                if (graph.insert_type(edge) == INSERT_HUGE and
+                    graph.get_chk(edge) == PENDING_INSERT1):
+                    assert edge[2] == 1
+                    graph.set_chk(edge[:2], edge[2],
+                              graph.get_length(edge),
+                              chk1)
                 if chk1 != graph.get_chk(edge):
                     self.parent.ctx.ui_.status("Bad CHK: %s %s\n" %
                                                (str(edge), chk1))
@@ -242,6 +257,11 @@ class InsertingBundles(RequestQueueState
             len(self.required_edges) == 0):
             self.parent.transition(INSERTING_GRAPH)
 
+    def _check_new_edges(self, msg):
+        """ INTERNAL: Raise UpToDate(msg) if self.new_edges is empty. """
+        if len(self.new_edges) == 0:
+            raise UpToDate(msg)
+
     def set_new_edges(self, graph):
         """ INTERNAL: Set the list of new edges to insert. """
 
@@ -249,20 +269,37 @@ class InsertingBundles(RequestQueueState
         self.parent.ctx.version_table = build_version_table(graph,
                                                             self.parent.ctx.
                                                             repo)
-        if self.parent.ctx.get('REINSERT', 0) == 0:
+        # Hmmmm level == 1 handled elsewhere...
+        level = self.parent.ctx.get('REINSERT', 0)
+        if level == 0: # Insert update, don't re-insert
             self.new_edges = graph.update(self.parent.ctx.repo,
                                           self.parent.ctx.ui_,
                                           self.parent.ctx['TARGET_VERSIONS'],
                                           self.parent.ctx.bundle_cache)
+        elif level ==  2 or level == 3: # Topkey(s), graphs(s), updates
+            # Hmmmm... later support different values of REINSERT?
+            self.new_edges = graph.get_top_key_edges()
+            if level == 2: # 3 == All top key updates.
+                # Only the latest update.
+                self.new_edges = self.new_edges[:1]
 
-            return
+            redundant = []
+            for edge in  self.new_edges:
+                if graph.is_redundant(edge):
+                    alternate_edge = (edge[0], edge[1], int(not edge[2]))
+                    if not alternate_edge in self.new_edges:
+                        redundant.append(alternate_edge)
+            self.new_edges += redundant
+            for edge in self.new_edges[:]: # Iterate a copy; we remove below.
+                if graph.insert_type(edge) == INSERT_HUGE:
+                    # Level 5 handles these. str(): a tuple edge breaks bare %.
+                    self.parent.ctx.ui_.status("Skipping unsalted re-insert of "
+                                               + "big edge: %s\n" % str(edge))
+                    self.new_edges.remove(edge)
+        elif level == 4: # Add redundancy for big updates.
+            self.new_edges = get_huge_top_key_edges(graph, False)
+            self._check_new_edges("There are no big edges to add.")
 
-        # Hmmmm... later support different int values of REINSERT?
-        self.new_edges = graph.get_top_key_edges()
-        redundant = []
-        for edge in  self.new_edges:
-            if graph.is_redundant(edge):
-                alternate_edge = (edge[0], edge[1], int(not edge[2]))
-                if not alternate_edge in self.new_edges:
-                    redundant.append(alternate_edge)
-        self.new_edges += redundant
+        elif level == 5: # Reinsert big updates.
+            self.new_edges =  get_huge_top_key_edges(graph, True)
+            self._check_new_edges("There are no big edges to re-insert.")
diff --git a/infocalypse/updatesm.py b/infocalypse/updatesm.py
--- a/infocalypse/updatesm.py
+++ b/infocalypse/updatesm.py
@@ -36,14 +36,13 @@ from requestqueue import RequestQueue
 from chk import clear_control_bytes
 from bundlecache import make_temp_file, BundleException
 from graph import INSERT_NORMAL, INSERT_PADDED, INSERT_SALTED_METADATA, \
-     FREENET_BLOCK_LEN, has_version, \
+     INSERT_HUGE, FREENET_BLOCK_LEN, has_version, \
      pull_bundle, hex_version
 from graphutil import minimal_graph, graph_to_string, parse_graph
 from choose import get_top_key_updates
 from topkey import bytes_to_top_key_tuple, top_key_tuple_to_bytes, \
      dump_top_key_tuple
 
-
 from statemachine import StatefulRequest, RequestQueueState, StateMachine, \
      Quiescent, Canceling, RetryingRequestList, CandidateRequest, \
      require_state, delete_client_file
@@ -166,7 +165,8 @@ class UpdateContext(dict):
             self.set_cancel_time(request)
             return request
 
-        assert kind == INSERT_NORMAL or kind == INSERT_PADDED
+        assert (kind == INSERT_NORMAL or kind == INSERT_PADDED or
+                kind == INSERT_HUGE)
         pad = (kind == INSERT_PADDED)
         #print "make_edge_insert_request -- from disk: pad"
 
@@ -391,7 +391,6 @@ class InsertingGraph(StaticRequestList):
                                    + '\n')
 
         # Create minimal graph that will fit in a 32k block.
-
         assert not self.parent.ctx.version_table is None
         self.working_graph = minimal_graph(self.parent.ctx.graph,
                                            self.parent.ctx.repo,
@@ -462,6 +461,13 @@ class InsertingGraph(StaticRequestList):
 
         return (chks, updates)
 
+def should_increment(state):
+    """ INTERNAL: Returns True if the insert uri should be incremented,
+        i.e. only when REINSERT is 0 (or unset) or 4; False otherwise. """
+    level = state.parent.ctx.get('REINSERT', 0)
+    assert level >= 0 and level <= 5
+    return (level < 1 or level > 3) and level != 5
+
 class InsertingUri(StaticRequestList):
     """ A state to insert the top level URI for an Infocalypse repository
         into Freenet."""
@@ -493,7 +499,7 @@ class InsertingUri(StaticRequestList):
 
         salt = {0:0x00, 1:0xff} # grrr.... less code.
         insert_uris = make_frozen_uris(self.parent.ctx['INSERT_URI'],
-                                       self.parent.ctx.get('REINSERT', 0) < 1)
+                                       should_increment(self))
         assert len(insert_uris) < 3
         for index, uri in enumerate(insert_uris):
             if self.parent.params.get('DUMP_URIS', False):
@@ -508,7 +514,7 @@ class InsertingUri(StaticRequestList):
         if to_state.name == self.success_state:
             # Hmmm... what about chks?
             # Update the index in the insert_uri on success
-            if (self.parent.ctx.get('REINSERT', 0) < 1 and
+            if (should_increment(self) and
                 is_usk(self.parent.ctx['INSERT_URI'])):
                 version = get_version(self.parent.ctx['INSERT_URI']) + 1
                 self.parent.ctx['INSERT_URI'] = (
@@ -540,7 +546,6 @@ class RequestingUri(StaticRequestList):
     def enter(self, dummy):
         """ Implementation of State virtual. """
         #require_state(from_state, QUIESCENT)
-
         #print "REQUEST_URI:"
         #print self.parent.ctx['REQUEST_URI']
 
@@ -652,7 +657,6 @@ class InvertingUri(RequestQueueState):
         if self.insert_uri == None:
             self.insert_uri = self.parent.ctx['INSERT_URI']
         assert not self.insert_uri is None
-        #print "INVERTING: ", self.insert_uri
 
     def leave(self, to_state):
         """ Implementation of State virtual.
@@ -698,10 +702,8 @@ class InvertingUri(RequestQueueState):
 
     def request_done(self, dummy_client, msg):
         """ Implementation of RequestQueueState virtual. """
-        #print "INVERTING DONE:", msg
         self.msg = msg
         if msg[0] == 'PutSuccessful':
-            #print "REQUEST_URI: ", self.get_request_uri()
             self.parent.transition(self.success_state)
             return
         self.parent.transition(self.failure_state)
@@ -936,14 +938,15 @@ class UpdateStateMachine(RequestQueue, S
         self.get_state(INVERTING_URI).insert_uri = insert_uri
         self.transition(INVERTING_URI)
 
-    def start_reinserting(self, request_uri, insert_uri=None, is_keypair=False):
+    def start_reinserting(self, request_uri, insert_uri=None, is_keypair=False,
+                          level = 3):
         """ Start reinserting the repository"""
         self.require_state(QUIESCENT)
         self.reset()
         self.ctx['REQUEST_URI'] = request_uri
         self.ctx['INSERT_URI'] = insert_uri
         self.ctx['IS_KEYPAIR'] = is_keypair
-        self.ctx['REINSERT'] = 1
+        self.ctx['REINSERT'] = level
         # REDFLAG: added hack code to InsertingUri to handle
         # reinsert w/o insert uri?
         # Tradedoff: hacks in states vs. creating extra state
@@ -991,7 +994,6 @@ class UpdateStateMachine(RequestQueue, S
             # Clean up all upload and download files.
             delete_client_file(client)
 
-# REDFLAG: fix orphan handling to use special state iff it is the current state.
 # REDFLAG: rationalize. writing updated state into ctx vs.
 # leaving it in state instances
 # REDFLAG: audit. is_usk vs. is_usk_file