._site

(Arne Babenhauserheide)
2012-07-25: the implemented path folding algorithm really sucks. It actually

the implemented path folding algorithm really sucks. It actually makes an ideal network random.

diff --git a/sim.py b/sim.py
--- a/sim.py
+++ b/sim.py
@@ -2,11 +2,13 @@
 
 """very simple gnutella path folding simulator."""
 
-from random import random, choice
+from random import random, choice, randint, shuffle
 import numpy as np
 import pylab as pl
+import bisect
+from copy import deepcopy
 
-def randomnodes(num=8000):
+def randomnodes(num=1000):
     """Generate num nodes with locations between 0 and 1."""
     nodes = set()
     for n in range(num):
@@ -14,7 +16,7 @@ def randomnodes(num=8000):
        while n in nodes:
            n = random()
        nodes.add(n)
-    return list(nodes)
+    return sorted(list(nodes))
 
 def generateflat(nodes, connectionspernode=10):
     """generate a randomly connected network with the given connectionspernode."""
@@ -39,6 +41,60 @@ def generateflat(nodes, connectionsperno
         net[node] == sorted(conns)
     return net
 
+def closetolocrandomdist(loc, nodes, lennodes):
+    """With the assumption that our locations are in a random flat
+    distribution, we can do a faster lookup of the location to
+    take.
+
+    Essentially this does automatic binning with the cost of some perfection.
+    """
+    lo = max(0, int((loc - 10/lennodes)  * lennodes))
+    hi = min(lennodes, int((loc + 10/lennodes)  * lennodes))
+    return bisect.bisect_left(nodes, loc, lo=lo, hi=hi) - 1
+
+def smallworldtargetlocations(node, connectionspernode):
+    """Small world routing requires that the number number of
+    connections to a certain area in the key space scales with
+    1/distance."""
+    maxdist = 0.5
+    dists = [maxdist / (i+1) for i in range(int(connectionspernode*100))]
+    locs = []
+    for d in dists:
+        locs.append((node + d) % 1)
+        locs.append((node - d) % 1)
+    shuffle(locs)
+    return locs[:connectionspernode]
+
+def generatesmallworldunclean(nodes, connectionspernode=10):
+    """generate an ideal small world network with the given connectionspernode."""
+
+    net = {}
+    lennodes = len(nodes)
+    for node in nodes:
+        if not node in net:
+            net[node] = []
+        numconn = connectionspernode - len(net[node])
+        for loc in smallworldtargetlocations(node, numconn):
+            # find connections to others we do not know yet
+            closestindex = closetolocrandomdist(loc, nodes, lennodes)
+            conn = nodes[closestindex]
+            while (conn == node or
+                   conn in net and (
+                    conn in net[node] or
+                    node in net[conn] or
+                   net[conn][connectionspernode+10:])):
+                closestindex = (closestindex + randint(-lennodes/connectionspernode,lennodes/connectionspernode-1))%len(nodes)
+                conn = nodes[closestindex]
+            net[node].append(conn)
+            if not conn in net:
+                net[conn] = []
+            net[conn].append(node)
+
+    # we need to make sure that all connections are sorted…
+    for conns in net.values():
+        conns.sort()
+    return net
+
 def closest(target, sortedlist):
     """return the node which is closest to the target."""
     if not sortedlist:
@@ -109,17 +165,12 @@ def fold(net, num=10):
         target = choice(nodes)
         while target == start:
             target = choice(nodes)
-            
         route = findroute(target, start, net)
         routelen = len(route)
         routelengths.append(routelen)
         # fold all on the route except for the start and the endpoint
         for prev in route[1:-1]:
-            try:
-                pnet = net[prev]
-            except KeyError:
-                idx = route.index(prev)
-                print (route[idx:], prev, target, idx, prev in route[:idx])
+            pnet = net[prev]
             checkfold(target, prev, net)
     return routelengths
             
@@ -134,17 +185,21 @@ def linklengths(net):
 
 if __name__ == "__main__":
     nodes = randomnodes()
-    net = generateflat(nodes)
+    basenet = generatesmallworldunclean(nodes)
     lensnapshots = {}
-    lensnapshots[(0,0)] = linklengths(net)
-    print (np.mean(lensnapshots[(0,0)]))
-    foldperstep = 500
+    foldperstep = 50
     for run in range(2):
+        if not run: 
+            net = deepcopy(basenet)
+        else:
+            net = generateflat(nodes)
+        lensnapshots[(run,0)] = linklengths(net), [0]
+        print (np.mean(lensnapshots[(0,0)][0]))
         print("===", "run", run, "===")
         for i in range(40):
             lengths = fold(net, foldperstep)
-            lensnapshots[(run+1, i+1)] = [linklengths(net), lengths]
-            print (np.mean(lensnapshots[(run+1, i+1)][0]), np.mean(lensnapshots[(run+1, i+1)][1]))
+            lensnapshots[(run, i+1)] = linklengths(net), lengths
+            print (np.mean(lensnapshots[(run, i+1)][0]), np.mean(lensnapshots[(run, i+1)][1]))
 
     for key, val in sorted(lensnapshots.items()):
         run, i = key
@@ -152,14 +207,8 @@ if __name__ == "__main__":
         # only plot one in 10 results
         if i % 10: 
             continue
-        pdf, bins, patches = pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + str(i*foldperstep) + ", " + str(routelen))
+        pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + str(i*foldperstep) + ", " + str(np.mean(routelen)))
         pl.semilogx()
     pl.legend(loc="best")
     pl.savefig("123.png")
     
-
-
-
-
-
-