._site

(Arne Babenhauserheide)
2012-07-25: add real path folding via connect-to-target in two flavors: check tip

add real path folding via connect-to-target in two flavors: check local linkdist and simple.

diff --git a/sim.py b/sim.py
--- a/sim.py
+++ b/sim.py
@@ -4,7 +4,6 @@
 
 from random import random, choice, randint, shuffle
 import numpy as np
-import pylab as pl
 import bisect
 from copy import deepcopy
 import math
@@ -20,7 +19,7 @@ def distances(target, nodelist):
         lengths.append(dist(n,target))
     return lengths
 
-def randomnodes(num=20000):
+def randomnodes(num=2000):
     """Generate num nodes with locations between 0 and 1."""
     nodes = set()
     for n in range(num):
@@ -133,7 +132,7 @@ def findroute(target, start, net):
         if best in seen:
             possible = [node for node in net[prev] if not node in seen]
             if not possible:
-                print ("all routes already taken from", start, "to", target, "over", prev)
+                #print ("all routes already taken from", start, "to", target, "over", prev)
                 return [] # can’t reach the target from here: all routes already taken
             best = choice(possible)
         route.append(best)
@@ -147,7 +146,47 @@ def numberofconnections(net):
         numconns.append(len(n))
     return numconns
 
-def dofold(target, prev, net): 
+def smallworldbindef(numbins):
+    """Create smallworld bins."""
+    # first define the bin step: upper boundary.
+    #: [0.0005, 0.005, 0.05, 0.5]
+    return [0.5/10**((numbins-(i+1))) for i in range(numbins)]
+
+def deviationfromsmallworld(linkdistances, numbins=5):
+    """Calculate the deviation of the given local link length
+    linkdistances from a small world linkdistances.
+
+    Use logarithmic binning to represent the relevant feature correctly.
+
+    numbins should be clearly less than the number of connections
+    """
+    bindefupper = smallworldbindef(numbins)
+    # then get the total number of links
+    numlinks = len(linkdistances)
+    # and define how an ideal bin linkdistances would look
+    idealbins = [1/numbins for i in range(numbins)]
+    #idealbins = [i/sum(idealbins) for i in idealbins]
+    # now create the real bins
+    realbins = [0 for i in range(numbins)]
+    for link in linkdistances:
+        for n,upper in enumerate(bindefupper):
+            if link < upper:
+                realbins[n] += 1./numlinks
+                break
+    #print ("ideal", idealbins)
+    #print ("real", realbins)
+    #print ("def", bindefupper)
+    # the difference is the cost
+    return [abs(realbins[i] - idealbins[i]) for i in range(numbins)]
+    # diff = sum(abs(realbins[i] - idealbins[i]) for i in range(numbins)) 
+    # add additional cost for highest bin mismatch (they lose that too easily)
+    # diff += abs(realbins[-1] - idealbins[-1])
+    #print ([abs(realbins[i] - idealbins[i]) for i in range(numbins)])
+    #print("diff", diff)
+    #print()
+    # return diff
+
+def jumptotarget(target, prev, net): 
     """Actually do the switch to the target location."""
     # do not switch to exactly the target to avoid duplicate entries
     # (implementation detail)
@@ -160,50 +199,82 @@ def dofold(target, prev, net):
     for conn in connections:
         net[conn] = sorted([c for c in net[conn] if not c == prev] + [realtarget])
 
-def deviationfromsmallworld(distribution, numbins=5):
-    """Calculate the deviation of the given local link length
-    distribution from a small world distribution.
+def connecttotarget(target, prev, net):
+    """Connect prev to the target."""
+    net[prev].append(target)
+    net[target].append(prev)
 
-    Use logarithmic binning to represent the relevant feature correctly.
+def disconnectfromtarget(target, prev, net):
+    """Disconnect from the target."""
+    net[prev].remove(target)
+    net[target].remove(prev)
+    
+def checkjump(target, prev, net):
+    """Check if we should jump to the target.
 
-    numbins should be clearly less than the number of connections
-    """
-    # first define the bin step: upper boundary.
-    #: [0.0005, 0.005, 0.05, 0.5]
-    bindefupper = [0.5/10**((numbins-(i+1))) for i in range(numbins)]
-    # then get the total number of links
-    numlinks = len(distribution)
-    # and define how an ideal bin distribution would look
-    idealbins = [1/numbins for i in range(numbins)]
-    #idealbins = [i/sum(idealbins) for i in idealbins]
-    # now create the real bins
-    realbins = [0 for i in range(numbins)]
-    for link in distribution:
-        for n,upper in enumerate(bindefupper):
-            if link < upper:
-                realbins[n] += 1./numlinks
-                break
-    #print ("ideal", idealbins)
-    #print ("real", realbins)
-    #print ("def", bindefupper)
-    # the difference is the cost
-    diff = sum(abs(realbins[i] - idealbins[i]) for i in range(numbins)) 
-    # add additional cost for highest bin mismatch (they lose that too easily)
-    diff += abs(realbins[-1] - idealbins[-1])
-    #print ([abs(realbins[i] - idealbins[i]) for i in range(numbins)])
-    #print("diff", diff)
-    #print()
-    return diff
+    :param targetdeviation: deviation of the link distribution at the
+        target from a small world distribution."""
+    conns = net[prev]
+    old = distances(prev, conns)
+    new = distances(target, conns)
+    newdev = deviationfromsmallworld(new)
+    olddev = deviationfromsmallworld(old)
+    if sum(newdev) < sum(olddev):
+        jumptotarget(target, prev, net)
+        return True
 
-def checkfold(target, prev, net, probability=0.07):
+def checkconnect(target, prev, net):
+    """Check if we should connect to the target.
+
+    Check: Does it improve our local links if we replace the worst
+    connection with it."""
+    oldconns = net[prev][:]
+    olddist = distances(prev, oldconns)
+    deviation = deviationfromsmallworld(olddist, numbins=5)
+    bindefupper = smallworldbindef(numbins=5)
+    worstidx = deviation.index(max(deviation))
+    worstupper = bindefupper[worstidx]
+    if worstidx:
+        worstlower = bindefupper[worstidx-1]
+    else:
+        worstlower = 0
+    baddist = [i for i in olddist if worstlower <= i < worstupper]
+    if not baddist:
+        return # no really bad connections
+    tokill = choice(baddist)
+    newdist = [i for i in olddist if i != tokill] + [dist(prev, target)]
+    newdeviation = deviationfromsmallworld(newdist, numbins=5)
+    if sum(newdeviation) < sum(deviation):
+        connecttotarget(target, prev, net)
+        # if tokill has at least half as many connections as prev,
+        # disconnect it.
+        tokillconn = oldconns[olddist.index(tokill)]
+        enoughconnections = net[tokillconn][int(len(net[prev])/2):]
+        if enoughconnections:
+            disconnectfromtarget(tokillconn, prev, net)
+        return True
+        
+def checksimpleconnect(target, prev, net):
+    """Just connect to the target and disconnect a random node, if it
+    has enough connections."""
+    conns = net[prev][:]
+    connecttotarget(target, prev, net)
+    tokillconn = choice(conns)
+    enoughconnections = net[tokillconn][int(len(net[prev])/2):]
+    if enoughconnections:
+        disconnectfromtarget(tokillconn, prev, net)
+    return True
+
+def checkfold(target, prev, net, probability=0.07, strategy="connectsimple"):
     """switch to the target location with some probability"""
     if random() > probability:
         return
-    conns = net[prev]
-    old = distances(prev, conns)
-    new = distances(target, conns)
-    if deviationfromsmallworld(new) < deviationfromsmallworld(old):
-        dofold(target, prev, net)
+    if strategy == "jump":
+        return checkjump(target, prev, net)
+    elif strategy == "connect":
+        return checkconnect(target, prev, net)
+    elif strategy == "connectsimple":
+        return checksimpleconnect(target, prev, net)
 
 def fold(net, num=100):
     """do num path foldings.
@@ -222,7 +293,8 @@ def fold(net, num=100):
         # fold all on the route except for the start and the endpoint
         for prev in route[:-1]:
             pnet = net[prev]
-            checkfold(target, prev, net)
+            if checkfold(target, prev, net):
+                target = prev
     return routelengths
 
 def linklengths(net):
@@ -250,6 +322,8 @@ if __name__ == "__main__":
             lensnapshots[(run, i+1)] = linklengths(net), lengths
             print (np.mean(lensnapshots[(run, i+1)][0]), np.mean(lensnapshots[(run, i+1)][1]))
 
+    # now plot the data
+    import matplotlib as pl
     for key, val in sorted(lensnapshots.items()):
         run, i = key
         linklen, routelen = val