#!/usr/bin/env python3 """very simple gnutella path folding simulator. For a full check, run for size in 100 1000 10000 ; do for conn in 10 20 30 40 ; do for htl in 10 20 40 80 ; do for strategy in switch switchalways switchonesided jump jumphalf jumpgolden jumpgoldensmall connect connectsimple replacebest replacelongest; do echo ===== $size nodes $conn conn $htl htl $strategy ===== ; python3 sim.py --size $size --connections $conn --perstep 600 --strategy $strategy --maxhtl $htl -o $size-$conn-htl-$htl-$strategy.png; done; done; done; done """ from random import random, choice, randint, shuffle import numpy as np import bisect from copy import deepcopy import math def dist(loc0, loc1): """return the wraparound distance of 2 nodes in the [0,1) space.""" return min((loc0-loc1)%1, (loc1-loc0)%1) def distances(target, nodelist): """calculate the distances between the target and the given list of nodes.""" lengths = [] for n in nodelist: lengths.append(dist(n,target)) return lengths def randomnodes(num=8000): """Generate num nodes with locations between 0 and 1.""" nodes = set() for n in range(num): n = random() while n in nodes: n = random() nodes.add(n) return sorted(list(nodes)) def generateflat(nodes, connectionspernode=20): """generate a randomly connected network with the given connectionspernode.""" net = {} for node in nodes: if not node in net: net[node] = [] for n in range(connectionspernode - len(net[node])): # find connections to others we do not know yet conn = choice(nodes) while (conn == node or conn in net and ( conn in net[node] or node in net[conn] or net[conn][connectionspernode+2:])): conn = choice(nodes) net[node].append(conn) if not conn in net: net[conn] = [] net[conn].append(node) for node, conns in net.items(): net[node] == sorted(conns) return net def closetolocrandomdist(loc, nodes, lennodes): """With the assumption that our locations are in a random flat distribution, we can do a faster lookup of the location to take. Essentially this does automatic binning with the cost of some perfection. """ lo = max(0, int((loc - 10/lennodes) * lennodes)) hi = min(lennodes, int((loc + 10/lennodes) * lennodes)) return bisect.bisect_left(nodes, loc, lo=lo, hi=hi) - 1 def smallworldtargetlocations(node, connectionspernode): """Small world routing requires that the number number of connections to a certain area in the key space scales with 1/distance.""" maxdist = 0.5 dists = [maxdist / (i+1) for i in range(int(connectionspernode*100))] locs = [] for d in dists: locs.append((node + d) % 1) locs.append((node - d) % 1) shuffle(locs) return locs[:connectionspernode] def generatesmallworldunclean(nodes, connectionspernode=20): """generate an ideal small world network with the given connectionspernode.""" net = {} lennodes = len(nodes) for node in nodes: if not node in net: net[node] = [] numconn = connectionspernode - len(net[node]) for loc in smallworldtargetlocations(node, numconn): # find connections to others we do not know yet closestindex = closetolocrandomdist(loc, nodes, lennodes) conn = nodes[closestindex] while (conn == node or conn in net and ( conn in net[node] or node in net[conn] or net[conn][connectionspernode+10:])): closestindex = (closestindex + randint(int(-lennodes/connectionspernode),int(lennodes/connectionspernode-1)))%len(nodes) conn = nodes[closestindex] net[node].append(conn) if not conn in net: net[conn] = [] net[conn].append(node) # we need to make sure that all connections are sorted… for conns in net.values(): conns.sort() return net def closest(target, sortedlist): """return the node which is closest to the target.""" if not sortedlist: raise ValueError("Cannot find the closest node in an emtpy list.") dis = 1.1 for n in sortedlist: d = dist(n,target) if d < dis: best = n dis = d else: return best return best def findroute(target, start, net, maxhtl=9999): """Find the best route to the target usinggreedy routing.""" best = start seen = set() route = [] for i in range(maxhtl): if best == target: return route prev = best best = closest(target, net[prev]) # never repeat a route deterministically if best in seen: possible = [node for node in net[prev] if not node in seen] if not possible: #print ("all routes already taken from", start, "to", target, "over", prev) return [] # can’t reach the target from here: all routes already taken best = choice(possible) route.append(best) seen.add(best) return [] def numberofconnections(net): """Get the number of connections for each node.""" numconns = [] for n in net.values(): numconns.append(len(n)) return numconns def smallworldbindef(numbins): """Create smallworld bins.""" # first define the bin step: upper boundary. #: [0.0005, 0.005, 0.05, 0.5] return [0.5/10**((numbins-(i+1))) for i in range(numbins)] def deviationfromsmallworld(linkdistances, numbins=5): """Calculate the deviation of the given local link length linkdistances from a small world linkdistances. Use logarithmic binning to represent the relevant feature correctly. numbins should be clearly less than the number of connections """ bindefupper = smallworldbindef(numbins) # then get the total number of links numlinks = len(linkdistances) # and define how an ideal bin linkdistances would look idealbins = [1/numbins for i in range(numbins)] #idealbins = [i/sum(idealbins) for i in idealbins] # now create the real bins realbins = [0 for i in range(numbins)] for link in linkdistances: for n,upper in enumerate(bindefupper): if link < upper: realbins[n] += 1./numlinks break #print ("ideal", idealbins) #print ("real", realbins) #print ("def", bindefupper) # the difference is the cost return [abs(realbins[i] - idealbins[i]) for i in range(numbins)] # diff = sum(abs(realbins[i] - idealbins[i]) for i in range(numbins)) # add additional cost for highest bin mismatch (they lose that too easily) # diff += abs(realbins[-1] - idealbins[-1]) #print ([abs(realbins[i] - idealbins[i]) for i in range(numbins)]) #print("diff", diff) #print() # return diff def jumptotarget(target, prev, net): """Actually do the switch to the target location.""" # do not switch to exactly the target to avoid duplicate entries # (implementation detail) realtarget = target while realtarget in net or realtarget == prev: realtarget = (realtarget + (random()-0.5)*1.e-9) % 1 connections = net[prev][:] net[realtarget] = connections del net[prev] for conn in connections: net[conn] = sorted([c for c in net[conn] if not c == prev] + [realtarget]) def switchwithtarget(target, prev, net): """switch the location with the target.""" prevconnections = net[prev][:] targetconnections = net[target][:] net[target] = prevconnections net[prev] = targetconnections for conn in prevconnections: net[conn] = sorted([c for c in net[conn] if not c == prev] + [target]) for conn in targetconnections: net[conn] = sorted([c for c in net[conn] if not c == target] + [prev]) def connecttotarget(target, prev, net): """Connect prev to the target.""" net[prev].append(target) net[target].append(prev) def disconnectfromtarget(target, prev, net): """Disconnect from the target.""" net[prev].remove(target) net[target].remove(prev) def hasbetterlinks(target, prev, net): """Check if the target position has better links than the previous.""" conns = net[prev][:] old = distances(prev, conns) new = distances(target, conns) newdev = deviationfromsmallworld(new) olddev = deviationfromsmallworld(old) if sum(newdev) < sum(olddev): return True def checkjump(target, prev, net): """Check if we should jump to the target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" if hasbetterlinks(target, prev, net): jumptotarget(target, prev, net) return True def checkswitch(target, prev, net): """Check if we should switch positions with the target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" if hasbetterlinks(target, prev, net): if hasbetterlinks(prev, target, net): switchwithtarget(target, prev, net) return True def checkswitchalways(target, prev, net): """Always switch positions with the target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" switchwithtarget(target, prev, net) return True def checkswitchonesided(target, prev, net): """Always switch positions with the target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" if hasbetterlinks(target, prev, net): switchwithtarget(target, prev, net) return True def checkjumphalf(target, prev, net): """Check if we should jump halfway to the target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" conns = net[prev][:] wrap = dist(target, prev) + 1.e-9 < abs(target - prev) if wrap and prev < target: target = ((prev + (target-1))/2)%1 elif wrap and prev > target: target = (((prev-1) + (target))/2)%1 else: target = (prev + (target))/2 target = (prev+target)/2 old = distances(prev, conns) new = distances(target, conns) newdev = deviationfromsmallworld(new) olddev = deviationfromsmallworld(old) if sum(newdev) < sum(olddev): jumptotarget(target, prev, net) return True def checkjumpratio(target, prev, net, ratio): """Check if we should jump to the given ratio between prev and target. :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" conns = net[prev][:] # if the distance through the edge is shorter, go that way wrap = dist(target, prev) + 1.e-9 < abs(target - prev) if wrap and prev < target: target = (prev * (1-ratio) + (target-1) * ratio)%1 elif wrap and prev > target: target = ((prev-1) * (1-ratio) + target * ratio)%1 else: target = prev * (1-ratio) + target * ratio old = distances(prev, conns) new = distances(target, conns) newdev = deviationfromsmallworld(new) olddev = deviationfromsmallworld(old) if sum(newdev) < sum(olddev): jumptotarget(target, prev, net) return True def checkjumpgolden(target, prev, net, small=False): """Check if we should jump the small or large golden ratio (√5-1)/2 to the target (small=closer to the prev, not small=closer to the target). :param targetdeviation: deviation of the link distribution at the target from a small world distribution.""" golden = (math.sqrt(5)-1)/2.0 if small: return checkjumpratio(target, prev, net, 1-golden) else: return checkjumpratio(target, prev, net, golden) def checkconnect(target, prev, net): """Check if we should connect to the target. Check: Does it improve our local links if we replace the worst connection with it.""" oldconns = net[prev][:] olddist = distances(prev, oldconns) deviation = deviationfromsmallworld(olddist, numbins=5) bindefupper = smallworldbindef(numbins=5) worstidx = deviation.index(max(deviation)) worstupper = bindefupper[worstidx] if worstidx: worstlower = bindefupper[worstidx-1] else: worstlower = 0 baddist = [i for i in olddist if worstlower <= i < worstupper] if not baddist: return # no really bad connections tokill = choice(baddist) newdist = [i for i in olddist if i != tokill] + [dist(prev, target)] newdeviation = deviationfromsmallworld(newdist, numbins=5) if sum(newdeviation) < sum(deviation): connecttotarget(target, prev, net) # if tokill has at least half as many connections as prev, # disconnect it. tokillconn = oldconns[olddist.index(tokill)] enoughconnections = net[tokillconn][int(len(net[prev])/2):] if enoughconnections: disconnectfromtarget(tokillconn, prev, net) return True def checkreplacebest(target, prev, net): """Check if we should connect to the target. Check: Does it improve our local links if we replace the worst connection with it.""" oldconns = net[prev][:] olddist = distances(prev, oldconns) deviation = deviationfromsmallworld(olddist, numbins=5) bindefupper = smallworldbindef(numbins=5) bestidx = deviation.index(min(deviation)) bestupper = bindefupper[bestidx] if bestidx: bestlower = bindefupper[bestidx-1] else: bestlower = 0 gooddist = [i for i in olddist if bestlower <= i < bestupper] if not gooddist: return # no really good connections tokill = choice(gooddist) connecttotarget(target, prev, net) # if tokill has at least half as many connections as prev, # disconnect it. tokillconn = oldconns[olddist.index(tokill)] enoughconnections = net[tokillconn][int(len(net[prev])/2):] if enoughconnections: disconnectfromtarget(tokillconn, prev, net) return True def checksimpleconnect(target, prev, net): """Just connect to the target and disconnect a random node, if it has enough connections.""" conns = net[prev][:] connecttotarget(target, prev, net) tokillconn = choice(conns) enoughconnections = net[tokillconn][int(len(net[prev])/2):] if enoughconnections: disconnectfromtarget(tokillconn, prev, net) return True def checkreplacelongest(target, prev, net): """Just connect to the target and disconnect a random node, if it has enough connections.""" conns = net[prev][:] connecttotarget(target, prev, net) dist = distances(prev, conns) idx = dist.index(max(dist)) tokillconn = conns[idx] enoughconnections = net[tokillconn][int(len(net[prev])/2):] if enoughconnections: disconnectfromtarget(tokillconn, prev, net) return True def checkfold(target, prev, net, probability=0.07, strategy="jumphalf"): """switch to the target location with some probability""" if random() > probability: return if strategy == "jump": return checkjump(target, prev, net) if strategy == "switch": return checkswitch(target, prev, net) if strategy == "switchalways": return checkswitchalways(target, prev, net) if strategy == "switchonesided": return checkswitchonesided(target, prev, net) if strategy == "jumphalf": return checkjumphalf(target, prev, net) if strategy == "jumpgolden": return checkjumpgolden(target, prev, net) if strategy == "jumpgoldensmall": return checkjumpgolden(target, prev, net, small=True) if strategy == "connect": return checkconnect(target, prev, net) if strategy == "replacebest": return checkreplacebest(target, prev, net) if strategy == "replacelongest": return checkreplacelongest(target, prev, net) if strategy == "connectsimple": return checksimpleconnect(target, prev, net) def fold(net, num=100, strategy="jumphalf", maxhtl=20, fullroutes=True): """do num path foldings. :param fullroutes: Calculate the full route, regardless of maxhtl. Only use the HTL for deciding whether to try to fold the route. :return: the lengths of all used routes.""" routelengths = [] for i in range(num): nodes = list(net.keys()) start = choice(nodes) target = choice(nodes) while target == start: target = choice(nodes) if fullroutes: route = findroute(target, start, net, maxhtl=9999) else: route = findroute(target, start, net, maxhtl=maxhtl) if not route: # in case we break the route at maxhtl, we just add the # HTL+1 to be able to do the simplistic success stats # later. if not fullroutes: routelengths.append(maxhtl+1) continue routelen = len(route) routelengths.append(routelen) if routelen > maxhtl: continue # fold all on the route except for the start and the endpoint for prev in route[:-1]: pnet = net[prev] didfold = checkfold(target, prev, net, strategy=strategy) if didfold: target = prev return routelengths def linklengths(net): """calculate the lengths of all links""" lengths = [] for node, targets in net.items(): lengths.extend(distances(node, targets)) return lengths def parse_args(): from argparse import ArgumentParser parser = ArgumentParser(description="Simulate Freenet network optimization.") parser.add_argument("--strategy", help="The optimization strategy: switch switchalways switchonesided jump jumphalf jumpgolden jumpgoldensmall connect connectsimple replacebest replacelongest", default="jumphalf") parser.add_argument("--starting-config", help="The starting configurations: flat or ideal. Take two by seperating them with a comma", default="ideal,flat") parser.add_argument("--size", help="The size of the network.", default=1000, type=int) parser.add_argument("--connections", help="The mean number of connections per node.", default=20, type=int) parser.add_argument("--maxhtl", help="The maximum length of routes to be successful.", default=20, type=int) parser.add_argument("--steps", help="The maximum number of modelsteps.", default=60, type=int) parser.add_argument("--perstep", help="The number of requests to run per step.", default=100, type=int) parser.add_argument("--nofullroutes", help="Don’t calculate full routes but break at maxhtl. This gives weaker statistics but is much faster for random networks with very long routes and very low success rate", action="store_true") parser.add_argument("-o", "--output", help="Filename of the pylab plot.", default="plot.png") return parser.parse_args() if __name__ == "__main__": args = parse_args() nodes = randomnodes(args.size) lensnapshots = {} foldperstep = args.perstep runs = args.starting_config.split(",") for run in runs: if run == "ideal": net = generatesmallworldunclean(nodes, args.connections) else: net = generateflat(nodes, args.connections) lensnapshots[(run,0)] = linklengths(net), [0] print (np.mean(lensnapshots[(run,0)][0])) print("===", run, "===") routelengths = fold(net, foldperstep, args.strategy, args.maxhtl, fullroutes=not args.nofullroutes) linklens = linklengths(net) print (np.mean(linklens), np.mean(routelengths), "±", np.std(routelengths), "succ", len([r for r in routelengths if r < args.maxhtl])/len(routelengths), min(routelengths), max(routelengths), sum(deviationfromsmallworld(linklens, numbins=10))) for i in range(args.steps): routelengths = fold(net, foldperstep, args.strategy, args.maxhtl, fullroutes=not args.nofullroutes) linklens = linklengths(net) lensnapshots[(run, i+1)] = linklens, routelengths print (np.mean(linklens), np.mean(routelengths), "±", np.std(routelengths), "succ", len([r for r in routelengths if r < args.maxhtl])/len(routelengths), min(routelengths), max(routelengths), sum(deviationfromsmallworld(linklens, numbins=10))) print() # now plot the data import pylab as pl pl.rcParams['font.size'] = 5 for key, val in sorted(lensnapshots.items()): run, i = key linklen, routelen = val # only plot one in 10 results if i % 10 and i != 1: continue succ = len([r for r in routelen if r < args.maxhtl])/len(routelen) pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + "{:}".format(i*foldperstep) + ", len: " + "{:.2f}".format(np.mean(routelen)) + ", succ: " + "{:.2f}".format(succ)) pl.semilogx() pl.legend(loc="upper left") pl.title(str(args.size) + " nodes, " + str(args.connections) + " connections per node, optimized with strategy " + args.strategy + ". succ: found in max " + str(args.maxhtl) + " steps.") pl.savefig(args.output)