#!/usr/bin/env python3
"""very simple gnutella path folding simulator."""
from random import random, choice, randint, shuffle
import numpy as np
import bisect
from copy import deepcopy
import math
def dist(loc0, loc1):
"""return the wraparound distance of 2 nodes in the [0,1) space."""
return min((loc0-loc1)%1, (loc1-loc0)%1)
def distances(target, nodelist):
"""calculate the distances between the target and the given list of nodes."""
lengths = []
for n in nodelist:
lengths.append(dist(n,target))
return lengths
def randomnodes(num=8000):
"""Generate num nodes with locations between 0 and 1."""
nodes = set()
for n in range(num):
n = random()
while n in nodes:
n = random()
nodes.add(n)
return sorted(list(nodes))
def generateflat(nodes, connectionspernode=20):
"""generate a randomly connected network with the given connectionspernode."""
net = {}
for node in nodes:
if not node in net:
net[node] = []
for n in range(connectionspernode - len(net[node])):
# find connections to others we do not know yet
conn = choice(nodes)
while (conn == node or
conn in net and (
conn in net[node] or
node in net[conn] or
net[conn][connectionspernode+2:])):
conn = choice(nodes)
net[node].append(conn)
if not conn in net:
net[conn] = []
net[conn].append(node)
for node, conns in net.items():
net[node] == sorted(conns)
return net
def closetolocrandomdist(loc, nodes, lennodes):
"""With the assumption that our locations are in a random flat
distribution, we can do a faster lookup of the location to
take.
Essentially this does automatic binning with the cost of some perfection.
"""
lo = max(0, int((loc - 10/lennodes) * lennodes))
hi = min(lennodes, int((loc + 10/lennodes) * lennodes))
return bisect.bisect_left(nodes, loc, lo=lo, hi=hi) - 1
def smallworldtargetlocations(node, connectionspernode):
"""Small world routing requires that the number number of
connections to a certain area in the key space scales with
1/distance."""
maxdist = 0.5
dists = [maxdist / (i+1) for i in range(int(connectionspernode*100))]
locs = []
for d in dists:
locs.append((node + d) % 1)
locs.append((node - d) % 1)
shuffle(locs)
return locs[:connectionspernode]
def generatesmallworldunclean(nodes, connectionspernode=20):
"""generate an ideal small world network with the given connectionspernode."""
net = {}
lennodes = len(nodes)
for node in nodes:
if not node in net:
net[node] = []
numconn = connectionspernode - len(net[node])
for loc in smallworldtargetlocations(node, numconn):
# find connections to others we do not know yet
closestindex = closetolocrandomdist(loc, nodes, lennodes)
conn = nodes[closestindex]
while (conn == node or
conn in net and (
conn in net[node] or
node in net[conn] or
net[conn][connectionspernode+10:])):
closestindex = (closestindex + randint(int(-lennodes/connectionspernode),int(lennodes/connectionspernode-1)))%len(nodes)
conn = nodes[closestindex]
net[node].append(conn)
if not conn in net:
net[conn] = []
net[conn].append(node)
# we need to make sure that all connections are sorted…
for conns in net.values():
conns.sort()
return net
def closest(target, sortedlist):
"""return the node which is closest to the target."""
if not sortedlist:
raise ValueError("Cannot find the closest node in an emtpy list.")
dis = 1.1
for n in sortedlist:
d = dist(n,target)
if d < dis:
best = n
dis = d
else:
return best
return best
def findroute(target, start, net, maxhtl=9999):
"""Find the best route to the target usinggreedy routing."""
best = start
seen = set()
route = []
for i in range(maxhtl):
if best == target:
return route
prev = best
best = closest(target, net[prev])
# never repeat a route deterministically
if best in seen:
possible = [node for node in net[prev] if not node in seen]
if not possible:
#print ("all routes already taken from", start, "to", target, "over", prev)
return [] # can’t reach the target from here: all routes already taken
best = choice(possible)
route.append(best)
seen.add(best)
return []
def numberofconnections(net):
"""Get the number of connections for each node."""
numconns = []
for n in net.values():
numconns.append(len(n))
return numconns
def smallworldbindef(numbins):
"""Create smallworld bins."""
# first define the bin step: upper boundary.
#: [0.0005, 0.005, 0.05, 0.5]
return [0.5/10**((numbins-(i+1))) for i in range(numbins)]
def deviationfromsmallworld(linkdistances, numbins=5):
"""Calculate the deviation of the given local link length
linkdistances from a small world linkdistances.
Use logarithmic binning to represent the relevant feature correctly.
numbins should be clearly less than the number of connections
"""
bindefupper = smallworldbindef(numbins)
# then get the total number of links
numlinks = len(linkdistances)
# and define how an ideal bin linkdistances would look
idealbins = [1/numbins for i in range(numbins)]
#idealbins = [i/sum(idealbins) for i in idealbins]
# now create the real bins
realbins = [0 for i in range(numbins)]
for link in linkdistances:
for n,upper in enumerate(bindefupper):
if link < upper:
realbins[n] += 1./numlinks
break
#print ("ideal", idealbins)
#print ("real", realbins)
#print ("def", bindefupper)
# the difference is the cost
return [abs(realbins[i] - idealbins[i]) for i in range(numbins)]
# diff = sum(abs(realbins[i] - idealbins[i]) for i in range(numbins))
# add additional cost for highest bin mismatch (they lose that too easily)
# diff += abs(realbins[-1] - idealbins[-1])
#print ([abs(realbins[i] - idealbins[i]) for i in range(numbins)])
#print("diff", diff)
#print()
# return diff
def jumptotarget(target, prev, net):
"""Actually do the switch to the target location."""
# do not switch to exactly the target to avoid duplicate entries
# (implementation detail)
realtarget = target
while realtarget in net or realtarget == prev:
realtarget = (realtarget + (random()-0.5)*1.e-9) % 1
connections = net[prev][:]
net[realtarget] = connections
del net[prev]
for conn in connections:
net[conn] = sorted([c for c in net[conn] if not c == prev] + [realtarget])
def switchwithtarget(target, prev, net):
"""switch the location with the target."""
prevconnections = net[prev][:]
targetconnections = net[target][:]
net[target] = prevconnections
net[prev] = targetconnections
for conn in prevconnections:
net[conn] = sorted([c for c in net[conn] if not c == prev] + [target])
for conn in targetconnections:
net[conn] = sorted([c for c in net[conn] if not c == target] + [prev])
def connecttotarget(target, prev, net):
"""Connect prev to the target."""
net[prev].append(target)
net[target].append(prev)
def disconnectfromtarget(target, prev, net):
"""Disconnect from the target."""
net[prev].remove(target)
net[target].remove(prev)
def hasbetterlinks(target, prev, net):
"""Check if the target position has better links than the
previous."""
conns = net[prev][:]
old = distances(prev, conns)
new = distances(target, conns)
newdev = deviationfromsmallworld(new)
olddev = deviationfromsmallworld(old)
if sum(newdev) < sum(olddev):
return True
def checkjump(target, prev, net):
"""Check if we should jump to the target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
if hasbetterlinks(target, prev, net):
jumptotarget(target, prev, net)
return True
def checkswitch(target, prev, net):
"""Check if we should switch positions with the target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
if hasbetterlinks(target, prev, net):
if hasbetterlinks(prev, target, net):
switchwithtarget(target, prev, net)
return True
def checkswitchalways(target, prev, net):
"""Always switch positions with the target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
switchwithtarget(target, prev, net)
return True
def checkswitchonesided(target, prev, net):
"""Always switch positions with the target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
if hasbetterlinks(target, prev, net):
switchwithtarget(target, prev, net)
return True
def checkjumphalf(target, prev, net):
"""Check if we should jump halfway to the target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
conns = net[prev][:]
wrap = dist(target, prev) + 1.e-9 < abs(target - prev)
if wrap and prev < target:
target = ((prev + (target-1))/2)%1
elif wrap and prev > target:
target = (((prev-1) + (target))/2)%1
else:
target = (prev + (target))/2
target = (prev+target)/2
old = distances(prev, conns)
new = distances(target, conns)
newdev = deviationfromsmallworld(new)
olddev = deviationfromsmallworld(old)
if sum(newdev) < sum(olddev):
jumptotarget(target, prev, net)
return True
def checkjumpratio(target, prev, net, ratio):
"""Check if we should jump to the given ratio between prev and
target.
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
conns = net[prev][:]
# if the distance through the edge is shorter, go that way
wrap = dist(target, prev) + 1.e-9 < abs(target - prev)
if wrap and prev < target:
target = (prev * (1-ratio) + (target-1) * ratio)%1
elif wrap and prev > target:
target = ((prev-1) * (1-ratio) + target * ratio)%1
else:
target = prev * (1-ratio) + target * ratio
old = distances(prev, conns)
new = distances(target, conns)
newdev = deviationfromsmallworld(new)
olddev = deviationfromsmallworld(old)
if sum(newdev) < sum(olddev):
jumptotarget(target, prev, net)
return True
def checkjumpgolden(target, prev, net, small=False):
"""Check if we should jump the small or large golden ratio
(√5-1)/2 to the target (small=closer to the prev, not small=closer
to the target).
:param targetdeviation: deviation of the link distribution at the
target from a small world distribution."""
golden = (math.sqrt(5)-1)/2.0
if small:
return checkjumpratio(target, prev, net, 1-golden)
else:
return checkjumpratio(target, prev, net, golden)
def checkconnect(target, prev, net):
"""Check if we should connect to the target.
Check: Does it improve our local links if we replace the worst
connection with it."""
oldconns = net[prev][:]
olddist = distances(prev, oldconns)
deviation = deviationfromsmallworld(olddist, numbins=5)
bindefupper = smallworldbindef(numbins=5)
worstidx = deviation.index(max(deviation))
worstupper = bindefupper[worstidx]
if worstidx:
worstlower = bindefupper[worstidx-1]
else:
worstlower = 0
baddist = [i for i in olddist if worstlower <= i < worstupper]
if not baddist:
return # no really bad connections
tokill = choice(baddist)
newdist = [i for i in olddist if i != tokill] + [dist(prev, target)]
newdeviation = deviationfromsmallworld(newdist, numbins=5)
if sum(newdeviation) < sum(deviation):
connecttotarget(target, prev, net)
# if tokill has at least half as many connections as prev,
# disconnect it.
tokillconn = oldconns[olddist.index(tokill)]
enoughconnections = net[tokillconn][int(len(net[prev])/2):]
if enoughconnections:
disconnectfromtarget(tokillconn, prev, net)
return True
def checkreplacebest(target, prev, net):
"""Check if we should connect to the target.
Check: Does it improve our local links if we replace the worst
connection with it."""
oldconns = net[prev][:]
olddist = distances(prev, oldconns)
deviation = deviationfromsmallworld(olddist, numbins=5)
bindefupper = smallworldbindef(numbins=5)
bestidx = deviation.index(min(deviation))
bestupper = bindefupper[bestidx]
if bestidx:
bestlower = bindefupper[bestidx-1]
else:
bestlower = 0
gooddist = [i for i in olddist if bestlower <= i < bestupper]
if not gooddist:
return # no really good connections
tokill = choice(gooddist)
connecttotarget(target, prev, net)
# if tokill has at least half as many connections as prev,
# disconnect it.
tokillconn = oldconns[olddist.index(tokill)]
enoughconnections = net[tokillconn][int(len(net[prev])/2):]
if enoughconnections:
disconnectfromtarget(tokillconn, prev, net)
return True
def checksimpleconnect(target, prev, net):
"""Just connect to the target and disconnect a random node, if it
has enough connections."""
conns = net[prev][:]
connecttotarget(target, prev, net)
tokillconn = choice(conns)
enoughconnections = net[tokillconn][int(len(net[prev])/2):]
if enoughconnections:
disconnectfromtarget(tokillconn, prev, net)
return True
def checkreplacelongest(target, prev, net):
"""Just connect to the target and disconnect a random node, if it
has enough connections."""
conns = net[prev][:]
connecttotarget(target, prev, net)
dist = distances(prev, conns)
idx = dist.index(max(dist))
tokillconn = conns[idx]
enoughconnections = net[tokillconn][int(len(net[prev])/2):]
if enoughconnections:
disconnectfromtarget(tokillconn, prev, net)
return True
def checkfold(target, prev, net, probability=0.07, strategy="jumphalf"):
"""switch to the target location with some probability"""
if random() > probability:
return
if strategy == "jump":
return checkjump(target, prev, net)
if strategy == "switch":
return checkswitch(target, prev, net)
if strategy == "switchalways":
return checkswitchalways(target, prev, net)
if strategy == "switchonesided":
return checkswitchonesided(target, prev, net)
if strategy == "jumphalf":
return checkjumphalf(target, prev, net)
if strategy == "jumpgolden":
return checkjumpgolden(target, prev, net)
if strategy == "jumpgoldensmall":
return checkjumpgolden(target, prev, net, small=True)
if strategy == "connect":
return checkconnect(target, prev, net)
if strategy == "replacebest":
return checkreplacebest(target, prev, net)
if strategy == "replacelongest":
return checkreplacelongest(target, prev, net)
if strategy == "connectsimple":
return checksimpleconnect(target, prev, net)
def fold(net, num=100, strategy="jumphalf", maxhtl=20):
"""do num path foldings.
:return: the lengths of all used routes."""
routelengths = []
for i in range(num):
nodes = list(net.keys())
start = choice(nodes)
target = choice(nodes)
while target == start:
target = choice(nodes)
route = findroute(target, start, net)
if not route:
continue
routelen = len(route)
routelengths.append(routelen)
if routelen > maxhtl:
continue
# fold all on the route except for the start and the endpoint
for prev in route[:-1]:
pnet = net[prev]
didfold = checkfold(target, prev, net, strategy=strategy)
if didfold:
target = prev
return routelengths
def linklengths(net):
"""calculate the lengths of all links"""
lengths = []
for node, targets in net.items():
lengths.extend(distances(node, targets))
return lengths
def parse_args():
from argparse import ArgumentParser
parser = ArgumentParser(description="Simulate Freenet network optimization.")
parser.add_argument("--strategy", help="The optimization strategy: switch switchalways switchonesided jump jumphalf jumpgolden jumpgoldensmall connect connectsimple replacebest replacelongest", default="jumphalf")
parser.add_argument("--size", help="The size of the network.", default=1000, type=int)
parser.add_argument("--connections", help="The mean number of connections per node.", default=20, type=int)
parser.add_argument("--maxhtl", help="The maximum length of routes to be successful.", default=20, type=int)
parser.add_argument("--steps", help="The maximum number of modelsteps.", default=60, type=int)
parser.add_argument("--perstep", help="The number of requests to run per step.", default=100, type=int)
parser.add_argument("-o", "--output", help="Filename of the pylab plot.", default="plot.png")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
nodes = randomnodes(args.size)
lensnapshots = {}
foldperstep = args.perstep
runs = ["ideal", "flat"]
for run in runs:
if run == "ideal":
net = generatesmallworldunclean(nodes, args.connections)
else:
net = generateflat(nodes, args.connections)
lensnapshots[(run,0)] = linklengths(net), [0]
print (np.mean(lensnapshots[(run,0)][0]))
print("===", run, "===")
routelengths = fold(net, 20)
linklens = linklengths(net)
print (np.mean(linklens), np.mean(routelengths), "±", np.std(routelengths), "succ", len([r for r in routelengths if r < args.maxhtl])/len(routelengths), min(routelengths), max(routelengths), sum(deviationfromsmallworld(linklens, numbins=10)))
for i in range(args.steps):
routelengths = fold(net, foldperstep, args.strategy, args.maxhtl)
linklens = linklengths(net)
lensnapshots[(run, i+1)] = linklens, routelengths
print (np.mean(linklens), np.mean(routelengths), "±", np.std(routelengths), "succ", len([r for r in routelengths if r < args.maxhtl])/len(routelengths),
min(routelengths), max(routelengths), sum(deviationfromsmallworld(linklens, numbins=10)))
# now plot the data
import pylab as pl
pl.rcParams['font.size'] = 5
for key, val in sorted(lensnapshots.items()):
run, i = key
linklen, routelen = val
# only plot one in 10 results
if i % 10 and i != 1:
continue
succ = len([r for r in routelen if r < args.maxhtl])/len(routelen)
pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + "{:}".format(i*foldperstep) + ", len: " + "{:.2f}".format(np.mean(routelen)) + ", succ: " + "{:.2f}".format(succ))
pl.semilogx()
pl.legend(loc="upper left")
pl.title(str(args.size) + " nodes, " + str(args.connections) + " connections per node, optimized with strategy " + args.strategy + ". succ: found in max " + str(args.maxhtl) + " steps.")
pl.savefig(args.output)