the implemented path folding algorithm really sucks. It actually makes an ideal network random.
diff --git a/sim.py b/sim.py --- a/sim.py +++ b/sim.py @@ -2,11 +2,13 @@ """very simple gnutella path folding simulator.""" -from random import random, choice +from random import random, choice, randint, shuffle import numpy as np import pylab as pl +import bisect +from copy import deepcopy -def randomnodes(num=8000): +def randomnodes(num=1000): """Generate num nodes with locations between 0 and 1.""" nodes = set() for n in range(num): @@ -14,7 +16,7 @@ def randomnodes(num=8000): while n in nodes: n = random() nodes.add(n) - return list(nodes) + return sorted(list(nodes)) def generateflat(nodes, connectionspernode=10): """generate a randomly connected network with the given connectionspernode.""" @@ -39,6 +41,60 @@ def generateflat(nodes, connectionsperno net[node] == sorted(conns) return net +def closetolocrandomdist(loc, nodes, lennodes): + """With the assumption that our locations are in a random flat + distribution, we can do a faster lookup of the location to + take. + + Essentially this does automatic binning with the cost of some perfection. + """ + lo = max(0, int((loc - 10/lennodes) * lennodes)) + hi = min(lennodes, int((loc + 10/lennodes) * lennodes)) + return bisect.bisect_left(nodes, loc, lo=lo, hi=hi) - 1 + +def smallworldtargetlocations(node, connectionspernode): + """Small world routing requires that the number number of + connections to a certain area in the key space scales with + 1/distance.""" + maxdist = 0.5 + dists = [maxdist / (i+1) for i in range(int(connectionspernode*100))] + locs = [] + for d in dists: + locs.append((node + d) % 1) + locs.append((node - d) % 1) + shuffle(locs) + return locs[:connectionspernode] + +def generatesmallworldunclean(nodes, connectionspernode=10): + """generate an ideal small world network with the given connectionspernode.""" + + net = {} + lennodes = len(nodes) + for node in nodes: + if not node in net: + net[node] = [] + numconn = connectionspernode - len(net[node]) + for loc in smallworldtargetlocations(node, numconn): + # find connections to others we do not know yet + closestindex = closetolocrandomdist(loc, nodes, lennodes) + conn = nodes[closestindex] + while (conn == node or + conn in net and ( + conn in net[node] or + node in net[conn] or + net[conn][connectionspernode+10:])): + closestindex = (closestindex + randint(-lennodes/connectionspernode,lennodes/connectionspernode-1))%len(nodes) + conn = nodes[closestindex] + net[node].append(conn) + if not conn in net: + net[conn] = [] + net[conn].append(node) + + # we need to make sure that all connections are sorted… + for conns in net.values(): + conns.sort() + return net + def closest(target, sortedlist): """return the node which is closest to the target.""" if not sortedlist: @@ -109,17 +165,12 @@ def fold(net, num=10): target = choice(nodes) while target == start: target = choice(nodes) - route = findroute(target, start, net) routelen = len(route) routelengths.append(routelen) # fold all on the route except for the start and the endpoint for prev in route[1:-1]: - try: - pnet = net[prev] - except KeyError: - idx = route.index(prev) - print (route[idx:], prev, target, idx, prev in route[:idx]) + pnet = net[prev] checkfold(target, prev, net) return routelengths @@ -134,17 +185,21 @@ def linklengths(net): if __name__ == "__main__": nodes = randomnodes() - net = generateflat(nodes) + basenet = generatesmallworldunclean(nodes) lensnapshots = {} - lensnapshots[(0,0)] = linklengths(net) - print (np.mean(lensnapshots[(0,0)])) - foldperstep = 500 + foldperstep = 50 for run in range(2): + if not run: + net = deepcopy(basenet) + else: + net = generateflat(nodes) + lensnapshots[(run,0)] = linklengths(net), [0] + print (np.mean(lensnapshots[(0,0)][0])) print("===", "run", run, "===") for i in range(40): lengths = fold(net, foldperstep) - lensnapshots[(run+1, i+1)] = [linklengths(net), lengths] - print (np.mean(lensnapshots[(run+1, i+1)][0]), np.mean(lensnapshots[(run+1, i+1)][1])) + lensnapshots[(run, i+1)] = linklengths(net), lengths + print (np.mean(lensnapshots[(run, i+1)][0]), np.mean(lensnapshots[(run, i+1)][1])) for key, val in sorted(lensnapshots.items()): run, i = key @@ -152,14 +207,8 @@ if __name__ == "__main__": # only plot one in 10 results if i % 10: continue - pdf, bins, patches = pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + str(i*foldperstep) + ", " + str(routelen)) + pl.hist(linklen, 10000, cumulative=True, normed=True, histtype='step', label=str(run) + ", " + str(i*foldperstep) + ", " + str(np.mean(routelen))) pl.semilogx() pl.legend(loc="best") pl.savefig("123.png") - - - - - -