Guile Freenet
 
(drak)
2016-02-12: anonymization script.

anonymization script.

diff --git a/anonymize-csv.scm b/anonymize-csv.scm
new file mode 100755
--- /dev/null
+++ b/anonymize-csv.scm
@@ -0,0 +1,107 @@
+#!/bin/sh
+# -*- scheme -*-
+exec guile -e main -s "$0" "$@"
+!#
+
+;; Double-Anonymize the trust.csv by replacing keys with
+;; index-numbers.  This prevents results from evaluations of the trust
+;; graph from being applied directly to correlation attacks on Freenet
+;; users.
+
+;; TODO: use vhashes instead of regular hash tables.
+
+(use-modules (ice-9 rdelim)
+             (ice-9 i18n)
+             (srfi srfi-69) ; hash tables
+             (srfi srfi-1) ; first, second, third
+             )
+
+
+(define (set-add table . elements)
+  ;; Insert each of ELEMENTS as a key of the hash table TABLE (the value
+  ;; stored is always #t) and return TABLE itself.
+  (for-each (lambda (element)
+              (hash-table-set! table element #t))
+            elements)
+  ;; TABLE was modified in place; hand it back so calls can be chained.
+  table)
+
+;; Return the members of the set TABLE (the keys of the hash table) as a list.
+(define (set-keys table) (hash-table-keys table))
+
+;; Return the number of members stored in the set TABLE.
+(define (set-size table) (hash-table-size table))
+
+;; Return the members of the set TABLE as a list ordered by string<?.
+(define (set->list-sorted table) (sort (hash-table-keys table) string<?))
+
+;; Create a new, empty set; sets are represented as plain hash tables.
+(define (make-set) (make-hash-table))
+
+(define (get-ids port)
+  ;; Read ";"-separated rows from PORT until EOF and return the sorted list
+  ;; of distinct ids from the first two columns; blank lines are skipped.
+  (let collect-ids ((ids (make-set)))
+    (let ((line (read-line port)))
+      (cond
+       ((eof-object? line) (set->list-sorted ids))
+       ((string-null? (string-trim-both line)) (collect-ids ids))
+       (else
+        (let ((columns (string-split line #\;)))
+          (collect-ids (set-add ids (first columns) (second columns)))))))))
+
+
+(define (index-ids-fun ids)
+  ;; Number the elements of the list IDS consecutively, starting at 0 and
+  ;; following list order, and return a one-argument procedure that maps
+  ;; an id to its number via `hash-table-ref' (called without a default).
+  (let ((id-to-index (make-hash-table)))
+    (for-each (lambda (id index)
+                (hash-table-set! id-to-index id index))
+              ids
+              (iota (length ids)))
+    (lambda (id)
+      (hash-table-ref id-to-index id))))
+
+(define (check-csv-header port)
+  ;; Consume the first line of PORT and signal an error unless it starts
+  ;; with "source;target" (any case); an empty input fails the same way.
+  (let ((header (read-line port)) (required-header-lowercase "source;target"))
+    (when (or (eof-object? header) (not (string-prefix? required-header-lowercase (string-locale-downcase header))))
+      (error (format #f "input file must have header '~A' (regardless of case) but has header '~A'" required-header-lowercase header)))))
+
+(define (index-ids-from-file port)
+  ;; Validate the CSV header on PORT, then build and return the id->index
+  ;; lookup procedure from the ids found in the remaining lines.
+  (check-csv-header port)
+  (index-ids-fun (get-ids port)))
+    
+
+(define (anonymize-ids id->index inport outport)
+  ;; Check INPORT's header, then copy each row to OUTPORT with columns one
+  ;; and two mapped through ID->INDEX; blank lines are skipped.  Returns #t.
+  (check-csv-header inport)
+  (format outport "Source;Target;Weight\n")
+  (let anonymize ((line (read-line inport)))
+    (cond ((eof-object? line) #t)
+          ((string-null? (string-trim-both line)) (anonymize (read-line inport)))
+          (else
+           (let ((columns (string-split line #\;)))
+             (format outport "~A;~A;~A\n" (id->index (first columns))
+                     (id->index (second columns)) (third columns)))
+           (anonymize (read-line inport))))))
+
+
+(define (main args)
+  ;; ARGS is the command line: script name, optional input file, optional
+  ;; output file; the defaults are the string literals below.
+  (let* ((params (cdr args))
+         (infile (if (pair? params) (first params) "trust.csv"))
+         (outfile (if (and (pair? params) (pair? (cdr params)))
+                      (second params) "trust-anonymized.csv"))
+         (id->index (call-with-input-file infile index-ids-from-file))
+         (inport (open-input-file infile))
+         (outport (open-output-file outfile)))
+    (anonymize-ids id->index inport outport)
+    (close-port inport)
+    (close-port outport)))