#!/bin/sh
# -*- scheme -*-
exec guile -e main -s "$0" "$@"
!#

;; Double-anonymize the trust.csv by replacing keys with
;; index-numbers. This prevents results from evaluations of the trust
;; graph from being applied directly to correlation attacks on Freenet
;; users.
;;
;; Usage: <this-script> [INFILE [OUTFILE]]
;;   INFILE  defaults to "trust-deduplicated.csv"
;;   OUTFILE defaults to "trust-anonymized.csv"
;;
;; The input is a ';'-separated file whose first line starts with
;; "Source;Target" (case-insensitive); the first three columns of every
;; following line are used as source, target and weight.

;; TODO: use vhashes instead of regular hash tables.

(use-modules (ice-9 rdelim)
             (ice-9 i18n)
             (srfi srfi-69) ; hash tables
             (srfi srfi-1)  ; first, second, third
             )

;; Sets are represented as SRFI-69 hash tables: the members are the
;; keys and the stored value is always #t.

;; Add every one of ELEMENTS to the set TABLE (mutating it) and return
;; TABLE.
(define (set-add table . elements)
  (for-each (lambda (element)
              (hash-table-set! table element #t))
            elements)
  table)

;; Return the members of the set TABLE as a list (order unspecified).
(define (set-keys table)
  (hash-table-keys table))

;; Return the number of members of the set TABLE.
(define (set-size table)
  (hash-table-size table))

;; Return the members of the set TABLE, which must all be strings, as a
;; list sorted with string<?.
(define (set->list-sorted table)
  (sort-list (set-keys table) string<?))

;; Return a new, empty set.
(define (make-set)
  (make-hash-table))

;; Read all remaining lines from PORT as ';'-separated rows and return
;; the sorted list of distinct strings found in the first two columns.
;; Empty lines are skipped.
(define (get-ids port)
  (let collect-ids ((ids (make-set)))
    (let ((line (read-line port)))
      (cond
       ((eof-object? line)
        (set->list-sorted ids))
       ;; An empty line (e.g. a trailing blank line) has no second
       ;; column; skip it instead of failing in `second'.
       ((string-null? line)
        (collect-ids ids))
       (else
        (let* ((columns (string-split line #\;))
               (source (first columns))
               (target (second columns)))
          (collect-ids (set-add ids source target))))))))

;; Return a procedure that maps each element of the list IDS to its
;; zero-based position in IDS. The lookup uses hash-table-ref without a
;; fallback, so asking for an id that is not in IDS is an error.
(define (index-ids-fun ids)
  (let ((id-to-index (make-hash-table)))
    (let fill-table ((ids ids) (index 0))
      (unless (null? ids)
        (hash-table-set! id-to-index (car ids) index)
        (fill-table (cdr ids) (+ 1 index))))
    (lambda (id)
      (hash-table-ref id-to-index id))))

;; Consume the first line of PORT and raise an error unless it starts,
;; ignoring case, with "source;target". An input without any header
;; line (empty input) raises the same kind of error.
(define (check-csv-header port)
  (let ((header (read-line port))
        (required-header-lowercase "source;target"))
    (cond
     ;; read-line returns the EOF object for empty input; it must not
     ;; reach string-locale-downcase, which only accepts strings.
     ((eof-object? header)
      (error (format #f "input file must have header '~A' (regardless of case) but is empty"
                     required-header-lowercase)))
     ((not (string-prefix? required-header-lowercase
                           (string-locale-downcase header)))
      (error (format #f "input file must have header '~A' (regardless of case) but has header '~A'"
                     required-header-lowercase header))))))

;; Validate the header read from PORT, then return a procedure that
;; maps every id found in the remaining lines to its index in the
;; sorted list of ids.
(define (index-ids-from-file port)
  (check-csv-header port)
  (let* ((ids (get-ids port))
         (id->index (index-ids-fun ids)))
    id->index))

;; Validate the header read from INPORT, write the header
;; "Source;Target;Weight" to OUTPORT, then copy every remaining
;; non-empty line to OUTPORT with the first two columns replaced by
;; (ID->INDEX column) and the third column kept as is. Returns #t.
(define (anonymize-ids id->index inport outport)
  (check-csv-header inport)
  (format outport "Source;Target;Weight\n")
  (let anonymize ((line (read-line inport)))
    (cond
     ((eof-object? line) #t)
     ;; Keep in step with get-ids: empty lines carry no ids.
     ((string-null? line)
      (anonymize (read-line inport)))
     (else
      (let* ((columns (string-split line #\;))
             (source (id->index (first columns)))
             (target (id->index (second columns)))
             (weight (third columns)))
        (format outport "~A;~A;~A\n" source target weight))
      (anonymize (read-line inport))))))

;; Entry point. ARGS is the command line: the program name, optionally
;; followed by the input file name and the output file name.
(define (main args)
  (let* ((file-args (cdr args))
         (infile (if (null? file-args)
                     "trust-deduplicated.csv"
                     (first file-args)))
         (outfile (if (or (null? file-args) (null? (cdr file-args)))
                      "trust-anonymized.csv"
                      (second file-args)))
         ;; First pass over the input: build the id -> index mapping.
         ;; Doing this before the output file is opened means an
         ;; invalid input file fails before OUTFILE is created or
         ;; truncated.
         (id->index (call-with-input-file infile index-ids-from-file)))
    ;; Second pass: rewrite the rows. The call-with-*-file procedures
    ;; close both ports when the inner procedure returns.
    (call-with-input-file infile
      (lambda (inport)
        (call-with-output-file outfile
          (lambda (outport)
            (anonymize-ids id->index inport outport)))))))