(drak)
2016-02-10: flatten the return value of the crawled wot
diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -101,43 +101,50 @@
(error (format #t "tried to save in file ~A\n" filename))))
(error (format #t "tried to save in file ~A\n" filename)))))
+(define (flatten l)
+ "Flatten a nested list into a single list."
+ (cond ((null? l) '()) ; the empty list contributes no elements
+ ((list? l) (append (flatten (car l)) (flatten (cdr l)))) ; non-empty proper list: flatten head and tail, then concatenate
+ (else (list l)))) ; anything that is not a proper list is kept as a single element
+
(define* (crawl-wot seed-id #:key (redownload #f))
- (let ((known '()))
- (let crawl ((seed seed-id))
- ;; save the data
- (if (catch 'misc-error
- (lambda () (let* ((filename (wot-uri-filename seed))
- (dump (lambda () (dump-wot-id seed filename))))
- (if (and (not redownload) (file-exists? filename))
- (let* ((s (stat filename))
- (size (stat:size s)))
- (if (= size 0)
- (dump)
- (format #t "Use local copy of file ~A (redownload ~A).\n" filename redownload)))
- (dump))
- #f))
- (lambda (key . args) #t))
- known
- ;; snarf all uris
- (let ((uris (call-with-input-file (wot-uri-filename seed) snarf-wot-ids)))
- ;; (write seed)(newline)
- ;; (when (not (null? uris))
- ;; (write (car uris))(newline))
- (let ((new (list-ec (: u uris) (if (and
- (not (pair? u)) ; TODO: this is a hack. I do not know why u can be the full sxml. Seems to happen with IDs who do not have any trust set.
- (not (member (wot-uri-key u) known)))) u)))
- (when (not (null? new))
- (display 'new:)
- (write (car new))(newline))
- (when (not (null? known))
- (display 'known:)
- (write (car known))(newline)(write (length known))(newline))
- (set! known (lset-union equal?
- (list-ec (: u new) (wot-uri-key u))
- known))
- (if (null? new)
- known
- (append known (map crawl new)))))))))
+ (flatten ; the recursive crawl below returns nested lists (append known (map crawl new)); collapse them into one list
+ (let ((known '())) ; keys recorded so far; updated with set! below and shared by every recursive crawl call
+ (let crawl ((seed seed-id)) ; named let: crawl is re-entered for each newly found uri
+ ;; save the data
+ (if (catch 'misc-error ; evaluates to #f when the thunk completes, #t when a misc-error was caught
+ (lambda () (let* ((filename (wot-uri-filename seed))
+ (dump (lambda () (dump-wot-id seed filename))))
+ (if (and (not redownload) (file-exists? filename))
+ (let* ((s (stat filename))
+ (size (stat:size s)))
+ (if (= size 0)
+ (dump) ; the local file is empty: run dump-wot-id for it
+ (format #t "Use local copy of file ~A (redownload ~A).\n" filename redownload)))
+ (dump)) ; redownload requested or no local file present
+ #f))
+ (lambda (key . args) #t)) ; handler ignores the error details and only signals failure
+ known ; error branch: return the keys known so far
+ ;; snarf all uris
+ (let ((uris (call-with-input-file (wot-uri-filename seed) snarf-wot-ids)))
+ ;; (write seed)(newline)
+ ;; (when (not (null? uris))
+ ;; (write (car uris))(newline))
+ (let ((new (list-ec (: u uris) (if (and
+ (not (pair? u)) ; TODO: this is a hack. I do not know why u can be the full sxml. Seems to happen with IDs who do not have any trust set.
+ (not (member (wot-uri-key u) known)))) u))) ; keep only uris whose key is not yet in known
+ (when (not (null? new))
+ (display 'new:)
+ (write (car new))(newline))
+ (when (not (null? known))
+ (display 'known:)
+ (write (car known))(newline)(write (length known))(newline))
+ (set! known (lset-union equal? ; record the keys of the new uris before recursing
+ (list-ec (: u new) (wot-uri-key u))
+ known))
+ (if (null? new)
+ known ; nothing new was found for this seed
+ (append known (map crawl new)))))))))) ; each crawl call returns a list, so this result is nested
(define (parse-datehint str)
(let ((lines (string-split str #\newline)))
@@ -209,5 +216,5 @@
seed-id
(string-append "USK" (string-drop seed-id 3) "/WebOfTrust/0"))))
(write (download-by-date-hint seed))
- (map download-by-date-hint
- (crawl-wot seed)))))
+ (par-map download-by-date-hint
+ (crawl-wot seed)))))