(drak)
2016-02-10: flatten the return value of the crawled wot flatten the return value of the crawled wot
diff --git a/crawl-wot.scm b/crawl-wot.scm --- a/crawl-wot.scm +++ b/crawl-wot.scm @@ -101,43 +101,50 @@ (error (format #t "tried to save in file ~A\n" filename)))) (error (format #t "tried to save in file ~A\n" filename))))) +(define (flatten l) + "Flatten a nested list into a single list." + (cond ((null? l) '()) + ((list? l) (append (flatten (car l)) (flatten (cdr l)))) + (else (list l)))) + (define* (crawl-wot seed-id #:key (redownload #f)) - (let ((known '())) - (let crawl ((seed seed-id)) - ;; save the data - (if (catch 'misc-error - (lambda () (let* ((filename (wot-uri-filename seed)) - (dump (lambda () (dump-wot-id seed filename)))) - (if (and (not redownload) (file-exists? filename)) - (let* ((s (stat filename)) - (size (stat:size s))) - (if (= size 0) - (dump) - (format #t "Use local copy of file ~A (redownload ~A).\n" filename redownload))) - (dump)) - #f)) - (lambda (key . args) #t)) - known - ;; snarf all uris - (let ((uris (call-with-input-file (wot-uri-filename seed) snarf-wot-ids))) - ;; (write seed)(newline) - ;; (when (not (null? uris)) - ;; (write (car uris))(newline)) - (let ((new (list-ec (: u uris) (if (and - (not (pair? u)) ; TODO: this is a hack. I do not know why u can be the full sxml. Seems to happen with IDs who do not have any trust set. - (not (member (wot-uri-key u) known)))) u))) - (when (not (null? new)) - (display 'new:) - (write (car new))(newline)) - (when (not (null? known)) - (display 'known:) - (write (car known))(newline)(write (length known))(newline)) - (set! known (lset-union equal? - (list-ec (: u new) (wot-uri-key u)) - known)) - (if (null? new) - known - (append known (map crawl new))))))))) + (flatten + (let ((known '())) + (let crawl ((seed seed-id)) + ;; save the data + (if (catch 'misc-error + (lambda () (let* ((filename (wot-uri-filename seed)) + (dump (lambda () (dump-wot-id seed filename)))) + (if (and (not redownload) (file-exists? filename)) + (let* ((s (stat filename)) + (size (stat:size s))) + (if (= size 0) + (dump) + (format #t "Use local copy of file ~A (redownload ~A).\n" filename redownload))) + (dump)) + #f)) + (lambda (key . args) #t)) + known + ;; snarf all uris + (let ((uris (call-with-input-file (wot-uri-filename seed) snarf-wot-ids))) + ;; (write seed)(newline) + ;; (when (not (null? uris)) + ;; (write (car uris))(newline)) + (let ((new (list-ec (: u uris) (if (and + (not (pair? u)) ; TODO: this is a hack. I do not know why u can be the full sxml. Seems to happen with IDs who do not have any trust set. + (not (member (wot-uri-key u) known)))) u))) + (when (not (null? new)) + (display 'new:) + (write (car new))(newline)) + (when (not (null? known)) + (display 'known:) + (write (car known))(newline)(write (length known))(newline)) + (set! known (lset-union equal? + (list-ec (: u new) (wot-uri-key u)) + known)) + (if (null? new) + known + (append known (map crawl new)))))))))) (define (parse-datehint str) (let ((lines (string-split str #\newline))) @@ -209,5 +216,5 @@ seed-id (string-append "USK" (string-drop seed-id 3) "/WebOfTrust/0")))) (write (download-by-date-hint seed)) - (map download-by-date-hint - (crawl-wot seed))))) + (par-map download-by-date-hint + (crawl-wot seed)))))