Guile Freenet
 
(drak)
2016-01-27: working WoT crawler

working WoT crawler

diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -16,6 +16,7 @@
              (rnrs io ports)
              (ice-9 match)
              (srfi srfi-42)
+             (srfi srfi-1)
              (rnrs bytevectors)
              (sxml simple)
              (sxml match))
@@ -55,15 +56,21 @@
               (assoc-ref h 'content-type))))))))
 
 
+(define (non-breaking-sxml-reader xml-port)
+  (catch #t
+         (lambda () (xml->sxml xml-port))
+         (lambda (key . args) (format #t "~A: ~A" key args)(newline) '())))
+
 (define (snarf-wot-ids xml-port)
-  (let ((uris '()))
-    (let grab-uris ((sxml (xml->sxml xml-port)))
-      (match sxml
-             (('Identity uri) (set! uris (cons uri uris)))
-             ((a b ...)
-              (map grab-uris sxml))
-             (else sxml)))
-    uris))
+  (let ((sxml (non-breaking-sxml-reader xml-port)))
+    (let ((uris '()))
+      (let grab-uris ((sxml sxml))
+        (match sxml
+               (('Identity uri) (set! uris (cons uri uris)))
+               ((a b ...)
+                (map grab-uris sxml))
+               (else sxml)))
+      uris)))
 
 (define (wot-uri-key uri)
   (string-take uri (string-index uri #\/)))
@@ -90,8 +97,10 @@
               (write (car new))(newline))
         (when (not (null? known))
               (display 'known:)
-              (write (car known))(newline))
-        (set! known (cons new known))
+              (write (car known))(newline)(write (length known))(newline))
+        (set! known (lset-union equal?
+                     (list-ec (: u new) (wot-uri-key u))
+                     known))
         (map crawl new)))))
 
 (define (main args)