(drak)
2016-01-27: working WoT crawler working WoT crawler
diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -16,6 +16,7 @@
(rnrs io ports)
(ice-9 match)
(srfi srfi-42)
+ (srfi srfi-1)
(rnrs bytevectors)
(sxml simple)
(sxml match))
@@ -55,15 +56,21 @@
(assoc-ref h 'content-type))))))))
+(define (non-breaking-sxml-reader xml-port)
+ (catch #t
+ (lambda () (xml->sxml xml-port))
+ (lambda (key . args) (format #t "~A: ~A" key args)(newline) '())))
+
(define (snarf-wot-ids xml-port)
- (let ((uris '()))
- (let grab-uris ((sxml (xml->sxml xml-port)))
- (match sxml
- (('Identity uri) (set! uris (cons uri uris)))
- ((a b ...)
- (map grab-uris sxml))
- (else sxml)))
- uris))
+ (let ((sxml (non-breaking-sxml-reader xml-port)))
+ (let ((uris '()))
+ (let grab-uris ((sxml sxml))
+ (match sxml
+ (('Identity uri) (set! uris (cons uri uris)))
+ ((a b ...)
+ (map grab-uris sxml))
+ (else sxml)))
+ uris)))
(define (wot-uri-key uri)
(string-take uri (string-index uri #\/)))
@@ -90,8 +97,10 @@
(write (car new))(newline))
(when (not (null? known))
(display 'known:)
- (write (car known))(newline))
- (set! known (cons new known))
+ (write (car known))(newline)(write (length known))(newline))
+ (set! known (lset-union equal?
+ (list-ec (: u new) (wot-uri-key u))
+ known))
(map crawl new)))))
(define (main args)