(drak)
2016-02-16: avoid weeks earlier than the date in the yearly date hint avoid weeks earlier than the date in the yearly date hint
diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -142,9 +142,9 @@
(set! known (lset-union equal?
(list-ec (: u new) (wot-uri-key u))
known))
- (if (null? new)
+ (if (or (null? new) (> (length known) 10))
known
- (append known (map crawl new)))))))))
+ (lset-union equal? known (map crawl new)))))))))
(define (parse-datehint str)
(let ((lines (string-split str #\newline)))
@@ -194,17 +194,23 @@
;; see http://draketo.de/light/english/freenet/usk-and-date-hints
;; Approach: First check whether the ID has a date hint for each year. Then check each weak in the matching years.
;; download the versions into directories ordered as YEAR-month-day/SSK@...-WebOfTrust-version
- (let ((years (iota 10 2016 -1))
- (weeks (iota 52 1))) ; 52-1
+ (let ((years (iota 10 2016 -1)) ; last 10 years
+ (weeks (iota 2 1))) ; 1-52
(delete #f ;; only return the filenames of successful downloads
(par-map (lambda (year)
(let* ((yearuri (datehint-for-key (wot-uri-key uri) year))
- (hint (get (furl-uri yearuri))))
+ (hint (get (furl-uri yearuri)))
+ (hint-alist (parse-datehint hint))
+ (date (assoc-ref hint-alist 'date))
+ (month (string->number (list-ref (string-split date #\-) 2)))
+ (min-week (* (- month 1) 4))) ; avoid trying to download weeks which cannot be available.
(if (not (string? hint))
#f
(delete #f ;; only return the filenames of successful downloads
(n-par-map 52 (lambda (week)
- (download-by-weekly-date-hint uri year week))
+ (if (< week min-week) ; avoid weeks earlier than the date in the yearly date hint
+ #f
+ (download-by-weekly-date-hint uri year week)))
weeks)))))
years))))
@@ -215,6 +221,6 @@
(let ((seed (if (string-index seed-id #\/)
seed-id
(string-append "USK" (string-drop seed-id 3) "/WebOfTrust/0"))))
- (write (download-by-date-hint seed))
+ ;; (write (download-by-date-hint seed))
(par-map (lambda (x) (map download-by-date-hint x))
(crawl-wot seed)))))