(drak)
2016-02-16: avoid weeks earlier than the date in the yearly date hint
diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -142,9 +142,9 @@
             (set! known (lset-union equal?
                                     (list-ec (: u new) (wot-uri-key u))
                                     known))
-            (if (null? new)
+            (if (or (null? new) (> (length known) 10))
                 known
-                (append known (map crawl new)))))))))
+                (lset-union equal? known (map crawl new)))))))))
 
 (define (parse-datehint str)
   (let ((lines (string-split str #\newline)))
@@ -194,17 +194,23 @@
   ;; see http://draketo.de/light/english/freenet/usk-and-date-hints
   ;; Approach: First check whether the ID has a date hint for each year. Then check each weak in the matching years.
   ;; download the versions into directories ordered as YEAR-month-day/SSK@...-WebOfTrust-version
-  (let ((years (iota 10 2016 -1))
-        (weeks (iota 52 1))) ; 52-1
+  (let ((years (iota 10 2016 -1)) ; last 10 years
+        (weeks (iota 2 1))) ; 1-52
     (delete #f ;; only return the filenames of successful downloads
      (par-map (lambda (year)
        (let* ((yearuri (datehint-for-key (wot-uri-key uri) year))
-              (hint (get (furl-uri yearuri))))
+              (hint (get (furl-uri yearuri)))
+              (hint-alist (parse-datehint hint))
+              (date (assoc-ref hint-alist 'date))
+              (month (string->number (list-ref (string-split date #\-) 2)))
+              (min-week (* (- month 1) 4))) ; avoid trying to download weeks which cannot be available.
          (if (not (string? hint))
              #f
              (delete #f ;; only return the filenames of successful downloads
               (n-par-map 52 (lambda (week)
-                  (download-by-weekly-date-hint uri year week))
+                  (if (< week min-week) ; avoid weeks earlier than the date in the yearly date hint
+                      #f
+                      (download-by-weekly-date-hint uri year week)))
                   weeks)))))
         years))))
 
@@ -215,6 +221,6 @@
     (let ((seed (if (string-index seed-id #\/)
                     seed-id
                     (string-append "USK" (string-drop seed-id 3) "/WebOfTrust/0"))))
-      (write (download-by-date-hint seed))
+      ;; (write (download-by-date-hint seed))
       (par-map (lambda (x) (map download-by-date-hint x))
                (crawl-wot seed)))))