Guile Freenet
 
(drak)
2016-02-16: avoid weeks earlier than the date in the yearly date hint

Avoid trying to download weekly date hints for weeks earlier than the date given in the yearly date hint, since those versions cannot be available.

diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -142,9 +142,9 @@
               (set! known (lset-union equal?
                                       (list-ec (: u new) (wot-uri-key u))
                                       known))
-              (if (null? new)
+              (if (or (null? new) (> (length known) 10))
                   known
-                  (append known (map crawl new)))))))))
+                  (lset-union equal? known (map crawl new)))))))))
 
 (define (parse-datehint str)
   (let ((lines (string-split str #\newline)))
@@ -194,17 +194,23 @@
   ;; see http://draketo.de/light/english/freenet/usk-and-date-hints
   ;; Approach: First check whether the ID has a date hint for each year. Then check each weak in the matching years.
   ;; download the versions into directories ordered as YEAR-month-day/SSK@...-WebOfTrust-version
-  (let ((years (iota 10 2016 -1))
-        (weeks (iota 52 1))) ; 52-1
+  (let ((years (iota 10 2016 -1)) ; last 10 years
+        (weeks (iota 2 1))) ; 1-52
     (delete #f ;; only return the filenames of successful downloads 
             (par-map (lambda (year)
                        (let* ((yearuri (datehint-for-key (wot-uri-key uri) year))
-                              (hint (get (furl-uri yearuri))))
+                              (hint (get (furl-uri yearuri)))
+                              (hint-alist (parse-datehint hint))
+                              (date (assoc-ref hint-alist 'date))
+                              (month (string->number (list-ref (string-split date #\-) 2)))
+                              (min-week (* (- month 1) 4))) ; avoid trying to download weeks which cannot be available.
                          (if (not (string? hint))
                              #f
                              (delete #f ;; only return the filenames of successful downloads 
                                      (n-par-map 52 (lambda (week)
-                                                     (download-by-weekly-date-hint uri year week))
+                                                     (if (< week min-week) ; avoid weeks earlier than the date in the yearly date hint
+                                                         #f
+                                                         (download-by-weekly-date-hint uri year week)))
                                                 weeks)))))
                      years))))
 
@@ -215,6 +221,6 @@
     (let ((seed (if (string-index seed-id #\/)
                     seed-id
                     (string-append "USK" (string-drop seed-id 3) "/WebOfTrust/0"))))
-      (write (download-by-date-hint seed))
+      ;; (write (download-by-date-hint seed))
       (par-map (lambda (x) (map download-by-date-hint x))
                (crawl-wot seed)))))