Guile Freenet
 
(drak)
2016-02-02: Crawl all versions and save with the date from the weekly date hint.

Crawl all versions and save with the date from the weekly date hint.

diff --git a/crawl-wot.scm b/crawl-wot.scm
--- a/crawl-wot.scm
+++ b/crawl-wot.scm
@@ -31,8 +31,8 @@
   (string-append base-url "/" uri "?forcedownload=true"))
 
 
-(define (get uri)
-  (let* ((u (string->uri uri))
+(define (get url)
+  (let* ((u (string->uri url))
          (r (build-request u))
          (p (open-socket-for-uri u))
          (rr (write-request r p))
@@ -45,15 +45,18 @@
       (let ((c (response-code resp))
             (h (response-headers resp))
             (b (read-response-body resp)))
-        (if (= c 301)
-            (get (furl (assoc-ref h 'location)))
-            (cond
-             ((equal? '(text/html (charset . "utf-8")) (assoc-ref h 'content-type))
-              (utf8->string b))
-             ((equal? '(application/force-download) (assoc-ref h 'content-type))
-              (utf8->string b))
-             (else
-              (assoc-ref h 'content-type))))))))
+        (cond
+         ((= c 301)
+          (get (furl (assoc-ref h 'location))))
+          ((= c 200)
+           (cond
+            ((equal? '(text/html (charset . "utf-8")) (assoc-ref h 'content-type))
+             (utf8->string b))
+            ((equal? '(application/force-download) (assoc-ref h 'content-type))
+             (utf8->string b))
+            (else (assoc-ref h 'content-type))))
+          (else c))))))
+
 
 
 (define (non-breaking-sxml-reader xml-port)
@@ -117,12 +120,72 @@
               known
               (append known (map crawl new))))))))
 
+(define (parse-datehint str)
+  (let ((lines (string-split str #\newline)))
+    `((version . ,(list-ref lines 1))
+      (date . ,(list-ref lines 2)))))
+
+(define* (datehint-for-key key year #:key (sitename "WebOfTrust") (week #f))
+  (string-append "SSK" (substring key 3)
+                 "/" sitename
+                 "-" "DATEHINT"
+                 "-" (number->string year)
+                 (if week (string-append "-WEEK-" (number->string week)) "")))
+  
+
+(define (furl-key-name-version key name version)
+  "Get a freenet URL for the key and the version"
+  (furl-uri (string-append key "/" name "-" (number->string version))))
+
+(define (download-by-date-hint uri)
+  "Download all versions of the ID, ordered by the week in the DATEHINT."
+  ;; An uri looks like this: USK@QWW2a74OWrtN-aWJ80fjWhfFx8NlNrlU0dQfd3J7t1E,2g-wfM57Up9DV1qoEDMPcDU-KPskk0yyiYFz67ydSos,AQACAAE
+  ;; A date hint for WoT looks like this: SSK@QWW2a74OWrtN-aWJ80fjWhfFx8NlNrlU0dQfd3J7t1E,2g-wfM57Up9DV1qoEDMPcDU-KPskk0yyiYFz67ydSos,AQACAAE-WebOfTrust-DATEHINT-2015
+  ;; or
+  ;; SSK@[key]/[sitename]-DATEHINT-[year]
+  ;; SSK@[key]/[sitename]-DATEHINT-[year]-WEEK-[week]
+  ;; SSK@[key]/[sitename]-DATEHINT-[year]-[month]
+  ;; SSK@[key]/[sitename]-DATEHINT-[year]-[month]-[day]
+  ;; see http://draketo.de/light/english/freenet/usk-and-date-hints
+  ;; Approach: First check whether the ID has a date hint for each year. Then check each weak in the matching years.
+  ;; download the versions into directories ordered as YEAR-month-day/SSK@...-WebOfTrust-version
+  (let ((years (iota 10 2016 -1))
+        (weeks (iota 52 52 -1))) ; 52-1
+    (delete #f
+            (map (lambda (year)
+                   (let* ((yearuri (datehint-for-key (wot-uri-key uri) year))
+                          (hint (get (furl-uri yearuri))))
+                     (write yearuri)(newline)
+                     (write hint)(newline)
+                     (if (not (string? hint))
+                         #f
+                         (map (lambda (week)
+                                (let* ((weekuri (datehint-for-key (wot-uri-key uri) year #:week week))
+                                       (hint (get (furl-uri weekuri))))
+                                  (if (not (string? hint))
+                                      #f
+                                      (let* ((hint-alist (parse-datehint hint))
+                                             (version (assoc 'version  hint-alist))
+                                             (date (assoc 'date hint-alist))
+                                             (url (furl-key-name-version (wot-uri-key uri) "WebOfTrust" version))
+                                             (filename (string-append date "/" uri "-" (number->string version))))
+                                        (when (not (file-exists? date))
+                                          (mkdir date))
+                                        (let ((port (open-output-file filename)))
+                                          (put-string port (get url))
+                                          (close-port port))
+                                        filename))))
+                              weeks))))
+                 years))))
+
 (define (main args)
   (write args)(newline)
   (let ((seed-id (if (null? (cdr args))
                      seed-id
                      (car (cdr args)))))
-    (dump-wot-id seed-id
-                 (wot-uri-filename seed-id))
-    (crawl-wot seed-id)
+    (write (download-by-date-hint seed-id))
     (newline)))
+    ; (dump-wot-id seed-id
+    ;              (wot-uri-filename seed-id))
+    ; (let ((known-ids (crawl-wot seed-id)))
+    ;   (newline))))