wisp: wisp-scheme.w

#!/home/arne/wisp/wisp-multiline.sh
; !#

;; Scheme-only implementation of a wisp-preprocessor which output a
;; scheme Tree IL to feed to a scheme interpreter instead of a
;; preprocessed file.

;; Plan:
;; read reads the first expression from a string. It ignores comments,
;; so we have to treat these specially. Our wisp-reader only needs to
;; worry about whitespace.
;; 
;; So we can skip all the string and bracket linebreak escaping and
;; directly create a list of codelines with indentation. For this we
;; then simply reuse the appropriate function from the generic wisp
;; preprocessor.
;; 
;; TODO: use match:
;; (use-modules (ice-9 match))
;; (define dot (call-with-input-string "." read))
;; (match (list 'u 'v dot 'w) ((a ... b '#{.}# c) (append a (cons b c))) )

define-module : wisp-scheme
   . #:export (wisp-scheme-read-chunk wisp-scheme-read-all 
               wisp-scheme-read-file-chunk wisp-scheme-read-file
               wisp-scheme-read-string)

use-modules 
  srfi srfi-1
  srfi srfi-11 ; for let-values
  ice-9 rw ; for write-string/partial
  ice-9 match

;; Helper functions for the indent-and-symbols data structure: '((indent token token ...) ...)
define : line-indent line
         car line

define : line-real-indent line
         . "Get the indentation without the comment-marker for unindented lines (-1 is treated as 0)."
         let : :  indent : line-indent line
             if : = -1 indent
               . 0
               . indent

define : line-code line
         cdr line

; literal values I need
define readcolon 
       string->symbol ":"
define readdot
       string->symbol "."

define : line-continues? line
         equal? readdot : car : line-code line

define : line-only-colon? line
         and
           equal? ":" : car : line-code line
           null? : cdr : line-code line

define : line-empty-code? line
         null? : line-code line

define : line-empty? line
         and
           ; if indent is -1, we stripped a comment, so the line was not really empty.
           = 0 : line-indent line
           line-empty-code? line

define : line-strip-continuation line   
         if : line-continues? line
              append 
                list 
                  line-indent line
                cdr : line-code line
              . line

define : line-strip-indentation-marker line
         ' "Strip the indentation markers from the beginning of the line"
         cdr line

define : indent-level-reduction indentation-levels level select-fun
         . "Reduce the INDENTATION-LEVELS to the given LEVEL and return the value selected by SELECT-FUN"
         let loop 
           : newlevels indentation-levels
             diff 0
           cond
             : = level : car newlevels
               select-fun : list diff indentation-levels
             : < level : car newlevels
               loop
                 cdr newlevels
                 1+ diff
             else
               throw 'wisp-syntax-error "Level ~A not found in the indentation-levels ~A."

define : indent-level-difference indentation-levels level
         . "Find how many indentation levels need to be popped off to find the given level."
         indent-level-reduction indentation-levels level
           lambda : x ; get the count
                    car x

define : indent-reduce-to-level indentation-levels level
         . "Find how many indentation levels need to be popped off to find the given level."
         indent-level-reduction indentation-levels level
           lambda : x ; get the levels
                    car : cdr x


define : wisp-scheme-read-chunk-lines port
         let loop
           : indent-and-symbols : list ; '((5 "(foobar)" "\"yobble\"")(3 "#t"))
             inindent #t
             inunderscoreindent : equal? #\_ : peek-char port
             incomment #f
             currentindent 0
             currentsymbols '()
             emptylines 0
           let : : next-char : peek-char port
             cond
               : eof-object? next-char
                 append indent-and-symbols : list : append (list currentindent) currentsymbols
               : <= 2 emptylines
                 . indent-and-symbols
               : and inindent : equal? #\space next-char
                 read-char port ; remove char
                 loop
                   . indent-and-symbols
                   . #t ; inindent
                   . #f ; inunderscoreindent
                   . #f ; incomment
                   1+ currentindent
                   . currentsymbols
                   . emptylines
               : and inunderscoreindent : equal? #\_ next-char
                 read-char port ; remove char
                 loop 
                   . indent-and-symbols
                   . #t ; inindent
                   . #t ; inunderscoreindent
                   . #f ; incomment
                   1+ currentindent
                   . currentsymbols
                   . emptylines
               ; any char but whitespace *after* underscoreindent is
               ; an error. This is stricter than the current wisp
               ; syntax definition. TODO: Fix the definition. Better
               ; start too strict.
               : and inunderscoreindent : not : equal? #\space next-char
                 throw 'wisp-syntax-error "initial underscores without following whitespace at beginning of the line after" : last indent-and-symbols
               : or (equal? #\newline next-char) (equal? #\return next-char)
                 read-char port ; remove the newline
                 ; TODO: Check whether when or if should be preferred here. guile 1.8 only has if.
                 if : and (equal? #\newline next-char) : equal? #\return : peek-char port
                      read-char port ; remove a full \n\r. Damn special cases...
                 let* ; distinguish pure whitespace lines and lines
                      ; with comment by giving the former zero
                      ; indent. Lines with a comment at zero indent
                      ; get indent -1 for the same reason - meaning
                      ; not actually empty.
                   :
                     indent 
                       cond 
                         incomment 
                           if : = 0 currentindent ; specialcase
                             . -1
                             . currentindent 
                         : not : null? currentsymbols ; pure whitespace
                           . currentindent
                         else
                           . 0
                     parsedline : append (list indent) currentsymbols
                   ; TODO: If the line is empty. Either do it here and do not add it, just
                   ; increment the empty line counter, or strip it later. Replace indent
                   ; -1 by indent 0 afterwards.
                   loop
                     append indent-and-symbols : list parsedline
                     . #t ; inindent
                     equal? #\_ : peek-char port
                     . #f ; incomment
                     . 0
                     . '()
                     if : line-empty? parsedline
                       1+ emptylines
                       . 0
               : equal? #t incomment
                 read-char port ; remove one comment character
                 loop 
                   . indent-and-symbols
                   . #f ; inindent 
                   . #f ; inunderscoreindent 
                   . #t ; incomment
                   . currentindent
                   . currentsymbols
                   . emptylines
               : or (equal? #\space next-char) (equal? #\tab next-char) ; remove whitespace when not in indent
                 read-char port ; remove char
                 loop 
                   . indent-and-symbols
                   . #f ; inindent
                   . #f ; inunderscoreindent
                   . #f ; incomment
                   . currentindent
                   . currentsymbols
                   . emptylines
                        ; | cludge to appease the former wisp parser
                        ; | which had a problem with the literal comment
                        ; v char.
               : equal? (string-ref ";" 0) next-char
                 loop 
                   . indent-and-symbols
                   . #f ; inindent 
                   . #f ; inunderscoreindent 
                   . #t ; incomment
                   . currentindent
                   . currentsymbols
                   . emptylines
               else ; use the reader
                 loop 
                   . indent-and-symbols
                   . #f ; inindent
                   . #f ; inunderscoreindent
                   . #f ; incomment
                   . currentindent
                   ; this also takes care of the hashbang and leading comments.
                   ; TODO: If used from Guile, activate curly infix via read-options.
                   append currentsymbols : list : read port
                   . emptylines


define : line-code-replace-inline-colons line
         ' "Replace inline colons by opening parens which close at the end of the line"
         ; format #t "replace inline colons for line ~A\n" line
         let loop
           : processed '()
              unprocessed line
           cond
             : null? unprocessed
               ; format #t "inline-colons processed line: ~A\n" processed
               . processed
             : equal? readcolon : car unprocessed
               loop
                 ; FIXME: This should turn unprocessed into a list. 
                 append processed
                   list : loop '() (cdr unprocessed)
                 . '()
             else
               loop 
                 append processed
                   list : car unprocessed
                 cdr unprocessed

define : line-replace-inline-colons line
         cons 
           line-indent line
           line-code-replace-inline-colons : line-code line

define : line-strip-lone-colon line
         . "A line consisting only of a colon is just a marked indentation level. We need to kill the colon before replacing inline colons."
         if 
           equal? 
             line-code line
             list readcolon
           list : line-indent line
           . line

define : line-finalize line
         . "Process all wisp-specific information in a line and strip it"
         line-code-replace-inline-colons 
           line-strip-indentation-marker
             line-strip-lone-colon
               line-strip-continuation line


define : wisp-scheme-indentation-to-parens lines
         . "Add parentheses to lines and remove the indentation markers"
         ; FIXME: Find new algorithm which mostly uses current-line
         ; and the indentation-levels for tracking. The try I have in
         ; here right now is wrong.
         when 
           and 
             not : null? lines
             not : line-empty-code? : car lines
             not : = 0 : line-real-indent : car lines ; -1 is a line with a comment
           throw 'wisp-syntax-error 
             format #f "The first symbol in a chunk must start at zero indentation. Indentation and line: ~A"
               car lines
         let loop
           : processed '()
             unprocessed lines
             indentation-levels '(0)
           let*
             : 
               current-line 
                 if : <= 1 : length unprocessed
                      car unprocessed
                      list 0 ; empty code
               next-line
                 if : <= 2 : length unprocessed
                      car : cdr unprocessed
                      list 0 ; empty code
               current-indentation
                      car indentation-levels
               current-line-indentation : line-real-indent current-line
             ; format #t "processed: ~A\ncurrent-line: ~A\nnext-line: ~A\nunprocessed: ~A\nindentation-levels: ~A\ncurrent-indentation: ~A\n\n"
             ;     . processed current-line next-line unprocessed indentation-levels current-indentation
             cond
                 ; the real end: this is reported to the outside world.
               : and (null? unprocessed) (not (null? indentation-levels)) (null? (cdr indentation-levels))
                 ; display "done\n"
                 ; reverse the processed lines, because I use cons.
                 . processed
               ; the recursion end-condition
               : and (null? unprocessed)
                 ; display "last step\n"
                 ; this is the last step. Nothing more to do except
                 ; for rolling up the indentation levels.  return the
                 ; new processed and unprocessed lists: this is a
                 ; side-recursion
                 values processed unprocessed
               : null? indentation-levels
                 ; display "indentation-levels null\n"
                 throw 'wisp-programming-error "The indentation-levels are null but the current-line is null: Something killed the indentation-levels."
               else ; now we come to the line-comparisons and indentation-counting.
                   cond
                     : line-empty-code? current-line
                       ; display "current-line empty\n"
                       ; We cannot process indentation without
                       ; code. Just switch to the next line. This should
                       ; only happen at the start of the recursion.
                       ; TODO: Somehow preserve the line-numbers.
                       loop
                         . processed
                         cdr unprocessed
                         . indentation-levels
                     : and (line-empty-code? next-line) : <= 2 : length unprocessed 
                       ; display "next-line empty\n"
                       ; TODO: Somehow preserve the line-numbers.
                       ; take out the next-line from unprocessed.
                       loop
                         . processed
                         cons current-line
                           cdr : cdr unprocessed
                         . indentation-levels
                     : > current-indentation current-line-indentation
                       ; display "current-indent > next-line\n"
                       ; this just steps back one level via the side-recursion.
                       values processed unprocessed
                     : = current-indentation current-line-indentation
                       ; display "current-indent = next-line\n"
                       let 
                         : line : line-finalize current-line
                           next-line-indentation : line-real-indent next-line
                         cond
                           : >= current-line-indentation next-line-indentation
                             ; simple recursiive step to the next line
                             ; display "current-line-indent >= next-line-indent\n"
                             loop
                               append processed 
                                 if : line-continues? current-line
                                      . line
                                      list line
                               cdr unprocessed ; recursion here
                               . indentation-levels
                           : < current-line-indentation next-line-indentation
                             ; display "current-line-indent < next-line-indent\n"
                             ; format #t "line: ~A\n" line
                             ; side-recursion via a sublist
                             let-values 
                               :
                                 : sub-processed sub-unprocessed
                                   loop
                                     . line
                                     cdr unprocessed ; recursion here
                                     . indentation-levels
                               ; format #t "side-recursion:\n  sub-processed: ~A\n  processed: ~A\n\n" sub-processed processed
                               loop
                                 append processed : list sub-processed
                                 . sub-unprocessed ; simply use the recursion from the sub-recursion
                                 . indentation-levels
                     : < current-indentation current-line-indentation
                       ; display "current-indent < next-line\n"
                       loop
                         . processed
                         . unprocessed
                         cons ; recursion via the indentation-levels
                           . current-line-indentation 
                           . indentation-levels
                     else
                       throw 'wisp-not-implemented 
                             format #f "Need to implement further line comparison: current: ~A, next: ~A, processed: ~A."
                               . current-line next-line processed


define : wisp-scheme-replace-inline-colons lines
         ' "Replace inline colons by opening parens which close at the end of the line"
         let loop
           : processed '()
             unprocessed lines
           if : null? unprocessed
                . processed
                loop
                  append processed : list : line-replace-inline-colons : car unprocessed
                  cdr unprocessed
                  

define : wisp-scheme-strip-indentation-markers lines
         ' "Strip the indentation markers from the beginning of the lines"
         let loop
           : processed '()
             unprocessed lines
           if : null? unprocessed
                . processed
                loop
                  append processed : cdr : car unprocessed
                  cdr unprocessed

define : wisp-make-improper code
         . "Turn (a #{.}# b) into the correct (a . b).

read called on a single dot creates a variable named #{.}# (|.|
in r7rs). Due to parsing the indentation before the list
structure is known, the reader cannot create improper lists
when it reads a dot. So we have to take another pass over the
code to recreate the improper lists.

Match is awesome!"
      let 
        : 
          improper
            match code
               : a ... b '#{.}# c
                 append (map wisp-make-improper a) 
                   cons (wisp-make-improper b) (wisp-make-improper c)
               : a ...
                 map wisp-make-improper a
               a
                 . a
        define : syntax-error li
                throw 'wisp-syntax-error (format #f "incorrect dot-syntax #{.}# in code: not a proper pair: ~A" li)
        let check
          : tocheck improper
          match tocheck
            ; lists with only one member
            : '#{.}#
              syntax-error tocheck
            ; list with remaining dot.
            : a ...
              if : member readdot a
                   syntax-error tocheck
                   map check a
            ; simple pair
            : '#{.}# . c
              syntax-error tocheck
            ; simple pair, other way round
            : a . '#{.}#
              syntax-error tocheck
            ; more complex pairs
            : ? pair? a
              let 
                : head : drop-right a 1
                  tail : last-pair a
                cond
                 : equal? readdot : car tail
                   syntax-error tocheck
                 : equal? readdot : cdr tail
                   syntax-error tocheck
                 : member readdot head
                   syntax-error tocheck
                 else
                   . a
            a
              . a


define : wisp-scheme-read-chunk port
         . "Read and parse one chunk of wisp-code"
         let : :  lines : wisp-scheme-read-chunk-lines port
              wisp-make-improper
                wisp-scheme-indentation-to-parens lines

define : wisp-scheme-read-all port
         . "Read all chunks from the given port"
         let loop 
           : tokens '()
           cond
             : eof-object? : peek-char port
               . tokens
             else
               loop
                 append tokens : wisp-scheme-read-chunk port

define : wisp-scheme-read-file path
         call-with-input-file path wisp-scheme-read-all

define : wisp-scheme-read-file-chunk path
         call-with-input-file path wisp-scheme-read-chunk

define : wisp-scheme-read-string str
         call-with-input-string str wisp-scheme-read-all

define : wisp-scheme-read-string-chunk str
         call-with-input-string str wisp-scheme-read-chunk


;;;; Test improper lists
;;;; Good cases
; write
;   wisp-scheme-read-string  "foo . bar"
; newline 
; write
;   wisp-scheme-read-string  "foo .
;   . bar"
; newline 
; write
;   wisp-scheme-read-string  "foo
;   . . bar"
; newline 
; write
;   wisp-scheme-read-string  "moo
;   foo
;     . . bar
; baz waz"
; newline 
;;;; Syntax Error cases
; write
;   wisp-scheme-read-string  "foo
;   . . ."
; newline 
; write
;   wisp-scheme-read-string  "moo : . . bar"
; write
;   wisp-scheme-read-string  "foo .
;   . . bar"
; newline 
; write
;   wisp-scheme-read-string  "moo
;   foo
;     . . bar baz
; baz waz"
; newline 
;;;; stranger stuff
; write
;   wisp-scheme-read-string  "foo ; bar\n  ; nop \n\n; nup\n; nup \n  \n\n\nfoo : moo \"\n\" \n___ . goo . hoo"
; newline 
; display
;   wisp-scheme-read-string  "  foo ; bar\n  ; nop \n\n; nup\n; nup \n  \n\n\nfoo : moo"
; newline 
; write : wisp-scheme-read-file-chunk "wisp-scheme.w"
; newline 
; call-with-output-file "wisp-guile.scm"
;   lambda : port
;     map 
;        lambda : chunk
;                 write chunk port
;        wisp-scheme-read-file "wisp-guile.w"
; run all chunks in wisp-guile.w as parsed by wisp-scheme.w. Give wisp-guile.w to parse as argument.
; map primitive-eval : wisp-scheme-read-file "wisp-guile.w" ; actually runs wisp-guile.w with the arguments supplied to this script.
; uncomment the previous line, then run the next line in the shell. If 1 and 2 are equal, this parser works!
; guile wisp.scm wisp-scheme.w > wisp-scheme.scm; guile wisp-scheme.scm wisp-guile.w > 1; guile wisp.scm wisp-guile.w > 2; diff 1 2