#!/home/arne/wisp/wisp-multiline.sh
; !#
; we need to be able to replace end-of-line characters in brackets and strings
;; nostringandbracketbreaks INPORT
;;
;; Replace linebreaks within brackets and strings in the INPORT by the
;; placeholders \LINE_BREAK_N and \LINE_BREAK_R. Also identify
;; real comments as ;\REALCOMMENTHERE
;;
;; -Author: Arne Babenhauserheide
define : endsinunevenbackslashes text
    . "Check whether TEXT ends in an odd number of consecutive backslashes."
    ; Walk from the last character towards the start and count backslashes
    ; until a different character or the start of TEXT is reached.
    ; The empty string has no trailing backslash and yields #f.
    let scan
        : index : - (string-length text) 1
          seen 0
        if : and (>= index 0) : char=? (string-ref text index) #\\
            scan (- index 1) (+ seen 1)
            odd? seen
define : nostringandbracketbreaks inport
; Read every character from INPORT and return the whole input as one
; string. While copying, line ends which are seen while inside a string
; or inside brackets are replaced by the markers \LINE_BREAK_N and
; \LINE_BREAK_R, and the character which starts a comment is followed by
; the marker \REALCOMMENTHERE.
;
; State carried through the loop:
;   lastchar, nextchar - the char handled in the previous pass and the current one
;   text               - the result collected so far, starts with the first char
;   incomment          - set at a comment char outside strings, cleared at a line end
;   incommentfirstchar - true only during the pass which starts a comment
;   instring           - toggled whenever a string delimiter is accepted below
;   inbrackets         - counter of currently open brackets
;   incharform         - progress through a char literal like #\a, 0 means none
;
; NOTE(review): the leading indentation is not visible in this listing, so
; the nesting described in these notes is reconstructed from the syntax
; and should be confirmed against the indented file.
; Replace end of line characters in brackets and strings
; FIXME: Breaks if the string is shorter than 2 chars
; NOTE(review): presumably the failing case is an empty port, where
; lastchar is the eof object which is then passed to string - confirm.
let*
__ : lastchar : read-char inport
____ nextchar : read-char inport
____ text : string lastchar
incomment #f
incommentfirstchar #f ; first char of a comment
instring #f
inbrackets 0
incharform 0 ; #\<something>
while : not : eof-object? nextchar
; incommentfirstchar is only valid for exactly one char
when incommentfirstchar : set! incommentfirstchar #f
; already started char forms win over everything, so process them first.
; already started means: after the #\
; FIXME: Fails to capture #t and #f which can kill line splitting if it happens inside brackets
when : = incharform 1
when : not : and (char=? lastchar #\# ) : or (char=? #\f nextchar) (char=? #\t nextchar)
; format #t "1: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform
; newline
set! incharform 0
; NOTE(review): in Guile the names #\linefeed and #\newline appear to
; denote the same character, which would make the second test of each
; such pair redundant - confirm whether #\return was intended.
when : >= incharform 2
if : or (char=? nextchar #\space) (char=? nextchar #\linefeed ) (char=? nextchar #\newline )
begin
; format #t "2: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform
; newline
set! incharform 0
; else
set! incharform : + incharform 1
; check if we switch to a string: last char is space, linebreak or in a string, not in a charform, not in a comment
when
and
char=? nextchar #\"
not incomment
< incharform 1
or
and
. instring ; when I’m in a string, I can get out
or
not : char=? lastchar #\\ ; if the last char is not a backslash (escaped quote)
; or the last char is a backslash preceded by an uneven number of backslashes (so the backslash is actually an escaped backslash)
and : char=? lastchar #\\
; not : equal? #f : string-match "\\([^\\]\\)+\\(\\\\\\\\\\)*[\\]$" text ; matches [^\](\\)*\$ - non-backslash + arbitrary number of pairs of backslashes + final backslash which undoes the escaping from the lastchar (by actually escaping the lastchar)
endsinunevenbackslashes text
char=? lastchar #\space ; when the last char was a space, I can get into a string
char=? lastchar #\linefeed ; same for newline chars
char=? lastchar #\newline
and : not instring ; outside of strings, brackets are pseudo-whitespace, too
or
char=? lastchar #\(
char=? lastchar #\)
set! instring : not instring
; check if we switch to a comment
when
and
; FIXME: this should be
; char=? nextchar #\;
equal? ";" : string nextchar
not incomment
not instring
< incharform 2
set! incomment #t
set! incommentfirstchar #t
; this also closes any potential charform
set! incharform 0
; a line end finishes the comment
when
and incomment
or
char=? nextchar #\newline
char=? nextchar #\linefeed
set! incomment #f
; check for the beginning of a charform
when
and
not instring
not incomment
char=? lastchar #\space
char=? nextchar #\#
set! incharform 1
; check whether a charform is continued
when
and
= incharform 1
char=? lastchar #\#
char=? nextchar #\\
set! incharform 2
; check for brackets
; NOTE(review): both bracket tests below require incharform to be exactly 1.
; Confirm that this is intended, a test for 0 (not inside a char literal)
; would look more natural here.
when : and ( char=? nextchar #\( ) ( not instring ) ( not incomment ) ( = incharform 1 )
; format #f "add bracketlevel: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform
; newline
set! inbrackets : + inbrackets 1
when : and ( char=? nextchar #\) ) ( not instring ) ( not incomment ) ( = incharform 1 )
set! inbrackets : - inbrackets 1
; append the current char to the result: inside a string or brackets a
; line end is replaced by its marker, everything else is copied as it is
if : or instring : > inbrackets 0
if : char=? nextchar #\linefeed
set! text : string-append text "\\LINE_BREAK_N"
if : char=? nextchar #\newline
set! text : string-append text "\\LINE_BREAK_R"
; else
set! text : string-append text : string nextchar
; mark the start of a comment, so we do not have to
; repeat the string matching in later code. We include
; the comment character!
; not (instring or inbrackets) = neither instring nor inbrackets
if incommentfirstchar
set! text : string-append text ( string nextchar ) "\\REALCOMMENTHERE"
; when not in brackets or string or starting a
; comment: just append the char
set! text : string-append text : string nextchar
; advance by one char
set! lastchar nextchar
set! nextchar : read-char inport
; return the text
. text
; As the next part we have to split a text into a list of lines which we can process one by one.
define : splitlines inport
    . "Read INPORT up to its end and return the lines as a list of strings without the line end characters."
    ; The characters of the current line and the finished lines are both
    ; collected in reverse with cons and turned around once at the end.
    ; This keeps the work linear in the size of the input.
    ; The text after the last line end always becomes the final entry,
    ; even when it is empty. An empty port therefore gives a list with one
    ; empty string.
    let reader
        : nextchar : read-char inport
          linechars '()
          lines '()
        cond
          : eof-object? nextchar
            reverse : cons (list->string (reverse linechars)) lines
          : or (char=? nextchar #\newline) (char=? nextchar #\linefeed)
            ; line end: finish the current line and start an empty one
            reader (read-char inport) '()
                cons (list->string (reverse linechars)) lines
          else
            reader (read-char inport) (cons nextchar linechars) lines
define : line-indent line
    . "Return the indentation of LINE, which is the first element of the line list."
    car line
define : line-content line
    . "Return the content of LINE, which is the second element of the line list."
    cadr line
define : line-comment line
    . "Return the comment of LINE, which is the third element of the line list."
    caddr line
define : line-continues? line
    . "Check whether LINE continues the previous line, which means that its content starts with a dot and a space."
    let : : content : line-content line
        ; A content of #f marks the EOF line. It never continues, which
        ; ensures that the last brackets get closed.
        and content : string-prefix? ". " content
define : line-empty-code? line
    . "Check whether LINE carries no code, which means that its content is the empty string."
    let : : content : line-content line
        ; a content which is no string, like the #f of the EOF line, does not count as empty
        and (string? content) (string-null? content)
define : line-merge-comment line
    . "Return LINE with its comment moved to the end of the content. A line without comment is returned unchanged."
    let : : comment : line-comment line
        if : not : equal? "" comment
            ; rebuild the line with the comment appended to the content
            ; after a comment character, the comment slot becomes empty
            list
                line-indent line
                string-append (line-content line) ";" comment
                . ""
            . line
; skip the leading indentation
define : skipindent inport
    . "Consume the leading indentation from INPORT and return the number of skipped characters."
    ; The indentation consists of a run of underbars followed by a run of
    ; spaces. The first character after it is put back into the port.
    let skipper
        : inunderbars #t
          indent 0
          nextchar : read-char inport
        cond
          ; when the port ends there is nothing left to skip
          : eof-object? nextchar
            . indent
          ; still inside the run of underbars
          : and inunderbars : char=? nextchar #\_
            skipper #t (+ indent 1) (read-char inport)
          ; the underbars ended, look at the same char again as possible space
          inunderbars
            skipper #f indent nextchar
          ; inside the run of spaces
          : char=? nextchar #\space
            skipper #f (+ indent 1) (read-char inport)
          ; first char of the content, give it back to the port
          else
            unread-char nextchar inport
            . indent
; Now we have to split a single line into indentation, content and comment.
define : splitindent inport
; Split the single line which can be read from INPORT into a list of
; indentation, content and comment.
;
; The indentation is taken from skipindent. The rest of the line is read
; char by char. A comment char only starts the comment when it is
; directly followed by the complete marker stored in
; commentstartidentifier. The marker itself is dropped, so the returned
; comment holds only the text after it, and the comment char is not part
; of the content.
;
; NOTE(review): the leading indentation is not visible in this listing,
; the nesting of the forms below should be confirmed against the
; indented file.
let
: indent : skipindent inport
let
: nextchar : read-char inport
inindent #t ; it always begins in indent
incomment #f ; but not in a comment
commentstart #f
commentstartidentifier "\\REALCOMMENTHERE"
; number of chars in the marker above: the backslash plus 15 letters
commentstartidentifierlength 16
; position inside the marker which has to match next
commentidentifierindex 0
content ""
comment ""
while : not : eof-object? nextchar
; check whether we leave the content
; FIXME: (wisp.py) the reader cuts the ; here, when I write it as this:
; when : and ( not incomment ) : char=? nextchar #\;
; FIXME: THIS mistreats #\; as comment! (shown 4 lines after this comment…)
when
and
not incomment
; FIXME: this should be but would break
; char=? nextchar #\;
equal? ";" : string nextchar
; a comment char which directly follows #\ in the content is a char literal
not : string-suffix? ( string #\# #\\ ) content
set! commentstart #t
; keep the char in comment until the marker check is decided
set! comment : string-append comment : string nextchar
set! nextchar : read-char inport
continue
; check whether we stay in the commentcheck
when : and commentstart : char=? nextchar : string-ref commentstartidentifier commentidentifierindex
set! commentidentifierindex : + commentidentifierindex 1
set! comment : string-append comment : string nextchar
; the whole marker matched: this is a real comment
when : = commentidentifierindex commentstartidentifierlength
set! commentstart #f
set! incomment #t
; reset used variables
set! commentidentifierindex 0
set! comment ""
set! nextchar : read-char inport
continue
; if we cannot complete the commentcheck, we did not start a real comment. Append it to the content
when : and commentstart : not : char=? nextchar : string-ref commentstartidentifier commentidentifierindex
set! commentstart #f
set! content : string-append content comment : string nextchar
set! comment ""
set! commentidentifierindex 0
set! nextchar : read-char inport
continue
; if we are in the comment, just append to the comment
when incomment
set! comment : string-append comment : string nextchar
set! nextchar : read-char inport
continue
; if nothing else is true, we are in the content
set! content : string-append content : string nextchar
set! nextchar : read-char inport
; the line ended in the middle of a marker check: the collected chars are content
when commentstart
set! content : string-append content comment
set! comment ""
; return the indentation, the content and the comment
list indent content comment
; Now use the function to split a list of lines
define : linestoindented lines
    . "Split every text line in LINES with splitindent. Return the list of results in the order of LINES."
    ; Each line is wrapped into a string port because splitindent reads
    ; from a port. Using map avoids the repeated append of the whole
    ; result list for every line.
    map
        lambda : textline
            call-with-input-string textline splitindent
        . lines
define : read-whole-file filename
    . "Return the complete content of the file FILENAME as one string."
    ; call-with-input-file closes the port when the reader returns, so
    ; the file does not stay open.
    ; The characters are collected in reverse with cons and turned into
    ; a string once at the end, which avoids copying the whole text for
    ; every single character.
    call-with-input-file filename
        lambda : port
            let reader
                : chars '()
                  nextchar : read-char port
                if : eof-object? nextchar
                    list->string : reverse chars
                    reader (cons nextchar chars) (read-char port)
define : split-wisp-lines text
    . "Return the lines of TEXT as a list of strings after the line ends inside strings and brackets got replaced by their markers."
    ; first pass: nostringandbracketbreaks over TEXT,
    ; second pass: splitlines over the result of the first pass
    call-with-input-string
        call-with-input-string text nostringandbracketbreaks
        . splitlines
define : wisp2lisp-add-inline-colon-brackets line
. "Add inline colon brackets to a wisp-line (indent,content,comment)"
; Returns a new line list with the indentation and comment of LINE and
; the rewritten content.
;
; The content is consumed from its end: unprocessed shrinks from the
; right while processed grows to the left. State of the loop:
;   instring      - whether the current position is inside a string
;   inbrackets    - nesting depth of brackets, counted backwards
;   bracketstoadd - number of closing brackets to append at the very end
;
; NOTE(review): the leading indentation is not visible in this listing,
; the nesting of the forms below should be confirmed against the
; indented file.
let : : content : line-content line
; replace final " :" by a function call. There we are by definition of the line-splitting not in a string.
when : string-suffix? " :" content
set! content : string-append (string-drop-right content 1) "()"
; process the content in reverse direction, so we can detect ' : and turn it into '(
; let linebracketizer ( ( instring #f ) ( inbrackets 0 ) ( bracketstoadd 0 ) ( unprocessed content ) ( processed "" ) )
let linebracketizer : ( instring #f ) ( inbrackets 0 ) ( bracketstoadd 0 ) ( unprocessed content ) ( processed "" )
if : < (string-length unprocessed) 2
; if unprocessed is < 2 chars, it cannot contain ": ". We are done.
; the result is the rest, the processed part and the collected closing brackets
list
line-indent line
string-append unprocessed processed : xsubstring ")" 0 bracketstoadd
line-comment line
; else
let
: lastletter : string-take-right unprocessed 1
lastupto3 : string-take-right unprocessed : min 3 : string-length unprocessed
; check if we’re in a string
; a string delimiter written as char literal does not open a string, and
; inside a string the delimiter is tested with endsinunevenbackslashes
; on the text before it to recognize escaped delimiters
when
or
and
not instring
equal? "\"" lastletter
not : equal? "#\\\"" lastupto3
and
. instring
equal? "\"" lastletter
not : endsinunevenbackslashes : string-drop-right unprocessed 1
set! instring : not instring
; brackets written as char literals are not counted
when : not instring
when : and (equal? ")" lastletter) : not : equal? "#\\)" lastupto3
set! inbrackets : + 1 inbrackets ; remember that we're going backwards!
when : and (equal? "(" lastletter) : not : equal? "#\\(" lastupto3
set! inbrackets : - inbrackets 1
; error handling: inbrackets must never be smaller than 0 - due to the line splitting.
when : < inbrackets 0
throw 'more-inline-brackets-closed-than-opened inbrackets line
; when we’re in a string or in brackets , just skip to the next char
if : or instring : > inbrackets 0
linebracketizer instring inbrackets bracketstoadd
. : string-drop-right unprocessed 1
. : string-append lastletter processed
; else check for " : ": That adds a new inline bracket
; support : at the beginning of a line, too.
if : or (equal? " : " lastupto3) (equal? ": " lastupto3)
; replace the last 2 chars with "(" and note
; that we need an additional closing bracket
; at the end.
linebracketizer instring inbrackets : + 1 bracketstoadd
string-append (string-drop-right unprocessed 2)
string-append "(" processed
; turn " ' (" into " '(", do not modify unprocessed, except to shorten it!
if : and (string-prefix? "(" processed) : equal? " ' " lastupto3
; leave out the second space
linebracketizer instring inbrackets bracketstoadd
. (string-append (string-drop-right unprocessed 2) "'")
. processed
; else, just go on
linebracketizer instring inbrackets bracketstoadd
. (string-drop-right unprocessed 1)
. (string-append lastletter processed)
define : last-indent levels
    . "Return the indentation of the last line, which is the first entry of LEVELS."
    car levels
define : line-add-starting-bracket line
    . "Return a copy of LINE with an opening bracket put in front of its content."
    ; the caller decides whether the line needs the bracket,
    ; indentation and comment are taken over unchanged
    let
        : indent : line-indent line
          content : string-append "(" : line-content line
          comment : line-comment line
        list indent content comment
define : line-add-closing-brackets line number
    . "Return a copy of LINE with NUMBER closing brackets appended to its content."
    let
        ; xsubstring repeats the one char string to the requested length
        : closers : xsubstring ")" 0 number
        list
            line-indent line
            string-append (line-content line) closers
            line-comment line
define : line-indent-brackets-to-close line-indent levels line-continues prev-continues
    . "Count the brackets to close so that the open LEVELS shrink down to LINE-INDENT."
    ; LEVELS is walked from its first entry. Every level deeper than
    ; LINE-INDENT costs one closing bracket. When LINE-INDENT is deeper
    ; than the first level it is pushed as a new level and one bracket
    ; is taken off the count. Once both are equal, the bracket of the
    ; previous line itself is added unless that line was a continuation.
    ; LINE-CONTINUES is not used here.
    let closer
        : pending 0
          open-levels levels
        let : : innermost : car open-levels
            cond
              : > line-indent innermost
                closer (- pending 1) : cons line-indent open-levels
              : < line-indent innermost
                closer (+ pending 1) : cdr open-levels
              ; finish condition: the indentation matches the first level
              prev-continues
                . pending
              else
                + pending 1
define : line-indent-brackets-to-open line-indent levels line-continues prev-continues
    . "Return the number of brackets to open for a line, 0 for a continuation line and 1 for every other line."
    ; only LINE-CONTINUES is looked at, the other arguments are not used
    cond
      line-continues 0
      else 1
define : line-indent-levels-adjust levels next-indent
    . "Return LEVELS with entries dropped or NEXT-INDENT added so that the first level equals NEXT-INDENT."
    let adjuster
        : remaining levels
        let : : innermost : car remaining
            cond
              ; deeper than every remaining level: push it as new first level
              : > next-indent innermost
                cons next-indent remaining
              ; shallower than the first level: drop that level and retry
              : < next-indent innermost
                adjuster : cdr remaining
              else
                . remaining
define : line-drop-continuation-dot line
    . "Return LINE without the two leading chars of its content when it is a continuation line. Other lines keep their content."
    let*
        : content : line-content line
          ; the continuation marker is the dot plus the space after it
          stripped : if (line-continues? line) (string-drop content 2) content
        list (line-indent line) stripped (line-comment line)
define : wisp2lisp-parse lisp prev lines
. "Parse the body of the wisp-code."
; LISP holds the lines which are already finished, PREV is the first
; code line and LINES are the lines after it. Returns the list of all
; finished lines.
;
; The loop always looks at two lines: pre, whose closing brackets can
; only be decided once the indentation of the next code line is known,
; and next. Lines without code between them are parked in whitespace
; and put behind pre once pre is finished.
;
; NOTE(review): the leading indentation is not visible in this listing,
; the nesting of the forms below should be confirmed against the
; indented file.
set! prev : wisp2lisp-add-inline-colon-brackets prev ; prev already is a code-line.
if : not : line-continues? prev
set! prev : line-add-starting-bracket prev
set! lines : map-in-order wisp2lisp-add-inline-colon-brackets lines
let bracketizer : (levels '(0)) (pre prev) (unprocessed lines) (processed lisp) (whitespace '())
; levels is the list of levels, with the lowest to the right. i.e: '(12 8 4 0)
; once we processed everything, we pass the bracketizer pre as f one last time
if : equal? #f : line-content pre
. processed
; without lines left, next becomes an end marker line with indentation 0 and #f as content
let : : next : if (equal? unprocessed '()) (list 0 #f #f) : list-ref unprocessed 0 ; this is the break condition for the next loop!
if : line-empty-code? next ; empty lines get silently added, but otherwise ignored
bracketizer levels pre
list-tail unprocessed 1
. processed
append whitespace : list next
; firstoff add the next indent to the levels, so we only work on the levels, prev-continues, next-continues and next-indent
; if pre was a continuation, the real levels are 1 lower than the counted levels
let*
: next-indent : line-indent next
; pre-indent is bound here but not used below
pre-indent : line-indent pre
pre-continues : line-continues? pre
next-continues : line-continues? next
final-line : equal? #f : line-content next
bracketstocloseprev : line-indent-brackets-to-close next-indent levels next-continues pre-continues
bracketstoopennext : line-indent-brackets-to-open next-indent levels next-continues pre-continues
newnext : if final-line next : if (> bracketstoopennext 0) (line-add-starting-bracket next) next
newpre : line-drop-continuation-dot : line-add-closing-brackets pre bracketstocloseprev
newlevels : line-indent-levels-adjust levels next-indent
; go on with next as the new pre: the finished pre and the parked
; lines move to processed, the final list call starts with empty whitespace
bracketizer newlevels newnext
if final-line unprocessed : list-tail unprocessed 1
append processed (list newpre) whitespace
list
define : wisp2lisp-initial-comments lisp prev lines
    . "Move all leading lines without code from the front of the input to LISP, so they never get a starting bracket."
    ; Returns a list of LISP, the first code line and the remaining lines.
    ; NOTE(review): when LINES runs empty the bare LISP list is returned
    ; instead of that three element list and PREV is dropped. wisp2lisp
    ; applies the result to wisp2lisp-parse, so confirm how a file which
    ; ends during this phase is meant to be handled.
    let skip-initial-comments : (lisp lisp) (prev prev) (lines lines)
        cond
          ; file only contained comments, maybe including the hashbang
          : null? lines
            . lisp
          ; PREV has no code: store it and look at the following line
          : line-empty-code? prev
            skip-initial-comments
                append lisp : list prev
                car lines
                cdr lines
          ; PREV is the first code line
          else
            list lisp prev lines
define : wisp2lisp-hashbang lisp prev unprocessed
    . "Move an initial hashbang line from PREV to LISP. Return the list of LISP, the line after it and the remaining lines."
    ; Without a hashbang the three arguments come back unchanged as a list.
    cond
      ; a hashbang is only accepted as really the first line, without
      ; indentation and with a content which starts with the hashbang chars
      : and (equal? lisp '()) (equal? 0 (line-indent prev)) (string-prefix? "#!" (line-content prev))
        ; the comment is merged back into the content, then the check
        ; runs again with the following line as PREV
        wisp2lisp-hashbang
            append lisp : list : line-merge-comment prev
            car unprocessed
            cdr unprocessed
      else
        list lisp prev unprocessed
define : wisp2lisp lines
    . "Turn the indentation of LINES into brackets. Return the list of processed lines."
    ; Stages for a list with at least one line: hashbang handling,
    ; keeping the initial comments, then parsing the body. The result of
    ; each stage is the argument list for the next one.
    if : null? lines
        . '()
        let*
            ; start with no processed lines, the first line as the last
            ; line seen and the rest as unprocessed lines
            : hashbanged : wisp2lisp-hashbang '() (car lines) (cdr lines)
              deinitialized : apply wisp2lisp-initial-comments hashbanged
            apply wisp2lisp-parse deinitialized
; first step: Be able to mirror a file to stdout
; Script entry: read the file named by the first command line argument,
; convert it and write the resulting lines to the standard output.
let*
    : filename : list-ref ( command-line ) 1
      text : read-whole-file filename
      ; Lines consist of lines with indent, content and comment. See
      ; line-indent, line-content, line-comment and the other
      ; line-functions for details.
      textlines : split-wisp-lines text
      lines : linestoindented textlines
      lisp : wisp2lisp lines
    ; Print every processed line: the indentation as spaces, then the
    ; content and, when there is one, the comment behind a comment char.
    ; for-each walks the list directly, no list of already printed lines
    ; is built up.
    for-each
        lambda : line
            display : xsubstring " " 0 : line-indent line
            display : line-content line
            unless : equal? "" : line-comment line
                display ";"
                display : line-comment line
            newline
        . lisp
    ; TODO: undo linebreak-replacing. Needs in-string and in-comment
    ;       checking, but only for each line, not spanning multiple lines.
newline