#!/home/arne/wisp/wisp-multiline.sh ; !# ; we need to be able to replace end-of-line characters in brackets and strings ;; nostringandbracketbreaks INPORT ;; ;; Replace linebreaks within brackets and strings in the INPORT by the ;; placeholders \STRING_BREAK_N and \STRING_BREAK_R. Also identify ;; real comments as ;\REALCOMMENTHERE ;; ;; -Author: Arne Babenhauserheide define : endsinunevenbackslashes text if : = 0 : string-length text . #f let counter : last : string-take-right text 1 rest : string-append " " : string-drop-right text 1 count 0 cond : = 0 : string-length rest ; end clause: read all odd? count ; end clause: no \ : not : equal? last : string #\\ odd? count else counter (string-take-right rest 1) (string-drop-right rest 1) (+ 1 count) define : nostringandbracketbreaks inport ; Replace end of line characters in brackets and strings ; FIXME: Breaks if the string is shorter than 2 chars let* __ : lastchar : read-char inport ____ nextchar : read-char inport ____ text : string lastchar incomment #f incommentfirstchar #f ; first char of a comment instring #f inbrackets 0 incharform 0 ; #\<something> while : not : eof-object? nextchar ; incommentfirstchar is only valid for exactly one char when incommentfirstchar : set! incommentfirstchar #f ; already started char forms win over everything, so process them first. ; already started means: after the #\ ; FIXME: Fails to capture #t and #f which can kill line splitting if it happens inside brackets when : = incharform 1 when : not : and (char=? lastchar #\# ) : or (char=? #\f nextchar) (char=? #\t nextchar) ; format #t "1: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform ; newline set! incharform 0 when : >= incharform 2 if : or (char=? nextchar #\space) (char=? nextchar #\linefeed ) (char=? nextchar #\newline ) begin ; format #t "2: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform ; newline set! incharform 0 ; else set! incharform : + incharform 1 ; check if we switch to a string: last char is space, linebreak or in a string, not in a charform, not in a comment when and char=? nextchar #\" not incomment < incharform 1 or and . instring ; when I’m in a string, I can get out or not : char=? lastchar #\\ ; if the last char is not a backslash (escaped quote) ; or the last char is a backslash preceded by an uneven number of backslashes (so the backslash is actually an escaped backslash) and : char=? lastchar #\\ ; not : equal? #f : string-match "\\([^\\]\\)+\\(\\\\\\\\\\)*[\\]$" text ; matches [^\](\\)*\$ - non-backslash + arbitrary number of pairs of backslashes + final backslash which undoes the escaping from the lastchar (by actually escaping the lastchar) endsinunevenbackslashes text char=? lastchar #\space ; when the last char was a space, I can get into a string char=? lastchar #\linefeed ; same for newline chars char=? lastchar #\newline and : not instring ; outside of strings, brackets are pseudo-whitespace, too or char=? lastchar #\( char=? lastchar #\) set! instring : not instring ; check if we switch to a comment when and ; FIXME: this should be ; char=? nextchar #\; equal? ";" : string nextchar not incomment not instring < incharform 2 set! incomment #t set! incommentfirstchar #t ; this also closes any potential charform set! incharform 0 when and incomment or char=? nextchar #\newline char=? nextchar #\linefeed set! incomment #f ; check for the beginning of a charform when and not instring not incomment char=? lastchar #\space char=? nextchar #\# set! incharform 1 ; check whether a charform is continued when and = incharform 1 char=? lastchar #\# char=? nextchar #\\ set! incharform 2 ; check for brackets when : and ( char=? nextchar #\( ) ( not instring ) ( not incomment ) ( = incharform 1 ) ; format #f "add bracketlevel: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform ; newline set! inbrackets : + inbrackets 1 when : and ( char=? nextchar #\) ) ( not instring ) ( not incomment ) ( = incharform 1 ) set! inbrackets : - inbrackets 1 if : or instring : > inbrackets 0 if : char=? nextchar #\linefeed set! text : string-append text "\\LINE_BREAK_N" if : char=? nextchar #\newline set! text : string-append text "\\LINE_BREAK_R" ; else set! text : string-append text : string nextchar ; mark the start of a comment, so we do not have to ; repeat the string matching in later code. We include ; the comment character! ; not (instring or inbrackets) = neither instring nor inbrackets if incommentfirstchar set! text : string-append text ( string nextchar ) "\\REALCOMMENTHERE" ; when not in brackets or string or starting a ; comment: just append the char set! text : string-append text : string nextchar set! lastchar nextchar set! nextchar : read-char inport ; return the text . text ; As next part we have split a text into a list of lines which we can process one by one. define : splitlines inport let : lines '() nextchar : read-char inport nextline "" while : not : eof-object? nextchar if : not : or (char=? nextchar #\newline ) (char=? nextchar #\linefeed ) set! nextline : string-append nextline : string nextchar begin set! lines : append lines (list nextline) set! nextline "" set! nextchar : read-char inport append lines : list nextline define : line-indent line list-ref line 0 define : line-content line list-ref line 1 define : line-comment line list-ref line 2 define : line-continues? line . "Check whether the line is a continuation of a previous line (should not start with a bracket)." if : equal? #f : line-content line . #f ; this is the EOF line. It does not continue (to ensure that the last brackets get closed) string-prefix? ". " : line-content line define : line-empty-code? line . "Check whether the code-part of the line is empty: contains only whitespace and/or comment." equal? "" : line-content line define : line-merge-comment line . "Merge comment and content into the content. Return the new line." let : indent : line-indent line content : line-content line comment : line-comment line if : equal? "" comment . line ; no change needed list indent : string-append content ";" comment . "" ; skip the leading indentation define : skipindent inport let skipper : inunderbars #t indent 0 nextchar : read-char inport ; when the file ends, do not do anything else if : not : eof-object? nextchar ; skip underbars if inunderbars if : char=? nextchar #\_ ; still in underbars? skipper . #t ; still in underbars? + indent 1 read-char inport ; else, reevaluate without inunderbars skipper #f indent nextchar ; else: skip remaining spaces if : char=? nextchar #\space skipper . #f + indent 1 read-char inport begin unread-char nextchar inport . indent . indent ; Now we have to split a single line into indentation, content and comment. define : splitindent inport let : indent : skipindent inport let : nextchar : read-char inport inindent #t ; it always begins in indent incomment #f ; but not in a comment commentstart #f commentstartidentifier "\\REALCOMMENTHERE" commentstartidentifierlength 16 commentidentifierindex 0 content "" comment "" while : not : eof-object? nextchar ; check whether we leave the content ; FIXME: (wisp.py) the reader cuts the ; here, when I write it as this: ; when : and ( not incomment ) : char=? nextchar #\; ; FIXME: THIS mistreats #\; as comment! (shown 4 lines after this comment…) when and not incomment ; FIXME: this should be but would break ; char=? nextchar #\; equal? ";" : string nextchar not : string-suffix? ( string #\# #\\ ) content set! commentstart #t set! comment : string-append comment : string nextchar set! nextchar : read-char inport continue ; check whether we stay in the commentcheck when : and commentstart : char=? nextchar : string-ref commentstartidentifier commentidentifierindex set! commentidentifierindex : + commentidentifierindex 1 set! comment : string-append comment : string nextchar when : = commentidentifierindex commentstartidentifierlength set! commentstart #f set! incomment #t ; reset used variables set! commentidentifierindex 0 set! comment "" set! nextchar : read-char inport continue ; if we cannot complete the commentcheck, we did not start a real comment. Append it to the content when : and commentstart : not : char=? nextchar : string-ref commentstartidentifier commentidentifierindex set! commentstart #f set! content : string-append content comment : string nextchar set! comment "" set! commentidentifierindex 0 set! nextchar : read-char inport continue ; if we are in the comment, just append to the comment when incomment set! comment : string-append comment : string nextchar set! nextchar : read-char inport continue ; if nothing else is true, we are in the content set! content : string-append content : string nextchar set! nextchar : read-char inport when commentstart set! content : string-append content comment set! comment "" ; return the indentation, the content and the comment list indent content comment ; Now use the function to split a list of lines define : linestoindented lines let splitter : unprocessed lines processed '() if : equal? unprocessed '() . processed ; else: let-recursion splitter list-tail unprocessed 1 append processed list call-with-input-string list-ref unprocessed 0 . splitindent define : read-whole-file filename let : : origfile : open-file filename "r" let reader : text "" nextchar : read-char origfile if : eof-object? nextchar . text reader string-append text : string nextchar read-char origfile define : split-wisp-lines text let : : nobreaks : call-with-input-string text nostringandbracketbreaks call-with-input-string nobreaks splitlines define : wisp2lisp-add-inline-colon-brackets line . "Add inline colon brackets to a wisp-line (indent,content,comment)" let : : content : line-content line ; replace final " :" by a function call. There we are by definition of the line-splitting not in a string. when : string-suffix? " :" content set! content : string-append (string-drop-right content 1) "()" ; process the content in reverse direction, so we can detect ' : and turn it into '( ; let linebracketizer ( ( instring #f ) ( inbrackets 0 ) ( bracketstoadd 0 ) ( unprocessed content ) ( processed "" ) ) let linebracketizer : ( instring #f ) ( inbrackets 0 ) ( bracketstoadd 0 ) ( unprocessed content ) ( processed "" ) if : < (string-length unprocessed) 2 ; if unprocessed is < 2 chars, it cannot contain ": ". We are done. list line-indent line string-append unprocessed processed : xsubstring ")" 0 bracketstoadd line-comment line ; else let : lastletter : string-take-right unprocessed 1 lastupto3 : string-take-right unprocessed : min 3 : string-length unprocessed ; check if we’re in a string when or and not instring equal? "\"" lastletter not : equal? "#\\\"" lastupto3 and . instring equal? "\"" lastletter not : endsinunevenbackslashes : string-drop-right unprocessed 1 set! instring : not instring when : not instring when : and (equal? ")" lastletter) : not : equal? "#\\)" lastupto3 set! inbrackets : + 1 inbrackets ; remember that we're going backwards! when : and (equal? "(" lastletter) : not : equal? "#\\(" lastupto3 set! inbrackets : - inbrackets 1 ; error handling: inbrackets must never be smaller than 0 - due to the line splitting. when : < inbrackets 0 throw 'more-inline-brackets-closed-than-opened inbrackets line ; when we’re in a string or in brackets , just skip to the next char if : or instring : > inbrackets 0 linebracketizer instring inbrackets bracketstoadd . : string-drop-right unprocessed 1 . : string-append lastletter processed ; else check for " : ": That adds a new inline bracket ; support : at the beginning of a line, too. if : or (equal? " : " lastupto3) (equal? ": " lastupto3) ; replace the last 2 chars with "(" and note ; that we need an additional closing bracket ; at the end. linebracketizer instring inbrackets : + 1 bracketstoadd string-append (string-drop-right unprocessed 2) string-append "(" processed ; turn " ' (" into " '(", do not modify unprocessed, except to shorten it! if : and (string-prefix? "(" processed) : equal? " ' " lastupto3 ; leave out the second space linebracketizer instring inbrackets bracketstoadd . (string-append (string-drop-right unprocessed 2) "'") . processed ; else, just go on linebracketizer instring inbrackets bracketstoadd . (string-drop-right unprocessed 1) . (string-append lastletter processed) define : last-indent levels . "Retrieve the indentation of the last line: Simply the highest level." list-ref levels 0 define : line-add-starting-bracket line . "Add a starting bracket to the line, if it is no continuation line (it is more indented than the previous)." list line-indent line string-append . "(" line-content line line-comment line define : line-add-closing-brackets line number . "Add a closing bracket to the line." list line-indent line string-append line-content line xsubstring ")" 0 number line-comment line define : line-indent-brackets-to-close line-indent levels line-continues prev-continues . "Find the number of brackets to close to reduce the levels to the line-indent." ; adjust the levels until the highest indentation level is equal ; to the indentation of the next line. Then check for ; continuation. let closer : (bracketstoclose 0) (rest levels) let : : highest-level : list-ref rest 0 ; finish-condition if : = line-indent highest-level if prev-continues . bracketstoclose + 1 bracketstoclose if : > line-indent highest-level closer (- bracketstoclose 1) : append (list line-indent) rest closer (+ bracketstoclose 1) : list-tail rest 1 define : line-indent-brackets-to-open line-indent levels line-continues prev-continues . "Find the number of brackets to open to fit the line-indent and continuation marker." if line-continues . 0 . 1 define : line-indent-levels-adjust levels next-indent . "Add or remove levels so the highest remaining level matches next-indent." let adjuster : (lev levels) let : : highest-level : list-ref lev 0 if : = next-indent highest-level . lev if : > next-indent highest-level append (list next-indent) lev adjuster : list-tail lev 1 define : line-drop-continuation-dot line let : : content : line-content line list line-indent line if : line-continues? line string-drop content 2 . content line-comment line define : wisp2lisp-parse lisp prev lines . "Parse the body of the wisp-code." set! prev : wisp2lisp-add-inline-colon-brackets prev ; prev already is a code-line. if : not : line-continues? prev set! prev : line-add-starting-bracket prev set! lines : map-in-order wisp2lisp-add-inline-colon-brackets lines let bracketizer : (levels '(0)) (pre prev) (unprocessed lines) (processed lisp) (whitespace '()) ; levels is the list of levels, with the lowest to the right. i.e: '(12 8 4 0) ; once we processed everything, we pass the bracketizer pre as f one last time if : equal? #f : line-content pre . processed let : : next : if (equal? unprocessed '()) (list 0 #f #f) : list-ref unprocessed 0 ; this is the break condition for the next loop! if : line-empty-code? next ; empty lines get silently added, but otherwise ignored bracketizer levels pre list-tail unprocessed 1 . processed append whitespace : list next ; firstoff add the next indent to the levels, so we only work on the levels, prev-continues, next-continues and next-indent ; if pre was a continuation, the real levels are 1 lower than the counted levels let* : next-indent : line-indent next pre-indent : line-indent pre pre-continues : line-continues? pre next-continues : line-continues? next final-line : equal? #f : line-content next bracketstocloseprev : line-indent-brackets-to-close next-indent levels next-continues pre-continues bracketstoopennext : line-indent-brackets-to-open next-indent levels next-continues pre-continues newnext : if final-line next : if (> bracketstoopennext 0) (line-add-starting-bracket next) next newpre : line-drop-continuation-dot : line-add-closing-brackets pre bracketstocloseprev newlevels : line-indent-levels-adjust levels next-indent bracketizer newlevels newnext if final-line unprocessed : list-tail unprocessed 1 append processed (list newpre) whitespace list define : wisp2lisp-initial-comments lisp prev lines . "Keep all starting comments: do not start them with a bracket." let skip-initial-comments : (lisp lisp) (prev prev) (lines lines) if : = 0 : length lines ; file only contained comments, maybe including the hashbang . lisp if : line-empty-code? prev skip-initial-comments : append lisp : list prev . (list-ref lines 0) (list-tail lines 1) list lisp prev lines define : wisp2lisp-hashbang lisp prev unprocessed . "Parse a potential initial hashbang line." if and equal? lisp '() ; really the first line equal? 0 : line-indent prev string-prefix? "#!" : line-content prev wisp2lisp-hashbang : append lisp : list : line-merge-comment prev . (list-ref unprocessed 0) (list-tail unprocessed 1) list lisp prev unprocessed define : wisp2lisp lines . "Parse indentation in the lines to add the correct brackets." if : equal? lines '() . '() let : lisp '() ; the processed lines prev : list-ref lines 0 ; the last line unprocessed : list-tail lines 1 ; obvious :) let* : hashbanged : wisp2lisp-hashbang lisp prev unprocessed deinitialized : apply wisp2lisp-initial-comments hashbanged parsed : apply wisp2lisp-parse deinitialized . parsed ; first step: Be able to mirror a file to stdout let* : filename : list-ref ( command-line ) 1 text : read-whole-file filename ; Lines consist of lines with indent, content and comment. See ; line-indent, line-content, line-comment and the other ; line-functions for details. textlines : split-wisp-lines text lines : linestoindented textlines lisp : wisp2lisp lines ; display : list-ref lines 100 ; seems good let show : (processed '()) (unprocessed lisp) when : not : equal? unprocessed '() let : : next : list-ref unprocessed 0 ;display : length processed ;display "/" ;display : length unprocessed ;display ": " display : xsubstring " " 0 : line-indent next display : line-content next unless : equal? "" : line-comment next display ";" display : line-comment next newline show (append processed (list next)) (list-tail unprocessed 1) ; let : : line : list-ref lisp 158 ; display : line-indent line ; display "," ; display : line-content line ; display "," ; display : line-comment line ; looks good ; TODO: add brackets to the content ; TODO: undo linebreak-replacing. Needs in-string and in-comment ; checking, but only for each line, not spanning multiple lines. newline