#!/usr/bin/env python3 # wisp.py --- Whitespace-to-Lisp preprocessor. # Copyright (C) 2013 Arne Babenhauserheide <arne_bab@web.de> # Author: Arne Babenhauserheide <arne_bab@web.de> # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """whitespace to lisp converter. Essentially it just adds brackets for indentation to allow writing lisp with indentation senstitive syntax. Currently it is written in Python, because I like Python as language, but crave the power of lisp. """ def replaceinwisp(code, string, replacement): """Replace the given string with the replacement, but only in indentation sensitive parts of the code. Essentially replace everywhere except in brackets or strings. :param code: Arbitrary wisp code to process. :param string: A string to replace. :param replacement: The replacement string. :return: (code, count): The new code and a count of replacements. """ count = 0 instring = False incomment = False inbrackets = 0 strlen = len(string) for n in range(len(code) - strlen): i = code[n] # comments start with a ; - but only in regular wisp code. if not incomment and not instring and not inbrackets and i == ";" and not code[n-2:n] == "#\\": incomment = not incomment # a linebreak ends the comment if incomment: if i == "\n": incomment = not incomment # all processing stops in comments continue if i == '"' and not code[n-1:n] == "\\": instring = not instring # all processing stops in strings if instring: continue if i == "(" and not code[n-2:n] == "#\\": inbrackets += 1 elif i == ")" and not code[n-2:n] == "#\\": inbrackets -= 1 # all processing stops in brackets if inbrackets: continue # here we do the actual replacing if code[n:n+strlen] == string: count += 1 code = code[:n] + replacement + code[n+strlen:] return code, count class Line: def __init__(self, line): """Parse one line in which linebreaks within strings and brackets already got replaced by a temporary placeholder.""" # Visible indentation: If the line starts with any number of # _, followed by a space, treat those _ as spaces. if line.startswith("_"): for i,letter in enumerate(line): if letter != "_": # rewind the index to the last underscore i -= 1 break # increment the index to the first # non-underscore. Required to treat end of string and end # of underscores the same i += 1 # here line[i-1] is _. Check if line[i+1] is a space or if # the line ends after the last underscore if line[i:i+1] == " " or not line[i:]: line = (i)*" " + line[i:] # \_ escapes the underscore at the beginning of a line, so you # can use identifiers which only consist of underscores. elif line.startswith("\_"): line = "_" + line[2:] #: prefix to go around the outer bracket: '(, ,( or `( self.prefix = "" # check if this is a continuation of the parent line self.continues = line.lstrip().startswith(". ") if self.continues: self.content = line.lstrip()[2:].lstrip() else: self.content = line.lstrip() # check if the line is prefixed with any combination of ' ` and , if not self.continues: while (self.content.startswith("' ") or self.content.startswith(", ") or self.content.startswith("` ")): self.prefix += self.content[0] self.content = self.content[2:] # care for lines starting with ": " (a colon followed by a space and more chars) self.indent = len(line) - len(line.lstrip()) if self.content.startswith(": ") and self.content[2:].lstrip(): # just add a space in front of the " : ". Then it will be # captured by as inline : later. With this, the following are almost equal: # ": a b" and # ": # a b" # The only difference between both is that ": a b" cannot # have siblings in subsequent lines: The function call # ends on this line. self.content = " " + self.content if self.content.strip() == ":" or self.content.strip() == "": self.content = "" # split a possible comment self.comment = "" instring = False for n, i in enumerate(self.content): if i == '"' and not self.content[n-1:n] == "\\": instring = not instring if not instring and i == ";" and not self.content[n-2:n] == "#\\": self.comment = self.content[n+1:] self.content = self.content[:n] break # treat inline " : " as opening a bracket which gets closed at # the end of the line if the : is at the end of the line, add # () to avoid being dependent on whitespace at the end of the # line. bracketstoclose = 0 instring = False inbrackets = 0 # go backwards through the content to be able to leave out the # space after a colon without breaking later colons. for n, i in reversed(list(enumerate(self.content))): if i == '"' and not self.content[n-1:n] == "\\": instring = not instring if not instring and i == ")" and not self.content[n-2:n] == "#\\": inbrackets += 1 elif not instring and i == "(" and not self.content[n-2:n] == "#\\": inbrackets -= 1 if (not instring and not inbrackets and i == ":" and # optimization to be able to avoid string # slicing when there can be no hit. n # avoid content[-1:2] (which is an unnecessary # slicing, since it is always "" ): if self.content[n-1:n+2] == " : " or self.content[n-1:] == " :": bracketstoclose += 1 # we have to keep the space after the colon (" : " # → " ( "), otherwise we cannot use two # consecutive colons (" : : ") which would be surprising. self.content = self.content[:n] + "(" + self.content[n+2:] # after the full line processing, replace " \\: " "\n\\: " and # " \\:\n" (inside line, start of a line, end of a line) by " # : ", "\n: " and " :\n" respectively to allow escaping : as # expression. self.content, count = replaceinwisp(self.content, " \\: ", " : ") if self.content.startswith("\\: "): self.content = ": " + self.content[3:] elif self.content.endswith(" \\:"): self.content = self.content[:-3] + " :" elif self.content == "\\:": # empty function or variable call self.content = ":" # add closing brackets self.content += ")" * bracketstoclose #: Is the line effectively empty? self.empty = False onlycomment = (line.split(";")[1:] and # there is content after the comment sign not line.split(";")[0].count('"') % 2 and # but the first comment sign is not in a string not line.split(";")[0].strip()) # there is no content before the comment sign if line.strip() == "" or onlycomment: self.empty = True def nostringbreaks(code): """remove linebreaks inside strings (will be readded at the end)""" instring = False nostringbreaks = [] for n, char in enumerate(code): if char == '"' and not code[n-1:n] == "\\": instring = not instring if instring and char == "\n": nostringbreaks.append("\\LINEBREAK") else: nostringbreaks.append(char) return "".join(nostringbreaks) def nobracketbreaks(code): """remove linebreaks inside brackets (will be readded at the end).""" instring = False inbracket = 0 nostringbreaks = [] for n, char in enumerate(code): if char == '"' and not code[n-1:n] == "\\": instring = not instring if char == '(' and not instring and not code[n-2:n] == "#\\": inbracket += 1 elif char == ')' and not instring and not code[n-2:n] == "#\\": inbracket -= 1 if inbracket and char == "\n": nostringbreaks.append("\\LINEBREAK") else: nostringbreaks.append(char) return "".join(nostringbreaks) def processlines(lines, prev, codestartindex, levels, lisplines, emptylines): """Process all lines after the first.""" # process further lines: adjust the content of the current line, but only append for line in lines[codestartindex+1:]: # ignore empty lines and comment-only lines if line.empty: # simply keep empty lines and ignore their indentation # readd a possible comment if line.comment: line.content += ";" + line.comment # keep the line, do not track it in any way emptylines.append(line.indent * " " + line.content) continue # care for leading brackets # continuing lines do not get a leading bracket. if not line.continues: line.content = line.prefix + "(" + line.content # care for closing brackets # rising indent: sibling function or variable if line.indent > prev.indent: levels.append(line.indent) lisplines.append(prev.indent * " " + prev.content) # same indent: neighbour function of variable: close the previour lines bracket if line.indent == prev.indent: if not prev.continues: lisplines.append(prev.indent * " " + prev.content + ")") else: lisplines.append(prev.indent * " " + prev.content) # lower indent: parent funtion or variable. Find the number of brackets to close if prev.indent > line.indent: bracketstoclose = len([level for level in levels if level >= line.indent]) levels = levels[:-bracketstoclose + 1] if prev.continues: bracketstoclose -= 1 lisplines.append(prev.indent * " " + prev.content + ")" * bracketstoclose) # add a possible comment if prev.comment: lisplines[-1] += ";" + prev.comment prev = line lisplines.extend(emptylines) emptylines = [] # postprocessing the loop. if prev and prev.continues: levels.pop() if prev: lisplines.append(prev.indent * " " + prev.content + ")" * (len(levels))) lisplines.extend(emptylines) return prev, lisplines, emptylines, levels def wisp2lisp(code): """Turn wisp code to lisp code.""" # TODO: extract the shebang before preprocessing the code. # if the code is empty, just return an empty string if not code: return code # first get rid of linebreaks in strings code = nostringbreaks(code) # and of linebreaks inside brackets code = nobracketbreaks(code) # now read the indentation lines = [] for line in code.splitlines(): lines.append(Line(line)) # finally emit matching lisp code # write into the lisp lines with a delay of 1 line lisplines = [] # effectively empty lines to be appended emptylines = [] levels = [0] prev = lines[0] #: The index of the first code line codestartindex = 0 # process the first lines in the file. # Shebang lines must be used verbatim if not prev.indent and prev.content.startswith("#!"): codestartindex += 1 if prev.comment: prev.content += ";" + prev.comment lisplines.append(prev.content) if codestartindex < len(lines): prev = lines[codestartindex] else: prev = None # initial comment lines need special treatment to avoid starting # them with () (implementation detail) while prev and prev.empty: codestartindex += 1 if prev.comment: prev.content += ";" + prev.comment lisplines.append(prev.indent * " " + prev.content) if codestartindex < len(lines): prev = lines[codestartindex] else: prev = None if prev and not prev.continues: prev.content = prev.prefix + "(" + prev.content # run the linereader loop. This does the main work - aside from # the preprocessing in the Line class. if prev: prev, lisplines, emptylines, levels = processlines(lines, prev, codestartindex, levels, lisplines, emptylines) # postprocessing the resulting lisplines: the loop is not perfect… # get rid of brackets around empty lines for n,i in enumerate(lisplines): if i.lstrip() == "()": lisplines[n] = "" return "\n".join(lisplines).replace("\\LINEBREAK", "\n") if __name__ == "__main__": import sys import optparse parser = optparse.OptionParser("[-o outfile] [file | -]") parser.add_option("-o", "--output", default="") opts, args = parser.parse_args() if args: sourcefile = args[0] else: sourcefile = "example.w" # accept stdin as input if sourcefile == "-": wisp = sys.stdin.read() else: with open(sourcefile) as f: wisp = f.read() if opts.output: with open(opts.output, "w") as f: f.write(wisp2lisp(wisp) + "\n") else: print(wisp2lisp(wisp))