From 8c9fb246794f7cb94689c348f104b29433416f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20N=C3=BC=C3=9Flein?= Date: Tue, 30 Jun 2009 20:57:09 +0200 Subject: implemented scope and most of ast --- src/__init__.py | 24 +---- src/front/ast.py | 282 ++++++++++++++++++++++++++++++++-------------------- src/front/lexer.py | 25 ++--- src/front/parser.py | 59 ++++++----- src/front/scope.py | 20 +++- 5 files changed, 238 insertions(+), 172 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index ad0278b..89bcdc6 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,26 +1,12 @@ from front.lexer import Lexer from front.parser import Parser +from front.scope import Scope +import sys def main(): - source = '''fun fib[a] - if a < 2 - @1 - end - @( call fib[a-1] + call fib[a-2] ) -end - -# main function -fun main[] - sum = 0 - i = 0 - while (i < 10) - sum = sum + call fib[i] - i = i + 1 - end - @sum -end''' - - print Parser(Lexer(source)).parse() + ast = Parser(Lexer(sys.stdin.read())).parse() + scope = Scope() + ast.generate() if __name__ == "__main__": main() diff --git a/src/front/ast.py b/src/front/ast.py index d6d4a3f..f221440 100644 --- a/src/front/ast.py +++ b/src/front/ast.py @@ -1,57 +1,128 @@ from back.tac import * +import sys class Node(object): - pass + labels = 0 + + def newlabel(self): + Node.labels += 1 + return Node.labels + + def emitlabel(self, i): + sys.stdout.write("L" + str(i) + ":") + sys.stdout.flush() + + def emit(self, s): + sys.stdout.write("\t" + s + "\n") + sys.stdout.flush() + + def debug(self, msg): + #sys.stdout.write("DEBUG: " + msg + "\n") + #sys.stdout.flush() + return class Program(Node): def __init__(self, functions, lineno = -1): self.functions = functions self.lineno = lineno - def eval(self,tacarray): + def generate(self): + self.debug("Program.generate(): functions = %s" % self.functions) for function in self.functions: - function = function.eval(tacarray) - - return 0 + function.generate() def __repr__(self): return "" % self.functions class Function(Node): - def __init__(self, name, params, statements, lineno = -1): + def __init__(self, name, params, statement, lineno = -1): self.name = name self.params = params - self.statements = statements + self.statement = statement self.lineno = lineno - def __repr__(self): - return "" % (self.name, str(self.params), self.lineno, str(self.statements)) + def generate(self): + self.debug("Function.generate(): name = %s, params = %s, statement = %s" % (self.name, self.params, self.statement)) + print "%s:" % self.name + begin = self.newlabel() + after = self.newlabel() + self.emitlabel(begin) + self.statement.generate(begin, after) + self.emitlabel(after) + print - def eval(self,tacarray): - return tacarray.append(te) + def __repr__(self): + return "" % (self.name, str(self.params), self.lineno, str(self.statement)) class Statement(Node): pass +class Sequence(Statement): + def __init__(self, s1, s2, lineno = -1): + self.s1 = s1 + self.s2 = s2 + self.lineno = lineno + + def generate(self, before, after): + self.debug("Sequence.generate(before = %d, after = %d): s1 = %s, s2 = %s" % (before, after, self.s1, self.s2)) + if not self.s1: + self.s2.generate(before, after) + elif not self.s2: + self.s1.generate(before, after) + else: + label = self.newlabel() + self.s1.generate(before, label) + self.emitlabel(label) + self.s2.generate(label, after) + + def __repr__(self): + return "" % (self.lineno, self.s1, self.s2) + class IfStatement(Statement): - def __init__(self, expression, true_statements, false_statements, lineno = -1): + def __init__(self, expression, true_statement, false_statement, lineno = -1): self.expression = expression - self.true_statements = true_statements - self.false_statements = false_statements + self.true_statement = true_statement + self.false_statement = false_statement self.lineno = lineno + def generate(self, before, after): + self.debug("IfStatement.generate(before = %d, after = %d): expression = %s, true = %s, false = %s" % (before, after, self.expression, self.true_statement, self.false_statement)) + + x = self.expression.reduce() + + if self.false_statement: + true_label = self.newlabel() + false_label = self.newlabel() + self.emit("iffalse %s goto L%d" % (x, false_label)) + self.emitlabel(true_label) + self.true_statement.generate(true_label, after) + self.emit("goto L%d" % after) + self.emitlabel(false_label) + self.false_statement.generate(false_label, after) + else: + true_label = self.newlabel() + self.emit("iffalse %s goto L%d" % (x, after)) + self.emitlabel(true_label) + self.true_statement.generate(true_label, after) + def __repr__(self): return "" % self.lineno - def eval(self,tacarray): - return tacarray.append(te) - class WhileStatement(Statement): - def __init__(self, expression, statements, lineno = -1): + def __init__(self, expression, statement, lineno = -1): self.expression = expression - self.statements = statements + self.statement = statement self.lineno = lineno + def generate(self, before, after): + self.debug("WhileStatement.generate(before = %d, after = %d): expression = %s, statement = %s" % (before, after, self.expression, self.statement)) + x = self.expression.reduce() + self.emit("iffalse %s goto L%d" % (x, after)) + label = self.newlabel() + self.emitlabel(label) + self.statement.generate(label, before) + self.emit("goto L%d" % before) + def __repr__(self): return "" % self.lineno @@ -63,144 +134,135 @@ class ReturnStatement(Statement): self.expression = expression self.lineno = lineno + def generate(self, before, after): + self.debug("ReturnStatement.generate(before = %d, after = %d): expression = %s" % (before, after, self.expression)) + self.emit("return %s" % self.expression.reduce()) + def __repr__(self): return "" % self.lineno - def eval(self,tacarray): - - if isinstance(expression,Node): - expression = expression.eval(tacarray) - - te = TacElem(Op.RETURN,expression) - return tacarray.append(te) - class AssignStatement(Statement): def __init__(self, ident, expression, lineno = -1): self.ident = ident self.expression = expression self.lineno = lineno + def generate(self, before, after): + self.debug("AssignStatement.generate(before = %d, after = %d): ident = %s, expression = %s" % (before, after, self.ident, self.expression)) + self.emit("%s = %s" % (self.ident, self.expression.reduce())) + def __repr__(self): return "" % (self.ident, self.lineno) - def eval(self,tacarray): - if isinstance(expression,Node): - expression = expression.eval(tacarray) - yield self.expression - - class FunctionCall(Statement): def __init__(self, ident, arguments = [], lineno = -1): self.ident = ident self.arguments = arguments self.lineno = lineno + def generate(self): + self.debug("FunctionCall.generate(): ident = %s, arguments = %s" % (self.ident, self.arguments)) + self.emit("call %s" % self.ident) + + def reduce(self): + self.debug("FunctionCall.reduce(): %s" % self) + t = Temporary() + self.emit("%s = call %s%s" % (t, self.ident, self.arguments)) + return t + def __repr__(self): return "" % (self.ident, self.lineno) - def eval(self,tacarray): - """ Function call will be seperated in all the Op.PARAMs first, and the - Op.CALL afterwards""" - - # XXX can there be arguments with multiple expressions bla((x+y-3),z)? - # -> have to look into that, when it comes to param-number - # for every parameter create a TacElem - #for argument in self.arguments: - # if isinstance(argument,Node): - # argument = argument.eval(tacarray) - # TacElem(Op.PARAM,argument) - - # in the end create the CALL-TacElem with the number of parameters - te = TacElem(Op.CALL,self.ident,len(self.arguments)) - return tacarray.append(te) +class Expression(Node): + def __init__(self, op, lineno = -1): + self.op = op + self.lineno = -1 + def generate(self): + return self -class Expression(Node): - pass + def reduce(self): + return self -class UnaryExpression(Expression): - """ Unary expressions are !x or -x ...""" - def __init__(self, op, right, lineno = -1): - self.op = op - self.right = right - self.lineno = lineno + def __str__(self): + return str(self.op) def __repr__(self): - return "" % (self.op, self.right, self.lineno) + return "" % self.lineno - def eval(self,tacarray): - right = self.right - op = None +class Operation(Expression): + def __init__(self, op, lineno = -1): + super(Operation, self).__init__(op, lineno) - if (self.op == "not"): - op = Op.NOT - if (self.op == "minus"): - op = Op.MINUS + def reduce(self): + self.debug("Operation.reduce(): %s" % self) + x = self.generate() + t = Temporary() + self.emit("%s = %s" % (t, x)) + return t - if op == None: - return None + def __repr__(self): + return "" % self.lineno - if isinstance(right,Node): - right = right.eval(tacarray) +class UnaryExpression(Operation): + """ Unary expressions are !x or -x ...""" + def __init__(self, op, right, lineno = -1): + super(UnaryExpression, self).__init__(op, lineno) + self.right = right - te = TacElem(op,right) - return tacarray.append(te) + def generate(self): + self.debug("UnaryExpression.generate(): op = %s, right = %s" % (self.op, self.right)) + return UnaryExpression(self.op, self.right.reduce()) + def __str__(self): + return "%s %s" % (str(self.op), str(self.right)) -class BinaryExpression(Expression): + def __repr__(self): + return "" % (self.op, self.right, self.lineno) + +class BinaryExpression(Operation): """ Biary expression are x+y, x-y, ... """ def __init__(self, left, op, right, lineno = -1): + super(BinaryExpression, self).__init__(op, lineno) self.left = left - self.op = op self.right = right - self.lineno = lineno - def __repr__(self): - return "" % (self.op, self.left, self.right, self.lineno) + def generate(self): + self.debug("BinaryExpression.generate(): left = %s, op = %s, right = %s" % (self.left, self.op, self.right)) + return BinaryExpression(self.left.reduce(), self.op, self.right.reduce()) + def __str__(self): + return "%s %s %s" % (str(self.left), str(self.op), str(self.right)) - def eval(self,tacarray): - op = None - if (self.op == "+"): - op = Op.ADD - if (self.op == "-"): - op = Op.SUB - if (self.op == "*"): - op= Op.MUL - if (self.op == "/"): - op= Op.DIV - if (self.op == "%"): - op= Op.MOD - if (self.op == "and"): - op= Op.AND - if (self.op == "or"): - op= Op.OR - - if op == None: - return None - - if isinstance(left,Node): - left = left.eval(tacarray) - - if isinstance(right,Node): - right = right.eval(tacarray) - - te = TacElem(op,left,right) - return tacarray.append(te) - + def __repr__(self): + return "" % (self.op, self.left, self.right, self.lineno) class Variable(Expression): def __init__(self, name, lineno = -1): - self.name = name - self.lineno = lineno + super(Variable, self).__init__(name, lineno) def __repr__(self): - return "" %(self.name, self.lineno) + return "" %(self.op, self.lineno) class Constant(Expression): def __init__(self, value, lineno = -1): - self.value = value - self.lineno = lineno + super(Constant, self).__init__(value, lineno) + + def jumping(self, true_label, false_label): + if self.op and true_label != 0: + self.emit("goto L%d" % true_label) + elif not self.op and false_label != 0: + self.emit("goto L%d" % false_label) def __repr__(self): - return "" %(self.value, self.lineno) + return "" %(self.op, self.lineno) + +class Temporary(Expression): + count = 0 + + def __init__(self): + self.number = Temporary.count + Temporary.count += 1 + + def __str__(self): + return "t%d" % self.number diff --git a/src/front/lexer.py b/src/front/lexer.py index 53ccaec..e6eb172 100644 --- a/src/front/lexer.py +++ b/src/front/lexer.py @@ -4,10 +4,10 @@ from token import * class Lexer: def __init__(self, source): - self.source = (source + "\n\n").splitlines() + self.source = (source + "\n").splitlines() self.source.reverse() self.line = 0 - self.doubleNewlineCheck = False + self.lastWasNewline = True self.currentLine = '' # reservierte Wörter initialisieren @@ -31,31 +31,32 @@ class Lexer: return def scan(self): + # leerzeichen entfernen + self.currentLine = self.currentLine.strip() + # wenn in der aktuellen Zeile nichts mehr steht - if (len(self.currentLine) == 0): + if len(self.currentLine) == 0: # wenn source zuende, dann None zurückgeben - if (len(self.source) <= 0): + if len(self.source) <= 0: return None # nächste Zeile auslesen self.line = self.line + 1 self.currentLine = self.source.pop() - # newline zurückgeben - if self.doubleNewlineCheck: - self.doubleNewlineCheck = False - return Token(Tag.NEWLINE) + # nur ein newline zurückgeben + if self.lastWasNewline: + return self.scan() - # leerzeichen entfernen - self.currentLine = self.currentLine.strip() + self.lastWasNewline = True + return Token(Tag.NEWLINE) # bei Kommentar, Rest der Zeile ignorieren if self.currentLine.startswith('#'): self.currentLine = '' return self.scan() - # keine doppelten Newlines - self.doubleNewlineCheck = True + self.lastWasNewline = False # Token parsen if self.currentLine.startswith('@'): diff --git a/src/front/parser.py b/src/front/parser.py index 610a5bb..511e929 100644 --- a/src/front/parser.py +++ b/src/front/parser.py @@ -1,37 +1,41 @@ # -*- coding: utf-8 -*- from ast import * -from lexer import * -from token import * +from token import Tag +from scope import Scope + +class ParseError(Exception): + pass class Parser: - def __init__(self, lex): - self.lexer = lex - self.token = lex.scan() - self.count = 1 #debug - return + def __init__(self, lexer): + self.lexer = lexer + self.scope = Scope() + self.move() def parse(self): return self.program() def move(self): - print self.count, self.token #debug - self.count += 1 #debug self.token = self.lexer.scan() - return + #print "move(): %s" % self.token def error(self, msg): - raise Exception(msg) + raise ParseError(msg) - def match(self, tag): + def _match(self, tag): if self.token == None: self.error("Unexpected end of file.") - if self.token.tag != tag: self.error("match: expected %s got %s\n" %(tag, self.token.tag)) val = self.token.value self.move() return val + def match(self, tag, *tags): + for t in (tag,) + tags: + res = self._match(t) + return res + # boolean = join { "||" join }. def boolean(self): res = self.join() @@ -94,8 +98,7 @@ class Parser: # function_call = "call" ident "[" [ expression_list ] "]". def function_call(self): - self.match(Tag.CALL) - name = self.match(Tag.IDENT) + name = self.match(Tag.CALL, Tag.IDENT) self.match(Tag.LBRAK) args = [] if self.token.tag == Tag.RBRAK else self.expression_list() self.match(Tag.RBRAK) @@ -126,23 +129,20 @@ class Parser: # statement_list = statement { statement }. def statement_list(self): - stat = [self.statement()] - while self.token.tag not in [Tag.END, Tag.ELSE]: - stat.append(self.statement()) - return stat + if self.token.tag in [Tag.END, Tag.ELSE]: + return None + return Sequence(self.statement(), self.statement_list(), self.lexer.line) # function = "fun" ident "[" [ ident_list ] "]" nl statement_list "end" nl. def function(self): line = self.lexer.line - self.match(Tag.FUN) - name = self.match(Tag.IDENT) + name = self.match(Tag.FUN, Tag.IDENT) self.match(Tag.LBRAK) args = [] if self.token.tag == Tag.RBRAK else self.ident_list() - self.match(Tag.RBRAK) - self.match(Tag.NEWLINE) + self.match(Tag.RBRAK, Tag.NEWLINE) + self.scope.new(name, args) block = self.statement_list() - self.match(Tag.END) - self.match(Tag.NEWLINE) + self.match(Tag.END, Tag.NEWLINE) return Function(name, args, block, line) # statement = [ if_statement | while_statement | assign_statement | return_statement | function_call ]. @@ -161,13 +161,12 @@ class Parser: condition = self.boolean() self.match(Tag.NEWLINE) true_case = self.statement_list() - else_case = [] + else_case = None if self.token.tag == Tag.ELSE: self.move() self.match(Tag.NEWLINE) else_case = self.statement_list() - self.match(Tag.END) - self.match(Tag.NEWLINE) + self.match(Tag.END, Tag.NEWLINE) return IfStatement(condition, true_case, else_case, line) # while_statement = "while" boolean nl statement_list "end" nl. @@ -177,8 +176,7 @@ class Parser: condition = self.boolean() self.match(Tag.NEWLINE) body = self.statement_list() - self.match(Tag.END) - self.match(Tag.NEWLINE) + self.match(Tag.END, Tag.NEWLINE) return WhileStatement(condition, body, line) # return_statement = "@" boolean nl. @@ -196,6 +194,7 @@ class Parser: self.match(Tag.ASSIGNMENT) exp = self.boolean() self.match(Tag.NEWLINE) + self.scope.add(name) return AssignStatement(name, exp, line) # program = function_list. diff --git a/src/front/scope.py b/src/front/scope.py index 0df8729..a15ed1d 100644 --- a/src/front/scope.py +++ b/src/front/scope.py @@ -1,6 +1,24 @@ class Scope(object): __shared_state = {} - functions = {} + __current_function = None + __functions = {} def __init__(self): self.__dict__ = self.__shared_state + + def new(self, name, symbols = []): + if getattr(self.__functions, name, None): + raise ScopeError("multiple definitions of %s" % name) + self.__functions[name] = [] + self.__current_function = name + self.add(symbols) + + def add(self, symbols): + if type(symbols) != type([]): + symbols = [symbols] + for symbol in symbols: + if symbol not in self.__functions[self.__current_function]: + self.__functions[self.__current_function].append(symbol) + + def __str__(self): + return "" % str(self.__functions) -- cgit v1.2.3