diff options
-rw-r--r-- | src/__init__.py | 38 | ||||
-rw-r--r-- | src/front/__init__.py | 34 | ||||
-rw-r--r-- | src/front/ast.py | 21 | ||||
-rw-r--r-- | src/front/lexer.py | 2 | ||||
-rw-r--r-- | src/front/parser.py | 255 | ||||
-rw-r--r-- | src/front/token.py | 2 |
6 files changed, 260 insertions, 92 deletions
diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..747efde --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,38 @@ +from front.lexer import Lexer +from front.parser import Parser +#from front.symbols import SymbolTable + +def main(): + source = '''fun fib[a] + if a < 2 + @1 + end + @( fib[a-1] + fib[a-2] ) +end + +# main function +fun main[] + sum = 0 + i = 0 + while (i < 10) + sum = sum + fib[i] + i = i + 1 + end + @sum +end''' + + #symbols = SymbolTable() + #lex = Lexer(source) + + # testing + #while True: + # token = lex.scan() + # print token.__repr__() + # if not token: + # break + + parse = Parser(Lexer(source)) + print parse.parse() + +if __name__ == "__main__": + main() diff --git a/src/front/__init__.py b/src/front/__init__.py deleted file mode 100644 index b72b440..0000000 --- a/src/front/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from front.lexer import Lexer -from front.parser import Parser - -def main(): - source = '''fun fib[a] - if a < 2 - @1 - @( fib[a-1] + fib[a-2] ) -end - -# main function -fun main[] - sum = 0 - i = 0 - while (i < 10) - sum = sum + fib[i = i + 1] - end - @sum -end''' - - lex = Lexer(source) - - # testing - while True: - token = lex.scan() - print token.__repr__() - if not token: - break - - # parse = Parser(lex) - # parse.program() - -if __name__ == "__main__": - main() diff --git a/src/front/ast.py b/src/front/ast.py index 5b4def7..751d38d 100644 --- a/src/front/ast.py +++ b/src/front/ast.py @@ -14,6 +14,9 @@ class Program(Node): return 0 + def __repr__(self): + return "<Program: %s>" % self.functions + class Function(Node): def __init__(self, name, params, statements, lineno = -1): self.name = name @@ -22,7 +25,7 @@ class Function(Node): self.lineno = lineno def __repr__(self): - return "<Function: %s at line %d>" % (self.name, self.lineno) + return "<Function: %s %s %s>" % (self.name, str(self.params), str(self.statements)) def eval(self,tacarray): return tacarray.append(te) @@ -185,3 +188,19 @@ class BinaryExpression(Expression): te = TacElem(op,left,right) return tacarray.append(te) + +class Variable(Expression): + def __init__(self, name, lineno = -1): + self.name = name + self.lineno = lineno + + def __repr__(self): + return "<Variable: %s at line %d>" %(self.name, self.lineno) + +class Constant(Expression): + def __init__(self, value, lineno = -1): + self.value = value + self.lineno = lineno + + def __repr__(self): + return "<Variable: %d at line %d>" %(self.value, self.lineno) diff --git a/src/front/lexer.py b/src/front/lexer.py index ff67e6d..f2810cd 100644 --- a/src/front/lexer.py +++ b/src/front/lexer.py @@ -4,7 +4,7 @@ from token import * class Lexer: def __init__(self, source): - self.source = source.splitlines() + self.source = (source + "\n\n").splitlines() self.source.reverse() self.line = 0 self.doubleNewlineCheck = False diff --git a/src/front/parser.py b/src/front/parser.py index 43606a2..29e300d 100644 --- a/src/front/parser.py +++ b/src/front/parser.py @@ -1,93 +1,236 @@ -class Parser: - lexer = None +# -*- coding: utf-8 -*- +from ast import * +from lexer import * +from token import * - def __init__(self, lexer): +class Parser: + def __init__(self, lex): + self.lexer = lex + self.token = lex.scan() + self.count = 1 #debug return def parse(self): return self.program() def move(self): + print self.count, self.token #debug + self.count += 1 #debug + self.token = self.lexer.scan() return def error(self, msg): - return - - def match(tag): - return - - # bool = bool "||" join | join. - def bool(self): - return - - # join = join "&&" equality | equality. + raise Exception(msg) + + def match(self, tag): + if self.token == None: + self.error("Unexpected end of file.") + + if self.token.tag != tag: + self.error("match: expected %s got %s\n" %(tag, self.token.tag)) + val = self.token.value + self.move() + return val + + # boolean = join { "||" join }. + def boolean(self): + res = self.join() + while self.token.tag == Tag.OPERATOR and self.token.value == "||": + self.move() + res = BinaryExpression(res, "||", self.join()) + return res + + # join = equality { "&&" equality }. def join(self): - return + res = self.equality() + while self.token.tag == Tag.OPERATOR and self.token.value == "&&": + self.move() + res = BinaryExpression(res, "&&", self.relation()) + return res - # equality = equality "==" relation | equality "!=" relation | relation. + # equality = relation { ( "==" | "!=" ) relation }. def equality(self): - return + res = self.relation() + while self.token.tag == Tag.OPERATOR and self.token.value in ["==", "!="]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.relation()) + return res - # relation = expression "<" expression | expression "<=" expression | expression ">=" expression | expression ">" expression | expression. + # relation = expression { ( "<" | "<=" | ">=" | ">" ) expression }. def relation(self): - return + res = self.expression() + while self.token.tag == Tag.OPERATOR and self.token.value in ["<", "<=", ">=", ">"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.expression()) + return res - # expression = expression "+" term | expression "-" term | term. + # expression = term { ( "+" | "-" ) term }. def expression(self): - return + res = self.term() + while self.token.tag == Tag.OPERATOR and self.token.value in ["+", "-"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.term()) + return res - # term = term "*" unary | term "/" unary | unary. + # term = unary { ( "*" | "/" | "%" ) unary }. def term(self): - return + res = self.unary() + while self.token.tag == Tag.OPERATOR and self.token.value in ["*", "/", "%"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.unary()) + return res # unary = "!" unary | "-" unary | factor. def unary(self): - return - - # factor = "(" bool ")" | ident | integer | "true" | "false". + if self.token.tag == Tag.OPERATOR and self.token.value == "!": + self.move() + return UnaryExpression("!", self.unary()) + if self.token.tag == Tag.OPERATOR and self.token.value == "-": + self.move() + return UnaryExpression("-", self.unary()) + return self.factor() + + # factor = "(" boolean ")" | integer | "true" | "false" | ident | function_call. def factor(self): - return + if self.token.tag == Tag.LPAREN: + self.move() + res = self.boolean() + self.match(Tag.RPAREN) + return res + if self.token.tag == Tag.NUMBER: + return Constant(self.match(Tag.NUMBER)) + if self.token.tag == Tag.TRUE: + self.move() + return Constant(1) + if self.token.tag == Tag.FALSE: + self.move() + return Constant(0) + ''' Wegen der Änderung in function_call() kann die Grammatik nicht + mehr mit einem Lookahead von 1 geparst werden. Die Lösung hier + ist, den Aufruf von function_call() zu "inlinen". Muss irgendwann + aber geändert werden. Eine Lösung wäre Funktionen mit call + aufzurufen oder eben auf LL(2) oder LR umzusteigen. + ''' + if self.token.tag == Tag.IDENT: + name = self.match(Tag.IDENT) + if self.token.tag == Tag.LBRAK: # function_call + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) + return Variable(name)# variable + + error("unknown factor begins with " + self.token.tag) + + # function_call = ident "[" [ expression_list ] "]". + ''' function_call muss eine expression sein sonst geht alles folgende nicht: + foo = bar[] + @foo[bar] + spam[5] + eggs[1] + andererseits ist function_call dann kein statement mehr + exit[] + wäre also falsch. Der Aufruf muss in einem assignment verkapselt werden: + dummy = exit[] + ''' + def function_call(self): + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) # ident_list = ident { "," ident }. def ident_list(self): - return + ident = [self.match(Tag.IDENT)] + while self.token.tag == Tag.COMMA: + self.move() + ident.append(self.match(Tag.IDENT)) + return ident - # expression_list = expression { "," expression }. + # expression_list = boolean { "," boolean }. def expression_list(self): - return - - # program = function { function }. - def program(self): - return - - # function = "fun" ident "[" [ ident_list ] "]" statements "end". + exp = [self.boolean()] + while self.token.tag == Tag.COMMA: + self.move() + exp.append(self.boolean()) + return exp + + # function_list = function { function }. + def function_list(self): + funcs = [self.function()] + while self.token: + funcs.append(self.function()) + return funcs + + # statement_list = statement { statement }. + def statement_list(self): + stat = [self.statement()] + while self.token.tag not in [Tag.END, Tag.ELSE]: + stat.append(self.statement()) + return stat + + # function = "fun" ident "[" [ ident_list ] "]" nl statement_list "end" nl. def function(self): - return - - # statements = statement { nl statement }. - def statements(self): - return - - # statement = [ if_statement | while_statement | assignment | function_call | return_statement ]. + self.match(Tag.FUN) + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.ident_list() + self.match(Tag.RBRAK) + self.match(Tag.NEWLINE) + block = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return Function(name, args, block) + + # statement = [ if_statement | while_statement | assign_statement | return_statement ]. def statement(self): - return + return {Tag.IF: self.if_statement, + Tag.WHILE: self.while_statement, + Tag.IDENT: self.assignment, + Tag.RETURN: self.return_statement + }[self.token.tag]() - # if_statement = "if" expression statements [ "else" statements ] "end". + # if_statement = "if" boolean nl statement_list [ "else" [ nl ] statement_list ] "end" nl. def if_statement(self): - return - - # while_statement = "while" expression statements "end". + self.match(Tag.IF) + condition = self.boolean() + self.match(Tag.NEWLINE) + true_case = self.statement_list() + else_case = [] + if self.token.tag == Tag.ELSE: + self.move() + if self.token.tag == Tag.NEWLINE: self.move() + else_case = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return IfStatement(condition, true_case, else_case) + + # while_statement = "while" boolean nl statement_list "end" nl. def while_statement(self): - return + self.match(Tag.WHILE) + condition = self.boolean() + self.match(Tag.NEWLINE) + body = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return WhileStatement(condition, body) + + # return_statement = "@" boolean nl. + def return_statement(self): + self.match(Tag.RETURN) + exp = self.boolean() + self.match(Tag.NEWLINE) + return ReturnStatement(exp) - # assignment = ident "=" expression. + # assignment = ident "=" boolean nl. def assignment(self): - return + name = self.match(Tag.IDENT) + self.match(Tag.ASSIGNMENT) + exp = self.boolean() + self.match(Tag.NEWLINE) + return AssignStatement(name, exp) - # function_call = ident "[" [ expression_list ] "]". - def function_call(self): - return + # program = function_list. + def program(self): + return Program(self.function_list()) - # return_statement = "@" expression. - def return_statement(self): - return diff --git a/src/front/token.py b/src/front/token.py index e1bc8d8..bfcc082 100644 --- a/src/front/token.py +++ b/src/front/token.py @@ -30,6 +30,8 @@ Tag.FUN = Tag("FUN") Tag.ASSIGNMENT = Tag("ASSIGNMENT") Tag.RETURN = Tag("RETURN") Tag.OPERATOR = Tag("OPERATOR") +Tag.TRUE = Tag("TRUE") +Tag.FALSE = Tag("FALSE") class Token(object): __slots__ = ["tag","value"] |