From 41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Tue, 23 Jun 2009 03:21:32 +0200 Subject: added parser - commited parser (Michael Popov) - changed encoding of parser.py to utf8 --- src/front/__init__.py | 26 +++--- src/front/ast.py | 21 ++++- src/front/parser.py | 252 +++++++++++++++++++++++++++++++++++++++----------- src/front/token.py | 2 + 4 files changed, 232 insertions(+), 69 deletions(-) diff --git a/src/front/__init__.py b/src/front/__init__.py index fdf35a8..747efde 100644 --- a/src/front/__init__.py +++ b/src/front/__init__.py @@ -1,11 +1,12 @@ from front.lexer import Lexer from front.parser import Parser -from front.symbols import SymbolTable +#from front.symbols import SymbolTable def main(): source = '''fun fib[a] if a < 2 @1 + end @( fib[a-1] + fib[a-2] ) end @@ -14,23 +15,24 @@ fun main[] sum = 0 i = 0 while (i < 10) - sum = sum + fib[i = i + 1] + sum = sum + fib[i] + i = i + 1 end @sum end''' - symbols = SymbolTable() - lex = Lexer(source) + #symbols = SymbolTable() + #lex = Lexer(source) # testing - while True: - token = lex.scan() - print token.__repr__() - if not token: - break - - # parse = Parser(lex) - # parse.program() + #while True: + # token = lex.scan() + # print token.__repr__() + # if not token: + # break + + parse = Parser(Lexer(source)) + print parse.parse() if __name__ == "__main__": main() diff --git a/src/front/ast.py b/src/front/ast.py index 5b4def7..751d38d 100644 --- a/src/front/ast.py +++ b/src/front/ast.py @@ -14,6 +14,9 @@ class Program(Node): return 0 + def __repr__(self): + return "" % self.functions + class Function(Node): def __init__(self, name, params, statements, lineno = -1): self.name = name @@ -22,7 +25,7 @@ class Function(Node): self.lineno = lineno def __repr__(self): - return "" % (self.name, self.lineno) + return "" % (self.name, str(self.params), str(self.statements)) def eval(self,tacarray): return tacarray.append(te) @@ -185,3 +188,19 @@ class BinaryExpression(Expression): te = TacElem(op,left,right) return tacarray.append(te) + +class Variable(Expression): + def __init__(self, name, lineno = -1): + self.name = name + self.lineno = lineno + + def __repr__(self): + return "" %(self.name, self.lineno) + +class Constant(Expression): + def __init__(self, value, lineno = -1): + self.value = value + self.lineno = lineno + + def __repr__(self): + return "" %(self.value, self.lineno) diff --git a/src/front/parser.py b/src/front/parser.py index 43606a2..9e738d3 100644 --- a/src/front/parser.py +++ b/src/front/parser.py @@ -1,93 +1,233 @@ -class Parser: - lexer = None +# -*- coding: utf-8 -*- +from ast import * +from lexer import * +from token import * - def __init__(self, lexer): +class Parser: + def __init__(self, lex): + self.lexer = lex + self.token = lex.scan() + self.count = 1 #debug return def parse(self): return self.program() def move(self): + print self.count, self.token #debug + self.count += 1 #debug + self.token = self.lexer.scan() return def error(self, msg): - return - - def match(tag): - return - - # bool = bool "||" join | join. - def bool(self): - return - - # join = join "&&" equality | equality. + raise Exception(msg) + + def match(self, tag): + if self.token.tag != tag: + self.error("match: expected %s got %s\n" %(tag, self.token.tag)) + val = self.token.value + self.move() + return val + + # boolean = join { "||" join }. + def boolean(self): + res = self.join() + while self.token.tag == Tag.OPERATOR and self.token.value == "||": + self.move() + res = BinaryExpression(res, "||", self.join()) + return res + + # join = equality { "&&" equality }. def join(self): - return + res = self.equality() + while self.token.tag == Tag.OPERATOR and self.token.value == "&&": + self.move() + res = BinaryExpression(res, "&&", self.relation()) + return res - # equality = equality "==" relation | equality "!=" relation | relation. + # equality = relation { ( "==" | "!=" ) relation }. def equality(self): - return + res = self.relation() + while self.token.tag == Tag.OPERATOR and self.token.value in ["==", "!="]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.relation()) + return res - # relation = expression "<" expression | expression "<=" expression | expression ">=" expression | expression ">" expression | expression. + # relation = expression { ( "<" | "<=" | ">=" | ">" ) expression }. def relation(self): - return + res = self.expression() + while self.token.tag == Tag.OPERATOR and self.token.value in ["<", "<=", ">=", ">"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.expression()) + return res - # expression = expression "+" term | expression "-" term | term. + # expression = term { ( "+" | "-" ) term }. def expression(self): - return + res = self.term() + while self.token.tag == Tag.OPERATOR and self.token.value in ["+", "-"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.term()) + return res - # term = term "*" unary | term "/" unary | unary. + # term = unary { ( "*" | "/" | "%" ) unary }. def term(self): - return + res = self.unary() + while self.token.tag == Tag.OPERATOR and self.token.value in ["*", "/", "%"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.unary()) + return res # unary = "!" unary | "-" unary | factor. def unary(self): - return - - # factor = "(" bool ")" | ident | integer | "true" | "false". + if self.token.tag == Tag.OPERATOR and self.token.value == "!": + self.move() + return UnaryExpression("!", self.unary()) + if self.token.tag == Tag.OPERATOR and self.token.value == "-": + self.move() + return UnaryExpression("-", self.unary()) + return self.factor() + + # factor = "(" boolean ")" | integer | "true" | "false" | ident | function_call. def factor(self): - return + if self.token.tag == Tag.LPAREN: + self.move() + res = self.boolean() + self.match(Tag.RPAREN) + return res + if self.token.tag == Tag.NUMBER: + return Constant(self.match(Tag.NUMBER)) + if self.token.tag == Tag.TRUE: + self.move() + return Constant(1) + if self.token.tag == Tag.FALSE: + self.move() + return Constant(0) + ''' Wegen der Änderung in function_call() kann die Grammatik nicht + mehr mit einem Lookahead von 1 geparst werden. Die Lösung hier + ist, den Aufruf von function_call() zu "inlinen". Muss irgendwann + aber geändert werden. Eine Lösung wäre Funktionen mit call + aufzurufen oder eben auf LL(2) oder LR umzusteigen. + ''' + if self.token.tag == Tag.IDENT: + name = self.match(Tag.IDENT) + if self.token.tag == Tag.LBRAK: # function_call + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) + return Variable(name)# variable + + error("unknown factor begins with " + self.token.tag) + + # function_call = ident "[" [ expression_list ] "]". + ''' function_call muss eine expression sein sonst geht alles folgende nicht: + foo = bar[] + @foo[bar] + spam[5] + eggs[1] + andererseits ist function_call dann kein statement mehr + exit[] + wäre also falsch. Der Aufruf muss in einem assignment verkapselt werden: + dummy = exit[] + ''' + def function_call(self): + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) # ident_list = ident { "," ident }. def ident_list(self): - return + ident = [self.match(Tag.IDENT)] + while self.token.tag == Tag.COMMA: + self.move() + ident.append(self.match(Tag.IDENT)) + return ident - # expression_list = expression { "," expression }. + # expression_list = boolean { "," boolean }. def expression_list(self): - return - - # program = function { function }. - def program(self): - return - - # function = "fun" ident "[" [ ident_list ] "]" statements "end". + exp = [self.boolean()] + while self.token.tag == Tag.COMMA: + self.move() + exp.append(self.boolean()) + return exp + + # function_list = function { function }. + def function_list(self): + funcs = [self.function()] + while self.token != None: + funcs.append(self.function()) + return funcs + + # statement_list = statement { statement }. + def statement_list(self): + stat = [self.statement()] + while self.token.tag not in [Tag.END, Tag.ELSE]: + stat.append(self.statement()) + return stat + + # function = "fun" ident "[" [ ident_list ] "]" nl statement_list "end" nl. def function(self): - return - - # statements = statement { nl statement }. - def statements(self): - return - - # statement = [ if_statement | while_statement | assignment | function_call | return_statement ]. + self.match(Tag.FUN) + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.ident_list() + self.match(Tag.RBRAK) + self.match(Tag.NEWLINE) + block = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return Function(name, args, block) + + # statement = [ if_statement | while_statement | assign_statement | return_statement ]. def statement(self): - return + return {Tag.IF: self.if_statement, + Tag.WHILE: self.while_statement, + Tag.IDENT: self.assignment, + Tag.RETURN: self.return_statement + }[self.token.tag]() - # if_statement = "if" expression statements [ "else" statements ] "end". + # if_statement = "if" boolean nl statement_list [ "else" [ nl ] statement_list ] "end" nl. def if_statement(self): - return - - # while_statement = "while" expression statements "end". + self.match(Tag.IF) + condition = self.boolean() + self.match(Tag.NEWLINE) + true_case = self.statement_list() + else_case = [] + if self.token.tag == Tag.ELSE: + self.move() + if self.token.tag == Tag.NEWLINE: self.move() + else_case = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return IfStatement(condition, true_case, else_case) + + # while_statement = "while" boolean nl statement_list "end" nl. def while_statement(self): - return + self.match(Tag.WHILE) + condition = self.boolean() + self.match(Tag.NEWLINE) + body = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return WhileStatement(condition, body) + + # return_statement = "@" boolean nl. + def return_statement(self): + self.match(Tag.RETURN) + exp = self.boolean() + self.match(Tag.NEWLINE) + return ReturnStatement(exp) - # assignment = ident "=" expression. + # assignment = ident "=" boolean nl. def assignment(self): - return + name = self.match(Tag.IDENT) + self.match(Tag.ASSIGNMENT) + exp = self.boolean() + self.match(Tag.NEWLINE) + return AssignStatement(name, exp) - # function_call = ident "[" [ expression_list ] "]". - def function_call(self): - return + # program = function_list. + def program(self): + return Program(self.function_list()) - # return_statement = "@" expression. - def return_statement(self): - return diff --git a/src/front/token.py b/src/front/token.py index e1bc8d8..bfcc082 100644 --- a/src/front/token.py +++ b/src/front/token.py @@ -30,6 +30,8 @@ Tag.FUN = Tag("FUN") Tag.ASSIGNMENT = Tag("ASSIGNMENT") Tag.RETURN = Tag("RETURN") Tag.OPERATOR = Tag("OPERATOR") +Tag.TRUE = Tag("TRUE") +Tag.FALSE = Tag("FALSE") class Token(object): __slots__ = ["tag","value"] -- cgit v1.2.3