summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Sulfrian <alexander@sulfrian.net>2009-06-23 03:21:32 +0200
committerAlexander Sulfrian <alexander@sulfrian.net>2009-06-23 03:23:55 +0200
commit41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5 (patch)
treee20415ce452f7c84befcd98d2b349ad65876087b
parent5dd84e8e76132ad2e1a58de28ec94ca7ee1ab969 (diff)
downloadswppy-41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5.tar.gz
swppy-41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5.tar.xz
swppy-41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5.zip
added parser
- commited parser (Michael Popov) - changed encoding of parser.py to utf8
-rw-r--r--src/front/__init__.py26
-rw-r--r--src/front/ast.py21
-rw-r--r--src/front/parser.py252
-rw-r--r--src/front/token.py2
4 files changed, 232 insertions, 69 deletions
diff --git a/src/front/__init__.py b/src/front/__init__.py
index fdf35a8..747efde 100644
--- a/src/front/__init__.py
+++ b/src/front/__init__.py
@@ -1,11 +1,12 @@
from front.lexer import Lexer
from front.parser import Parser
-from front.symbols import SymbolTable
+#from front.symbols import SymbolTable
def main():
source = '''fun fib[a]
if a < 2
@1
+ end
@( fib[a-1] + fib[a-2] )
end
@@ -14,23 +15,24 @@ fun main[]
sum = 0
i = 0
while (i < 10)
- sum = sum + fib[i = i + 1]
+ sum = sum + fib[i]
+ i = i + 1
end
@sum
end'''
- symbols = SymbolTable()
- lex = Lexer(source)
+ #symbols = SymbolTable()
+ #lex = Lexer(source)
# testing
- while True:
- token = lex.scan()
- print token.__repr__()
- if not token:
- break
-
- # parse = Parser(lex)
- # parse.program()
+ #while True:
+ # token = lex.scan()
+ # print token.__repr__()
+ # if not token:
+ # break
+
+ parse = Parser(Lexer(source))
+ print parse.parse()
if __name__ == "__main__":
main()
diff --git a/src/front/ast.py b/src/front/ast.py
index 5b4def7..751d38d 100644
--- a/src/front/ast.py
+++ b/src/front/ast.py
@@ -14,6 +14,9 @@ class Program(Node):
return 0
+ def __repr__(self):
+ return "<Program: %s>" % self.functions
+
class Function(Node):
def __init__(self, name, params, statements, lineno = -1):
self.name = name
@@ -22,7 +25,7 @@ class Function(Node):
self.lineno = lineno
def __repr__(self):
- return "<Function: %s at line %d>" % (self.name, self.lineno)
+ return "<Function: %s %s %s>" % (self.name, str(self.params), str(self.statements))
def eval(self,tacarray):
return tacarray.append(te)
@@ -185,3 +188,19 @@ class BinaryExpression(Expression):
te = TacElem(op,left,right)
return tacarray.append(te)
+
+class Variable(Expression):
+ def __init__(self, name, lineno = -1):
+ self.name = name
+ self.lineno = lineno
+
+ def __repr__(self):
+ return "<Variable: %s at line %d>" %(self.name, self.lineno)
+
+class Constant(Expression):
+ def __init__(self, value, lineno = -1):
+ self.value = value
+ self.lineno = lineno
+
+ def __repr__(self):
+ return "<Variable: %d at line %d>" %(self.value, self.lineno)
diff --git a/src/front/parser.py b/src/front/parser.py
index 43606a2..9e738d3 100644
--- a/src/front/parser.py
+++ b/src/front/parser.py
@@ -1,93 +1,233 @@
-class Parser:
- lexer = None
+# -*- coding: utf-8 -*-
+from ast import *
+from lexer import *
+from token import *
- def __init__(self, lexer):
+class Parser:
+ def __init__(self, lex):
+ self.lexer = lex
+ self.token = lex.scan()
+ self.count = 1 #debug
return
def parse(self):
return self.program()
def move(self):
+ print self.count, self.token #debug
+ self.count += 1 #debug
+ self.token = self.lexer.scan()
return
def error(self, msg):
- return
-
- def match(tag):
- return
-
- # bool = bool "||" join | join.
- def bool(self):
- return
-
- # join = join "&&" equality | equality.
+ raise Exception(msg)
+
+ def match(self, tag):
+ if self.token.tag != tag:
+ self.error("match: expected %s got %s\n" %(tag, self.token.tag))
+ val = self.token.value
+ self.move()
+ return val
+
+ # boolean = join { "||" join }.
+ def boolean(self):
+ res = self.join()
+ while self.token.tag == Tag.OPERATOR and self.token.value == "||":
+ self.move()
+ res = BinaryExpression(res, "||", self.join())
+ return res
+
+ # join = equality { "&&" equality }.
def join(self):
- return
+ res = self.equality()
+ while self.token.tag == Tag.OPERATOR and self.token.value == "&&":
+ self.move()
+ res = BinaryExpression(res, "&&", self.relation())
+ return res
- # equality = equality "==" relation | equality "!=" relation | relation.
+ # equality = relation { ( "==" | "!=" ) relation }.
def equality(self):
- return
+ res = self.relation()
+ while self.token.tag == Tag.OPERATOR and self.token.value in ["==", "!="]:
+ op = self.match(Tag.OPERATOR)
+ res = BinaryExpression(res, op, self.relation())
+ return res
- # relation = expression "<" expression | expression "<=" expression | expression ">=" expression | expression ">" expression | expression.
+ # relation = expression { ( "<" | "<=" | ">=" | ">" ) expression }.
def relation(self):
- return
+ res = self.expression()
+ while self.token.tag == Tag.OPERATOR and self.token.value in ["<", "<=", ">=", ">"]:
+ op = self.match(Tag.OPERATOR)
+ res = BinaryExpression(res, op, self.expression())
+ return res
- # expression = expression "+" term | expression "-" term | term.
+ # expression = term { ( "+" | "-" ) term }.
def expression(self):
- return
+ res = self.term()
+ while self.token.tag == Tag.OPERATOR and self.token.value in ["+", "-"]:
+ op = self.match(Tag.OPERATOR)
+ res = BinaryExpression(res, op, self.term())
+ return res
- # term = term "*" unary | term "/" unary | unary.
+ # term = unary { ( "*" | "/" | "%" ) unary }.
def term(self):
- return
+ res = self.unary()
+ while self.token.tag == Tag.OPERATOR and self.token.value in ["*", "/", "%"]:
+ op = self.match(Tag.OPERATOR)
+ res = BinaryExpression(res, op, self.unary())
+ return res
# unary = "!" unary | "-" unary | factor.
def unary(self):
- return
-
- # factor = "(" bool ")" | ident | integer | "true" | "false".
+ if self.token.tag == Tag.OPERATOR and self.token.value == "!":
+ self.move()
+ return UnaryExpression("!", self.unary())
+ if self.token.tag == Tag.OPERATOR and self.token.value == "-":
+ self.move()
+ return UnaryExpression("-", self.unary())
+ return self.factor()
+
+ # factor = "(" boolean ")" | integer | "true" | "false" | ident | function_call.
def factor(self):
- return
+ if self.token.tag == Tag.LPAREN:
+ self.move()
+ res = self.boolean()
+ self.match(Tag.RPAREN)
+ return res
+ if self.token.tag == Tag.NUMBER:
+ return Constant(self.match(Tag.NUMBER))
+ if self.token.tag == Tag.TRUE:
+ self.move()
+ return Constant(1)
+ if self.token.tag == Tag.FALSE:
+ self.move()
+ return Constant(0)
+ ''' Wegen der Änderung in function_call() kann die Grammatik nicht
+ mehr mit einem Lookahead von 1 geparst werden. Die Lösung hier
+ ist, den Aufruf von function_call() zu "inlinen". Muss irgendwann
+ aber geändert werden. Eine Lösung wäre Funktionen mit call
+ aufzurufen oder eben auf LL(2) oder LR umzusteigen.
+ '''
+ if self.token.tag == Tag.IDENT:
+ name = self.match(Tag.IDENT)
+ if self.token.tag == Tag.LBRAK: # function_call
+ self.match(Tag.LBRAK)
+ args = [] if self.token.tag == Tag.RBRAK else self.expression_list()
+ self.match(Tag.RBRAK)
+ return FunctionCall(name, args)
+ return Variable(name)# variable
+
+ error("unknown factor begins with " + self.token.tag)
+
+ # function_call = ident "[" [ expression_list ] "]".
+ ''' function_call muss eine expression sein sonst geht alles folgende nicht:
+ foo = bar[]
+ @foo[bar]
+ spam[5] + eggs[1]
+ andererseits ist function_call dann kein statement mehr
+ exit[]
+ wäre also falsch. Der Aufruf muss in einem assignment verkapselt werden:
+ dummy = exit[]
+ '''
+ def function_call(self):
+ name = self.match(Tag.IDENT)
+ self.match(Tag.LBRAK)
+ args = [] if self.token.tag == Tag.RBRAK else self.expression_list()
+ self.match(Tag.RBRAK)
+ return FunctionCall(name, args)
# ident_list = ident { "," ident }.
def ident_list(self):
- return
+ ident = [self.match(Tag.IDENT)]
+ while self.token.tag == Tag.COMMA:
+ self.move()
+ ident.append(self.match(Tag.IDENT))
+ return ident
- # expression_list = expression { "," expression }.
+ # expression_list = boolean { "," boolean }.
def expression_list(self):
- return
-
- # program = function { function }.
- def program(self):
- return
-
- # function = "fun" ident "[" [ ident_list ] "]" statements "end".
+ exp = [self.boolean()]
+ while self.token.tag == Tag.COMMA:
+ self.move()
+ exp.append(self.boolean())
+ return exp
+
+ # function_list = function { function }.
+ def function_list(self):
+ funcs = [self.function()]
+ while self.token != None:
+ funcs.append(self.function())
+ return funcs
+
+ # statement_list = statement { statement }.
+ def statement_list(self):
+ stat = [self.statement()]
+ while self.token.tag not in [Tag.END, Tag.ELSE]:
+ stat.append(self.statement())
+ return stat
+
+ # function = "fun" ident "[" [ ident_list ] "]" nl statement_list "end" nl.
def function(self):
- return
-
- # statements = statement { nl statement }.
- def statements(self):
- return
-
- # statement = [ if_statement | while_statement | assignment | function_call | return_statement ].
+ self.match(Tag.FUN)
+ name = self.match(Tag.IDENT)
+ self.match(Tag.LBRAK)
+ args = [] if self.token.tag == Tag.RBRAK else self.ident_list()
+ self.match(Tag.RBRAK)
+ self.match(Tag.NEWLINE)
+ block = self.statement_list()
+ self.match(Tag.END)
+ self.match(Tag.NEWLINE)
+ return Function(name, args, block)
+
+ # statement = [ if_statement | while_statement | assign_statement | return_statement ].
def statement(self):
- return
+ return {Tag.IF: self.if_statement,
+ Tag.WHILE: self.while_statement,
+ Tag.IDENT: self.assignment,
+ Tag.RETURN: self.return_statement
+ }[self.token.tag]()
- # if_statement = "if" expression statements [ "else" statements ] "end".
+ # if_statement = "if" boolean nl statement_list [ "else" [ nl ] statement_list ] "end" nl.
def if_statement(self):
- return
-
- # while_statement = "while" expression statements "end".
+ self.match(Tag.IF)
+ condition = self.boolean()
+ self.match(Tag.NEWLINE)
+ true_case = self.statement_list()
+ else_case = []
+ if self.token.tag == Tag.ELSE:
+ self.move()
+ if self.token.tag == Tag.NEWLINE: self.move()
+ else_case = self.statement_list()
+ self.match(Tag.END)
+ self.match(Tag.NEWLINE)
+ return IfStatement(condition, true_case, else_case)
+
+ # while_statement = "while" boolean nl statement_list "end" nl.
def while_statement(self):
- return
+ self.match(Tag.WHILE)
+ condition = self.boolean()
+ self.match(Tag.NEWLINE)
+ body = self.statement_list()
+ self.match(Tag.END)
+ self.match(Tag.NEWLINE)
+ return WhileStatement(condition, body)
+
+ # return_statement = "@" boolean nl.
+ def return_statement(self):
+ self.match(Tag.RETURN)
+ exp = self.boolean()
+ self.match(Tag.NEWLINE)
+ return ReturnStatement(exp)
- # assignment = ident "=" expression.
+ # assignment = ident "=" boolean nl.
def assignment(self):
- return
+ name = self.match(Tag.IDENT)
+ self.match(Tag.ASSIGNMENT)
+ exp = self.boolean()
+ self.match(Tag.NEWLINE)
+ return AssignStatement(name, exp)
- # function_call = ident "[" [ expression_list ] "]".
- def function_call(self):
- return
+ # program = function_list.
+ def program(self):
+ return Program(self.function_list())
- # return_statement = "@" expression.
- def return_statement(self):
- return
diff --git a/src/front/token.py b/src/front/token.py
index e1bc8d8..bfcc082 100644
--- a/src/front/token.py
+++ b/src/front/token.py
@@ -30,6 +30,8 @@ Tag.FUN = Tag("FUN")
Tag.ASSIGNMENT = Tag("ASSIGNMENT")
Tag.RETURN = Tag("RETURN")
Tag.OPERATOR = Tag("OPERATOR")
+Tag.TRUE = Tag("TRUE")
+Tag.FALSE = Tag("FALSE")
class Token(object):
__slots__ = ["tag","value"]