# -*- coding: utf-8 -*-
# ebnf.py -- recovered from a git patch whose line breaks had been collapsed.
#
#   Commit:  b50aea83018bb8d4084e7f0438a665b5c4f4ec09
#   Author:  Andreas Nüßlein
#   Date:    Fri, 15 May 2009 19:12:44 +0200
#   Subject: added ebnf-stuff
#   Files:   ebnf.py (new file, 152 lines), mylang.ebnf (new file, 31 lines)
#   Source:  cgit v1.2.3
#
# The text of mylang.ebnf from the same commit is kept verbatim in the
# comment block at the end of this file.
"""Recognizer for grammars written in EBNF.

Reads EBNF text one character at a time from a file-like object, checks it
against the EBNF meta-syntax and prints one ``record(...)`` line per
identifier or literal found on the right-hand side of a production.
"""

import string


class EBNF(object):
    """Scanner and recursive-descent parser for EBNF text.

    Meta-syntax accepted::

        syntax     = { production } .
        production = identifier "=" expression "." .
        expression = term { "|" term } .
        term       = factor { factor } .
        factor     = identifier | literal
                   | "(" expression ")" | "[" expression "]"
                   | "{" expression "}" .

    Diagnostics are printed as ``ERROR: pos=<p>, err=<n>, sym=<s>`` where
    ``<n>`` is one of:

        1  literal not closed before end of input
        2  ")" expected
        3  "]" expected
        4  "}" expected
        5  factor expected
        7  "=" expected
        8  "." expected
    """

    # Symbol codes.  __term__ depends on the numeric order: every symbol
    # that can start a factor (IDENT .. LBRACE) is smaller than BAR.
    IDENT = 0
    LITERAL = 2
    LPAREN = 3
    LBRAK = 4
    LBRACE = 5
    BAR = 6
    EQL = 7
    RPAREN = 8
    RBRAK = 9
    RBRACE = 10
    PERIOD = 11
    OTHER = 12
    EOF = 13  # end of input; larger than BAR so it never starts a factor

    # Characters allowed in an identifier.  ascii_letters exists in both
    # Python 2 and 3 and, unlike string.letters, is not locale-dependent.
    _IDENT_CHARS = string.ascii_letters + "_"

    # Single-character symbols.
    _PUNCTUATION = {
        '=': EQL,
        '(': LPAREN,
        ')': RPAREN,
        '[': LBRAK,
        ']': RBRAK,
        '{': LBRACE,
        '}': RBRACE,
        '|': BAR,
        '.': PERIOD,
    }

    # Opening bracket -> (matching closing symbol, error code if missing).
    _CLOSERS = {
        LPAREN: (RPAREN, 2),
        LBRAK: (RBRAK, 3),
        LBRACE: (RBRACE, 4),
    }

    __input__ = None    # file-like object providing read(1)
    __char__ = None     # current look-ahead character, '' at end of input
    __symbol__ = None   # code of the most recently scanned symbol
    __data__ = None     # text of the last IDENT / LITERAL, otherwise None
    __pos__ = 0         # number of characters read so far
    __lastpos__ = 0     # position of the last reported error

    def __init__(self, input):
        """Attach to *input* and read the first look-ahead character.

        *input* must provide ``read(1)`` returning a one-character string,
        or an empty string at end of input.  Empty input is accepted.
        """
        self.__input__ = input
        self.__pos__ = 0
        self.__lastpos__ = 0
        self.__get_char__()

    def __get_char__(self):
        """Advance the look-ahead by one character and return it.

        Returns '' at end of input instead of raising, so that the symbol
        being scanned when the input ends is still delivered to the parser.
        """
        self.__char__ = self.__input__.read(1)
        if self.__char__:
            # Count characters here rather than calling tell(): tell() is
            # unavailable on pipes and opaque for Python 3 text streams.
            self.__pos__ += 1
        return self.__char__

    def __get_sym__(self):
        """Scan the next symbol into __symbol__ (and __data__ if it has text)."""
        self.__data__ = None

        # '' is "in" every string, so end of input must be tested explicitly.
        while self.__char__ and self.__char__ in string.whitespace:
            self.__get_char__()

        ch = self.__char__
        if not ch:
            self.__symbol__ = self.EOF
            return

        if ch in self._IDENT_CHARS:
            self.__symbol__ = self.IDENT
            chars = [ch]
            while self.__get_char__() and self.__char__ in self._IDENT_CHARS:
                chars.append(self.__char__)
            self.__data__ = "".join(chars)
            # The look-ahead already holds the character after the identifier.
            return

        if ch == '"':
            self.__symbol__ = self.LITERAL
            chars = []
            while self.__get_char__() not in ('"', ''):
                chars.append(self.__char__)
            self.__data__ = "".join(chars)
            if not self.__char__:
                # Input ended inside the literal.
                self.__error__(1)
        else:
            self.__symbol__ = self._PUNCTUATION.get(ch, self.OTHER)

        # Step past the single-character symbol or the closing quote.
        self.__get_char__()

    def __error__(self, num):
        """Print error *num* unless it is within 2 characters of the last one."""
        pos = self.__pos__
        if pos > self.__lastpos__ + 2:
            print("ERROR: pos=%d, err=%d, sym=%d" % (pos, num, self.__symbol__))
            # NOTE(review): the nesting of this assignment was lost when the
            # patch was flattened.  It is placed inside the `if`, so only
            # reported errors move the suppression window -- confirm.
            self.__lastpos__ = pos

    def __expression__(self):
        """expression = term { "|" term } ."""
        self.__term__()
        while self.__symbol__ == self.BAR:
            self.__get_sym__()
            self.__term__()

    def __term__(self):
        """term = factor { factor } ."""
        self.__factor__()
        # Symbols below BAR are exactly those that can start a factor.
        while self.__symbol__ < self.BAR:
            self.__factor__()

    def __factor__(self):
        """factor = identifier | literal | "(" e ")" | "[" e "]" | "{" e "}" ."""
        sym = self.__symbol__
        if sym == self.IDENT:
            # NOTE(review): 'TO' (letter O) may be a typo for 'T0', given the
            # 'T1' below.  Left unchanged because it is emitted output.
            print("record(TO, '%s', 1)" % (self.__data__,))
            self.__get_sym__()
        elif sym == self.LITERAL:
            print("record(T1, '%s', 0)" % (self.__data__,))
            self.__get_sym__()
        elif sym in self._CLOSERS:
            closer, err = self._CLOSERS[sym]
            self.__get_sym__()
            self.__expression__()
            if self.__symbol__ == closer:
                self.__get_sym__()
            else:
                self.__error__(err)
        else:
            self.__error__(5)

    def __production__(self):
        """production = identifier "=" expression "." .

        Called with the identifier as the current symbol.
        """
        self.__get_sym__()

        if self.__symbol__ == self.EQL:
            self.__get_sym__()
        else:
            self.__error__(7)

        self.__expression__()

        if self.__symbol__ == self.PERIOD:
            self.__get_sym__()
        else:
            self.__error__(8)

    def __syntax__(self):
        """syntax = { production } ."""
        while self.__symbol__ == self.IDENT:
            self.__production__()

    def compile(self):
        """Parse the whole input, printing records and diagnostics.

        Returns None.  End of input is an ordinary symbol, so a truncated
        last production is diagnosed instead of being silently dropped.
        """
        self.__lastpos__ = 0
        self.__get_sym__()
        self.__syntax__()


if __name__ == '__main__':
    import sys
    ebnf = EBNF(sys.stdin)
    ebnf.compile()


# ---------------------------------------------------------------------------
# mylang.ebnf (added by the same commit; reproduced verbatim, one rule per line)
# ---------------------------------------------------------------------------
# lowercaseLetter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z".
# uppercaseLetter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z".
# letter = lowercaseLetter | uppercaseLetter.
# digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9".
# nl = "\n".
#
# ident = letter { letter | digit }.
# integer = digit { digit }.
#
# factor = ident | integer | "(" expression ")" | "~" factor.
# term = factor {("*" | "/" | "%" | "&") factor}.
# simple_expression = ["+" | "-"] term {("+" | "-" | "|") term}.
# expression = simple_expression [ ( "==" | "!=" | ">=" | "<=" | ">" | "<" ) simple_expression ].
# ident_list = ident { "," ident }.
# expression_list = expression { "," expression }.
#
# program = function program.
# function = "fun" ident "[" [ ident_list ] "]" statements "end".
#
# statement = [ if_statement | while_statement | assignment | function_call | return_statement ].
# statements = statement { nl statement }.
#
# if_statement = "if" expression statements [ "else" statements ] "end".
# while_statement = "while" expression statements "end".
# return_statement = "@" expression.
# assignment = ident "=" expression.
# function_call = ident "[" [ expression_list ] "]".