From 614065172ad58893fa2eab25fdfe25d1aa21a483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benedikt=20B=C3=B6hm?= Date: Wed, 20 May 2009 14:26:05 +0200 Subject: remove symbol table from lexer --- src/front/TODO | 5 +++++ src/front/__init__.py | 2 +- src/front/lexer.py | 43 ++++++++++++++++++++----------------------- src/front/token.py | 23 ++++++++++++++--------- 4 files changed, 40 insertions(+), 33 deletions(-) create mode 100644 src/front/TODO (limited to 'src/front') diff --git a/src/front/TODO b/src/front/TODO new file mode 100644 index 0000000..ae28350 --- /dev/null +++ b/src/front/TODO @@ -0,0 +1,5 @@ +lexer: +====== +- convert to StringIO +- lexical errors (p. 139) + diff --git a/src/front/__init__.py b/src/front/__init__.py index 3f7e5f8..fdf35a8 100644 --- a/src/front/__init__.py +++ b/src/front/__init__.py @@ -20,7 +20,7 @@ fun main[] end''' symbols = SymbolTable() - lex = Lexer(source, symbols) + lex = Lexer(source) # testing while True: diff --git a/src/front/lexer.py b/src/front/lexer.py index fe798a9..5605fe3 100644 --- a/src/front/lexer.py +++ b/src/front/lexer.py @@ -3,10 +3,7 @@ import re from token import * class Lexer: - - def __init__(self, source, symbols): - self.symbols = symbols - + def __init__(self, source): self.source = source.splitlines() self.source.reverse() self.line = 0 @@ -14,18 +11,18 @@ class Lexer: self.currentLine = '' # reservierte Wörter initialisieren - self.reservedWords = {'True': Token(Tag.TRUE), - 'False': Token(Tag.FALSE), - '[': Token(Tag.LBRAK), - ']': Token(Tag.RBRAK), - '(': Token(Tag.LPAREN), - ')': Token(Tag.RPAREN), - ',': Token(Tag.COMMA), - 'while': Token(Tag.WHILE), - 'if': Token(Tag.IF), - 'else': Token(Tag.ELSE), - 'fun': Token(Tag.FUN), - 'end': Token(Tag.END)} + self.reservedWords = {'True': LeafToken(Tag.TRUE), + 'False': LeafToken(Tag.FALSE), + '[': LeafToken(Tag.LBRAK), + ']': LeafToken(Tag.RBRAK), + '(': LeafToken(Tag.LPAREN), + ')': LeafToken(Tag.RPAREN), + ',': LeafToken(Tag.COMMA), + 'while': 
LeafToken(Tag.WHILE), + 'if': LeafToken(Tag.IF), + 'else': LeafToken(Tag.ELSE), + 'fun': LeafToken(Tag.FUN), + 'end': LeafToken(Tag.END)} return def reserve(self, word, token): @@ -46,11 +43,11 @@ class Lexer: # newline zurückgeben if self.doubleNewlineCheck: self.doubleNewlineCheck = False - return Token(Tag.NEWLINE) + return LeafToken(Tag.NEWLINE) # leerzeichen entfernen self.currentLine = self.currentLine.strip() - + # bei Kommentar, Rest der Zeile ignorieren if self.currentLine.startswith('#'): self.currentLine = '' @@ -62,7 +59,7 @@ class Lexer: # Token parsen if self.currentLine.startswith('@'): self.currentLine = self.currentLine[1:] - return Token(Tag.RETURN) + return LeafToken(Tag.RETURN) # reservierte Wörter (da stehen auch schon erkannte Identifyer drine) for reservedWord, token in self.reservedWords.iteritems(): @@ -77,26 +74,26 @@ class Lexer: match = re.match(r"^([0-9]+)", self.currentLine) if match: self.currentLine = self.currentLine[match.end(0):] - return ValueToken(Tag.NUMBER, int(match.group(0))) + return LeafToken(Tag.NUMBER, int(match.group(0))) # operatoren matchen match = re.match(r"^(<=|==|>=|&&|\|\||<|>|\+|-|\*|/)", self.currentLine) if match: self.currentLine = self.currentLine[match.end(0):] - return ValueToken(Tag.OPERATOR, match.group(0)) + return LeafToken(Tag.OPERATOR, match.group(0)) # idents matchen match = re.match(r"^([a-zA-Z][a-zA-Z0-9]*)", self.currentLine) if match: self.currentLine = self.currentLine[match.end(0):] - token = ValueToken(Tag.IDENT, self.symbols.getOrPut(match.group(0))) + token = LeafToken(Tag.IDENT, match.group(0)) self.reserve(match.group(0), token) return token # assignments if self.currentLine.startswith('='): self.currentLine = self.currentLine[1:] - return Token(Tag.ASSIGNMENT) + return LeafToken(Tag.ASSIGNMENT) # wenn die programmausführung hier ist, # ist ein syntaxfehler aufgetreten diff --git a/src/front/token.py b/src/front/token.py index 4cf646a..31159bf 100644 --- a/src/front/token.py +++ 
b/src/front/token.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- # date: 20 Mai 2009 -class Tag: +class Tag(object): + __slots__ = ["name"] + def __init__(self, name): self.name = name @@ -52,23 +54,26 @@ Tag.RETURN = Tag("RETURN") Tag.ASSIGN = Tag("ASSIGN") Tag.FUNCTION = Tag("FUNCTION") -class Token: - tag = None +class Token(object): + __slots__ = ["tag"] def __init__(self, tag): self.tag = tag - return def __repr__(self): return "<Token %s>" % self.tag -class ValueToken(Token): - value = None +class LeafToken(Token): + __slots__ = ["value"] - def __init__(self, tag, value): + def __init__(self, tag, value = None): Token.__init__(self, tag) self.value = value - return def __repr__(self): - return "<Token %s: %s>" % (self.tag, self.value.__str__()) + if self.value: + return "<Token %s: %s>" % (self.tag, self.value.__str__()) + return "<Token %s>" % self.tag + +class NodeToken(Token): + pass -- cgit v1.2.3