-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecursive_descent_parser.py
102 lines (90 loc) · 2.66 KB
/
recursive_descent_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Taken from Python Cookbook. David Baezley
# also see https://bozmen.io/recursive-descent-parser
import re
import collections
# Token specification
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'
master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES, DIVIDE, LPAREN, RPAREN, WS]))
# Tokenizer
Token = collections.namedtuple('Token', ['type','value'])
def generate_tokens(text):
scanner = master_pat.scanner(text)
for m in iter(scanner.match, None):
tok = Token(m.lastgroup, m.group())
if tok.type != 'WS':
yield tok
'''
expr ::= term { (+|-) term }*
term ::= factor { (*|/) factor }*
factor ::= ( expr )
| NUM
'''
# Parser
class ExpressionEvaluator:
'''
Implementation of a recursive descent parser. Each method
implements a single grammar rule. Use the ._accept() method
to test and accept the current lookahead token. Use the ._expect()
method to exactly match and discard the next token on on the input
(or raise a SyntaxError if it doesn't match).
'''
def parse(self,text):
self.tokens = generate_tokens(text)
self.tok = None # Last symbol consumed
self.nexttok = None # Next symbol tokenized
self._advance() # Load first lookahead token
return self.expr()
def _advance(self):
'Advance one token ahead'
self.tok, self.nexttok = self.nexttok, next(self.tokens, None)
def _accept(self,toktype):
'Test and consume the next token if it matches toktype'
if self.nexttok and self.nexttok.type == toktype:
self._advance()
return True
else:
return False
def _expect(self,toktype):
'Consume next token if it matches toktype or raise SyntaxError'
if not self._accept(toktype):
raise SyntaxError('Expected ' + toktype)
# Grammar rules follow
def expr(self):
"expression ::= term { ('+'|'-') term }*"
exprval = self.term()
while self._accept('PLUS') or self._accept('MINUS'):
op = self.tok.type
right = self.term()
if op == 'PLUS':
exprval += right
elif op == 'MINUS':
exprval -= right
return exprval
def term(self):
"term ::= factor { ('*'|'/') factor }*"
termval = self.factor()
while self._accept('TIMES') or self._accept('DIVIDE'):
op = self.tok.type
right = self.factor()
if op == 'TIMES':
termval *= right
elif op == 'DIVIDE':
termval /= right
return termval
def factor(self):
"factor ::= NUM | ( expr )"
if self._accept('NUM'):
return int(self.tok.value)
elif self._accept('LPAREN'):
exprval = self.expr()
self._expect('RPAREN')
return exprval
else:
raise SyntaxError('Expected NUMBER or LPAREN')