-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.py
238 lines (219 loc) · 11.5 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# NanoBASIC/parser.py
# From Computer Science from Scratch
# Copyright 2024 David Kopec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# DESCRIPTION
# The parser takes the tokens from the tokenizer and tries to convert them
# into structures that are meaningful for interpreting the program. The
# particular parsing technique being used in parser.py is recursive descent.
# In recursive descent, generally each non-terminal defined in the grammar becomes
# a function. That function is responsible for checking that the tokens it is
# analyzing follow a production rule specified in the grammar. In other words,
# generally one recursive descent function = one production rule from the grammar.
# The parser checks the tokens by looking at them sequentially. If the token that
# is being analyzed is expected to be a part of another production rule, the
# recursive descent parser just calls the function representing that other production rule.
# The recursive descent functions return respective nodes when they are successful.
from NanoBASIC.tokenizer import Token
from typing import cast
from NanoBASIC.nodes import *
from NanoBASIC.errors import ParserError
class Parser:
def __init__(self, tokens: list[Token]):
self.tokens = tokens
self.token_index: int = 0
@property
def out_of_tokens(self) -> bool:
return self.token_index >= len(self.tokens)
@property
def current(self) -> Token:
if self.out_of_tokens:
raise (ParserError(f"No tokens after "
f"{self.previous.kind}", self.previous))
return self.tokens[self.token_index]
@property
def previous(self) -> Token:
return self.tokens[self.token_index - 1]
def consume(self, kind: TokenType) -> Token:
if self.current.kind is kind:
self.token_index += 1
return self.previous
raise ParserError(f"Expected {kind} after {self.previous}"
f"but got {self.current}.", self.current)
def parse(self) -> list[Statement]:
statements: list[Statement] = []
while not self.out_of_tokens:
statement = self.parse_line()
statements.append(statement)
return statements
def parse_line(self) -> Statement:
number = self.consume(TokenType.NUMBER)
return self.parse_statement(cast(int, number.associated_value))
def parse_statement(self, line_id: int) -> Statement:
match self.current.kind:
case TokenType.PRINT:
return self.parse_print(line_id)
case TokenType.IF_T:
return self.parse_if(line_id)
case TokenType.LET:
return self.parse_let(line_id)
case TokenType.GOTO:
return self.parse_goto(line_id)
case TokenType.GOSUB:
return self.parse_gosub(line_id)
case TokenType.RETURN_T:
return self.parse_return(line_id)
raise ParserError("Expected to find start of statement.",
self.current)
# PRINT "COMMA",SEPARATED,7154
def parse_print(self, line_id: int) -> PrintStatement:
print_token = self.consume(TokenType.PRINT)
printables: list[str | NumericExpression] = []
last_col: int = print_token.col_end
while True: # Keep finding things to print
if self.current.kind is TokenType.STRING:
string = self.consume(TokenType.STRING)
printables.append(cast(str, string.associated_value))
last_col = string.col_end
elif (expression := self.parse_numeric_expression()) is not None:
printables.append(expression)
last_col = expression.col_end
else:
raise ParserError("Only strings and numeric expressions "
"allowed in print list.", self.current)
# Comma means there's more to print
if not self.out_of_tokens and self.current.kind is TokenType.COMMA:
self.consume(TokenType.COMMA)
continue
break
return PrintStatement(line_id=line_id, line_num=print_token.line_num,
col_start=print_token.col_start, col_end=last_col,
printables=printables)
# IF BOOLEAN_EXPRESSION THEN STATEMENT
def parse_if(self, line_id: int) -> IfStatement:
if_token = self.consume(TokenType.IF_T)
boolean_expression = self.parse_boolean_expression()
self.consume(TokenType.THEN)
statement = self.parse_statement(line_id)
return IfStatement(line_id=line_id, line_num=if_token.line_num,
col_start=if_token.col_start, col_end=statement.col_end,
boolean_expr=boolean_expression, then_statement=statement)
# LET VARIABLE = VALUE
def parse_let(self, line_id: int) -> LetStatement:
let_token = self.consume(TokenType.LET)
variable = self.consume(TokenType.VARIABLE)
self.consume(TokenType.EQUAL)
expression = self.parse_numeric_expression()
return LetStatement(line_id=line_id, line_num=let_token.line_num,
col_start=let_token.col_start, col_end=expression.col_end,
name=cast(str, variable.associated_value), expr=expression)
# GOTO NUMERIC_EXPRESSION
def parse_goto(self, line_id: int) -> GoToStatement:
goto_token = self.consume(TokenType.GOTO)
expression = self.parse_numeric_expression()
return GoToStatement(line_id=line_id, line_num=goto_token.line_num,
col_start=goto_token.col_start, col_end=expression.col_end,
line_expr=expression)
# GOSUB NUMERIC_EXPRESSION
def parse_gosub(self, line_id: int) -> GoSubStatement:
gosub_token = self.consume(TokenType.GOSUB)
expression = self.parse_numeric_expression()
return GoSubStatement(line_id=line_id, line_num=gosub_token.line_num,
col_start=gosub_token.col_start, col_end=expression.col_end,
line_expr=expression)
# RETURN
def parse_return(self, line_id: int) -> ReturnStatement:
return_token = self.consume(TokenType.RETURN_T)
return ReturnStatement(line_id=line_id, line_num=return_token.line_num,
col_start=return_token.col_start, col_end=return_token.col_end)
# NUMERIC_EXPRESSION BOOLEAN_OPERATOR NUMERIC_EXPRESSION
def parse_boolean_expression(self) -> BooleanExpression:
left = self.parse_numeric_expression()
if self.current.kind in {TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.EQUAL,
TokenType.LESS, TokenType.LESS_EQUAL, TokenType.NOT_EQUAL}:
operator = self.consume(self.current.kind)
right = self.parse_numeric_expression()
return BooleanExpression(line_num=left.line_num,
col_start=left.col_start, col_end=right.col_end,
operator=operator.kind, left_expr=left, right_expr=right)
raise ParserError(f"Expected boolean operator but found "
f"{self.current.kind}.", self.current)
def parse_numeric_expression(self) -> NumericExpression:
left = self.parse_term()
# Keep parsing +s and -s until there are no more
while True:
if self.out_of_tokens: # What if expression is end of file?
return left
if self.current.kind is TokenType.PLUS:
self.consume(TokenType.PLUS)
right = self.parse_term()
left = BinaryOperation(line_num=left.line_num, col_start=left.col_start,
col_end=right.col_end, operator=TokenType.PLUS,
left_expr=left, right_expr=right)
elif self.current.kind is TokenType.MINUS:
self.consume(TokenType.MINUS)
right = self.parse_term()
left = BinaryOperation(line_num=left.line_num, col_start=left.col_start,
col_end=right.col_end, operator=TokenType.MINUS,
left_expr=left, right_expr=right)
else:
break # No more, must be end of expression
return left
def parse_term(self) -> NumericExpression:
left = self.parse_factor()
# Keep parsing *s and /s until there are no more
while True:
if self.out_of_tokens: # What if expression is end of file?
return left
if self.current.kind is TokenType.MULTIPLY:
self.consume(TokenType.MULTIPLY)
right = self.parse_factor()
left = BinaryOperation(line_num=left.line_num, col_start=left.col_start,
col_end=right.col_end, operator=TokenType.MULTIPLY,
left_expr=left, right_expr=right)
elif self.current.kind is TokenType.DIVIDE:
self.consume(TokenType.DIVIDE)
right = self.parse_factor()
left = BinaryOperation(line_num=left.line_num, col_start=left.col_start,
col_end=right.col_end, operator=TokenType.DIVIDE,
left_expr=left, right_expr=right)
else:
break # No more, must be end of expression
return left
def parse_factor(self) -> NumericExpression:
if self.current.kind is TokenType.VARIABLE:
variable = self.consume(TokenType.VARIABLE)
return VarRetrieve(line_num=variable.line_num,
col_start=variable.col_start, col_end=variable.col_end,
name=cast(str, variable.associated_value))
elif self.current.kind is TokenType.NUMBER:
number = self.consume(TokenType.NUMBER)
return NumberLiteral(line_num=number.line_num,
col_start=number.col_start, col_end=number.col_end,
number=int(cast(str, number.associated_value)))
elif self.current.kind is TokenType.OPEN_PAREN:
self.consume(TokenType.OPEN_PAREN)
expression = self.parse_numeric_expression()
if self.current.kind is not TokenType.CLOSE_PAREN:
raise ParserError("Expected matching close parenthesis.", self.current)
self.consume(TokenType.CLOSE_PAREN)
return expression
elif self.current.kind is TokenType.MINUS:
minus = self.consume(TokenType.MINUS)
expression = self.parse_factor()
return UnaryOperation(line_num=minus.line_num,
col_start=minus.col_start, col_end=expression.col_end,
operator=TokenType.MINUS, expr=expression)
raise ParserError("Unexpected token in numeric expression.", self.current)