forked from alt-romes/programmer-calculator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.c
438 lines (285 loc) · 12.3 KB
/
parser.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
#include "xmalloc.h"
// Static functions
static exprtree parse_expr(parser_t);
static exprtree parse_or_expr(parser_t);
static exprtree parse_xor_expr(parser_t);
static exprtree parse_and_expr(parser_t);
static exprtree parse_shift_expr(parser_t);
static exprtree parse_add_expr(parser_t);
static exprtree parse_mult_expr(parser_t);
static exprtree parse_prefix_expr(parser_t);
static exprtree parse_atom_expr(parser_t);
static exprtree parse_number(parser_t);
static exprtree parse_stdop_expr(parser_t, char*, exprtree (*) (parser_t));
static exprtree create_exprtree(int, void*, exprtree, exprtree);
int total_trees_created = 0;
int total_trees_freed = 0;
int total_parsers_created = 0;
int total_parsers_freed = 0;
int total_tokens_created = 0;
int total_tokens_freed = 0;
// For a simpler version of this parser check github.com/alt-romes/calculator-c-parser
char* tokenize(char* in) {
char* tokens = xmalloc(sizeof(char) * MAX_TOKENS);
int in_len = strlen(in);
int token_pos = 0;
for (int i = 0; i < in_len; i++)
if (strchr(VALID_TOKENS, in[i]))
tokens[token_pos++] = in[i];
tokens[token_pos] = '\0';
total_tokens_created++;
return tokens;
}
exprtree parse(char* tokens) {
// TODO: How to stop with errors?
// attention: allocate size for *struct parser_t*, because *parser_t* is type defined as a pointer to *struct parser_t*
parser_t parser = xmalloc(sizeof(struct parser_t));
total_parsers_created++;
assert(tokens != NULL);
parser->tokens = tokens;
int ntokens = strlen(tokens);
assert(ntokens > 0);
parser->ntokens = ntokens;
parser->pos = 0;
exprtree expression = parse_expr(parser);
free(parser->tokens);
free(parser);
total_parsers_freed++;
total_tokens_freed++;
return expression;
}
long long calculate(exprtree expr) {
// expr shouldn't be null if being calculated.
assert(expr != NULL);
// all expressions have 2 operands because 1 operand operators are taken care of immediately
if (expr->type == OP_TYPE) {
long long left_value = calculate(expr->left);
long long right_value = calculate(expr->right);
// Execute takes the operands switched because the stack inverts the order of the numbers
long long value = expr->op->execute(right_value, left_value);
return value & globalmask;
}
else {
// Expression is a leaf (is a number) - so return the number directly
return *(expr->value) & globalmask;
}
}
void free_exprtree(exprtree expr) {
if (expr) {
if (expr->left)
free_exprtree(expr->left);
if (expr->right)
free_exprtree(expr->right);
if (expr->type != OP_TYPE)
free(expr->value);
free(expr);
total_trees_freed++;
}
}
static exprtree parse_expr(parser_t parser) {
// Grammar rule: expression := or_exp
return parse_or_expr(parser);
}
static exprtree parse_or_expr(parser_t parser) {
// Grammar rule: or_exp := xor_exp ( (| | $) xor_exp )*
char ops[] = {OR_SYMBOL, NOR_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_xor_expr);
}
static exprtree parse_xor_expr(parser_t parser) {
// Grammar rule: xor_exp := and_exp (^ and_exp)*
char ops[] = {XOR_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_and_expr);
}
static exprtree parse_and_expr(parser_t parser) {
// Grammar rule: and_exp := shift_exp (& shift_exp)*
char ops[] = {AND_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_shift_expr);
}
static exprtree parse_shift_expr(parser_t parser) {
// Grammar rule: shift_exp := add_exp ((<< | >> | ror | rol) add_exp)*
char ops[] = {SHRA_SYMBOL, SHR_SYMBOL, SHL_SYMBOL, ROR_SYMBOL, ROL_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_add_expr);
}
static exprtree parse_add_expr(parser_t parser) {
// Grammar rule: add_exp := mult_exp ((+ | -) mult_exp)*
char ops[] = {ADD_SYMBOL, SUB_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_mult_expr);
}
static exprtree parse_mult_expr(parser_t parser) {
// Grammar rule: mult_exp := not_exp ((* | / | %) not_exp)*
char ops[] = {POW_SYMBOL, MUL_SYMBOL, DIV_SYMBOL, MOD_SYMBOL, '\0'};
return parse_stdop_expr(parser, ops, parse_prefix_expr);
}
static exprtree parse_prefix_expr(parser_t parser) {
// Grammar rule: prefix_exp := (~ | + | -)? atom_exp
// TODO: Display input invalid instead of using a zero-val expression
if (!(parser->pos < parser->ntokens)) {
long long zerov = 0;
return create_exprtree(DEC_TYPE, &zerov, NULL, NULL);
}
char prefixes[] = {ADD_SYMBOL, SUB_SYMBOL, NOT_SYMBOL, TWOSCOMPLEMENT_SYMBOL, '\0'};
// If we've exceeded the number of tokens we should detect an error
assert(parser->pos < parser->ntokens);
char prefix = 0;
if (strchr(prefixes, parser->tokens[parser->pos])) {
// Find + or - before the number (to make numbers positive or negative)
// When the symbol found is +, there's no need to do anything
prefix = parser->tokens[parser->pos];
parser->pos++; // Consume token
}
exprtree expr = parse_atom_expr(parser);
if (prefix == 0 || prefix == ADD_SYMBOL) // Do nothing to expression
return expr;
else {
// Prefix is either SUB_SYMBOL, NOT_SYMBOL or TWOSCOMPLEMENT_SYMBOL
// We get the operation to use in the expression
operation* op = getopcode(prefix);
// SUB sets the symmetric of number with the expression (0 - expression), so we create a
// subtraction expression with 0 as the left tree
// Bitwise NOT of a number - only one parameter is used:
// And because the order is changed in execute(), put a number in the right expr instead of the left one
// apply the NOT operation to the number on the right branch. The other branch doesn't matter so we set it as the same as SUB
// Two's complement serves the same logic - since it only uses one parameter we can set the other as anything
// So we create an expression with 0 on the left, and the correct op, and it works
long long zero_val = 0;
exprtree zero_val_expr = create_exprtree(DEC_TYPE, &zero_val, NULL, NULL);
return create_exprtree(OP_TYPE, op, zero_val_expr, expr);
}
}
static exprtree parse_atom_expr(parser_t parser) {
// Grammar rule: atom_expr := number | left_parenthesis expression right_parenthesis
// TODO: An error should be displayed here instead of return a zero value expression.
// This assertion fails if the only token is a prefix, the prefix is read, and this function is called without enough tokens
// to be parsed. There are possibly more cases
if (!(parser->pos < parser->ntokens)) {
long long zerov = 0;
return create_exprtree(DEC_TYPE, &zerov, NULL, NULL);
}
// If we've exceeded the number of tokens we should detect an error
assert(parser->pos < parser->ntokens);
exprtree expr;
if (parser->tokens[parser->pos] == LPAR_SYMBOL) {
// If the atomic expression starts with parenthesis
parser->pos++; // Consume left parenthesis
expr = parse_expr(parser);
// If we've exceeded the number of tokens we should detect an error
// This assertion is triggered if an expression without right parenthesis is the input
// It should be handled as an error below
/* assert(parser->pos < parser->ntokens); */
if (parser->tokens[parser->pos] == RPAR_SYMBOL)
parser->pos++; // Consume right parenthesis
else {
// For now, everything to the right of an unclosed left parenthesis will be equivalent to 0
long long zerov = 0;
return create_exprtree(DEC_TYPE, &zerov, NULL, NULL);
// TODO: Find a way to do error handling and displaying, possibly give one more type to exprtree type = ERR_TYPE and have in the union a char* for the error message
/* fprintf(stderr, "Invalid expression!!!\n"); */
}
}
else {
// If it doesn't start with parenthesis then it's a normal number
expr = parse_number(parser);
}
return expr;
}
static exprtree parse_number(parser_t parser) {
// Grammar rule: number: ( (0-9)+ | 0x(0-9a-f)+ | 0b(0-1)+ )
// If we've exceeded the number of tokens we should detect an error
assert(parser->pos < parser->ntokens);
int numbertype = DEC_TYPE;
if (parser->pos+1 < parser->ntokens) {
switch (parser->tokens[parser->pos]) {
case '0':
if (parser->tokens[parser->pos+1] == 'b') {
// Enter if number is binary
numbertype = BIN_TYPE;
parser->pos += 2;
break;
} else if (parser->tokens[parser->pos+1] == 'x'){
// Enter is number is hex
goto hex;
} else {
// Enter if number is decimal
break;
}
case 'x':
hex:
numbertype = HEX_TYPE;
// If x was 1st character add 1 otherwise add 2 to parser->pos
parser->pos += (parser->tokens[parser->pos] == 'x') ? 1 : 2;
}
}
char numberfound[MAX_TOKENS + 1];
int numberlen = 0;
while ( parser->pos < parser->ntokens &&
((numbertype == DEC_TYPE && strchr(VALID_DEC_SYMBOLS, parser->tokens[parser->pos]))
|| (numbertype == HEX_TYPE && strchr(VALID_HEX_SYMBOLS, parser->tokens[parser->pos]))
|| (numbertype == BIN_TYPE && strchr(VALID_BIN_SYMBOLS, parser->tokens[parser->pos]))) ) {
numberfound[numberlen++] = parser->tokens[parser->pos];
parser->pos++; // Consume 1 digit (1 token)
}
numberfound[numberlen] = '\0';
// If no number was found, return for now a zero value expression
//
// TODO: return the error expression instead
// this happens when the input is valid tokens like abc, but then the number doesn't start with 0x,
// and possibly in other situations
if (numberlen == 0) {
long long zerov = 0;
return create_exprtree(DEC_TYPE, &zerov, NULL, NULL);
}
// Else, create the expression from the found number
int numberbase = 0;
switch (numbertype) {
case DEC_TYPE:
numberbase = 10;
break;
case HEX_TYPE:
numberbase = 16;
break;
case BIN_TYPE:
numberbase = 2;
break;
}
long long value = strtoll(numberfound, NULL, numberbase);
exprtree number_expr = create_exprtree(numbertype, &value, NULL, NULL);
return number_expr;
}
static exprtree parse_stdop_expr(parser_t parser, char* ops, exprtree (*parse_inner_expr) (parser_t)) {
// TODO: We don't want to do this - when the input is badly formatted an error should be displayed.
// This is a temporary fix that returns the expression immediately as zero.
if (!(parser->pos < parser->ntokens)) {
long long zerov = 0;
return create_exprtree(DEC_TYPE, &zerov, NULL, NULL);
}
// When the position gets here it should be smaller than the ntokens, or maybe only inner_expr should worry about it?
assert(parser->pos < parser->ntokens);
exprtree expr = parse_inner_expr(parser);
while (parser->pos < parser->ntokens && strchr(ops, parser->tokens[parser->pos])) {
operation* op = getopcode(parser->tokens[parser->pos]);
parser->pos++; // Consume token
exprtree right_expr = parse_inner_expr(parser);
expr = create_exprtree(OP_TYPE, op, expr, right_expr);
}
return expr;
}
static exprtree create_exprtree(int type, void* content, exprtree left, exprtree right) {
// attention: allocate size for *struct exprtree*, because *exprtree* is type defined as a pointer to *struct exprtree*
exprtree expr = xmalloc(sizeof(struct exprtree));
expr->type = type;
if (type == OP_TYPE)
expr->op = getopcode(*((char*) content));
else {
expr->value = xmalloc(sizeof(long long));
*(expr->value) = *((long long*) content);
}
expr->left = left;
expr->right = right;
total_trees_created++;
return expr;
}