forked from mozilla/gecko-dev
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Expression.py
240 lines (216 loc) · 6.68 KB
/
Expression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Parses and evaluates simple statements for Preprocessor.

Expression currently supports the following grammar; whitespace is ignored:

expression :
  and_cond ( '||' expression ) ? ;
and_cond:
  test ( '&&' and_cond ) ? ;
test:
  unary ( ( '==' | '!=' ) unary ) ? ;
unary :
  '!'? value ;
value :
  [0-9]+ # integer
  | 'defined(' \w+ ')'
  | \w+  # string identifier or value;
"""
import re
class Expression:
    """
    A single preprocessor expression, parsed on construction.

    The constructor runs a recursive-descent parser over the expression
    string and stores the resulting abstract syntax tree in ``self.e``.
    ``evaluate`` then computes the value of that tree against a mapping of
    variable names to values.

    While parsing, ``self.content`` holds the input that has not been
    consumed yet and ``self.offset`` counts the characters consumed so far;
    both are what ParseError reports.
    """

    def __init__(self, expression_string):
        """
        Create a new expression with this string.
        The expression will already be parsed into an Abstract Syntax Tree.

        An empty or whitespace-only string parses to an empty expression
        (``self.e`` is None). Raises Expression.ParseError if the string
        cannot be parsed completely.
        """
        self.content = expression_string
        self.offset = 0
        self.__ignore_whitespace()
        self.e = self.__get_logical_or()
        if self.content:
            # Input is left over that no production could consume.
            raise Expression.ParseError(self)

    def __get_logical_or(self):
        """
        Production: and_cond ( '||' expression ) ?

        Returns None when no input is left, the and_cond node itself when
        no '||' follows it, and otherwise a "logical_op" node holding
        [left, op, right]. Raises Expression.ParseError when '||' is not
        followed by an operand.
        """
        if not self.content:
            return None
        rv = Expression.__AST("logical_op")
        # left operand
        rv.append(self.__get_logical_and())
        self.__ignore_whitespace()
        if self.content[:2] != '||':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        right = self.__get_logical_or()
        if right is None:
            # Dangling '||': fail here instead of storing a None operand
            # that would only blow up later in evaluate().
            raise Expression.ParseError(self)
        rv.append(right)
        self.__ignore_whitespace()
        return rv

    def __get_logical_and(self):
        """
        Production: test ( '&&' and_cond ) ?

        Returns None when no input is left, the test node itself when no
        '&&' follows it, and otherwise a "logical_op" node holding
        [left, op, right]. Raises Expression.ParseError when '&&' is not
        followed by an operand.
        """
        if not self.content:
            return None
        rv = Expression.__AST("logical_op")
        # left operand
        rv.append(self.__get_equality())
        self.__ignore_whitespace()
        if self.content[:2] != '&&':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        right = self.__get_logical_and()
        if right is None:
            # Dangling '&&': see __get_logical_or.
            raise Expression.ParseError(self)
        rv.append(right)
        self.__ignore_whitespace()
        return rv

    def __get_equality(self):
        """
        Production: unary ( ( '==' | '!=' ) unary ) ?

        Returns None when no input is left, the unary node itself when no
        comparison operator follows it, and otherwise an "equality" node
        holding [left, op, right].
        """
        if not self.content:
            return None
        rv = Expression.__AST("equality")
        # unary
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        if not re.match(r'[=!]=', self.content):
            # no equality needed, short cut to our prime unary
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        # A missing right operand makes __get_value raise ParseError.
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        return rv

    def __get_unary(self):
        """
        Production: '!'? value

        Returns the value leaf itself, or a "not" node wrapping it when the
        value is prefixed with '!'.
        """
        # eat whitespace right away, too
        not_ws = re.match(r'!\s*', self.content)
        if not not_ws:
            return self.__get_value()
        rv = Expression.__AST('not')
        self.__strip(not_ws.end())
        rv.append(self.__get_value())
        self.__ignore_whitespace()
        return rv

    def __get_value(self):
        r"""
        Production: ( [0-9]+ | 'defined(' \w+ ')' | \w+ )
        Note that the order is important, and the expression is kind-of
        ambiguous as \w includes 0-9. One could make it unambiguous by
        removing 0-9 from the first char of a string literal.

        Returns a leaf of type 'defined', 'int' or 'string' and consumes
        the matched text plus any whitespace after it. Raises
        Expression.ParseError if the input starts with none of the three.
        """
        rv = None
        m = re.match(r'defined\s*\(\s*(\w+)\s*\)', self.content)
        if m:
            word_len = m.end()
            rv = Expression.__ASTLeaf('defined', m.group(1))
        else:
            # Digits are tried before \w so that numbers become ints.
            word_len = re.match(r'[0-9]*', self.content).end()
            if word_len:
                value = int(self.content[:word_len])
                rv = Expression.__ASTLeaf('int', value)
            else:
                word_len = re.match(r'\w*', self.content).end()
                if word_len:
                    rv = Expression.__ASTLeaf('string',
                                              self.content[:word_len])
                else:
                    raise Expression.ParseError(self)
        self.__strip(word_len)
        self.__ignore_whitespace()
        return rv

    def __ignore_whitespace(self):
        """
        Consume any whitespace at the start of the remaining input.
        """
        ws_len = re.match(r'\s*', self.content).end()
        self.__strip(ws_len)

    def __strip(self, length):
        """
        Remove a given amount of chars from the input and update
        the offset.
        """
        self.content = self.content[length:]
        self.offset += length

    def evaluate(self, context):
        """
        Evaluate the expression with the given context.

        context is a mapping from variable names to values. 'string'
        leaves are looked up with context[name], 'defined' leaves are
        tested with "name in context".

        '&&' and '||' return one of their operands, as Python's "and" and
        "or" do, and only evaluate the right operand when the left one does
        not already decide the result. '==', '!=' and '!' return a bool.

        Raises Expression.ParseError if the expression is empty.
        """
        if self.e is None:
            # Nothing was parsed, so there is no tree to walk.
            raise Expression.ParseError(self)

        # Dispatch a node to the evaluator registered for its type
        def eval_node(tok):
            return opmap[tok.type](tok)

        # Helper function to evaluate __get_equality results
        def eval_equality(tok):
            left = eval_node(tok[0])
            right = eval_node(tok[2])
            rv = left == right
            if tok[1].value == '!=':
                rv = not rv
            return rv

        # Helper function to evaluate __get_logical_and and
        # __get_logical_or results
        def eval_logical_op(tok):
            operator = tok[1].value
            left = eval_node(tok[0])
            if operator == '&&':
                return left and eval_node(tok[2])
            elif operator == '||':
                return left or eval_node(tok[2])
            raise Expression.ParseError(self)

        # Mapping from token types to evaluator functions
        # Apart from (non-)equality, all these can be simple lambda forms.
        opmap = {
            'logical_op': eval_logical_op,
            'equality': eval_equality,
            'not': lambda tok: not eval_node(tok[0]),
            'string': lambda tok: context[tok.value],
            'defined': lambda tok: tok.value in context,
            'int': lambda tok: tok.value}

        return eval_node(self.e)

    class __AST(list):
        """
        Internal class implementing Abstract Syntax Tree nodes
        """
        def __init__(self, type):
            self.type = type
            # Name the base class explicitly: super(self.__class__, self)
            # would recurse forever if this class were ever subclassed.
            list.__init__(self)

    class __ASTLeaf:
        """
        Internal class implementing Abstract Syntax Tree leafs
        """
        def __init__(self, type, value):
            self.value = value
            self.type = type

        def __str__(self):
            return self.value.__str__()

        def __repr__(self):
            return self.value.__repr__()

    class ParseError(Exception):
        """
        Error raised when parsing fails.
        It has two members, offset and content, which give the offset of the
        error and the offending content.
        """
        def __init__(self, expression):
            self.offset = expression.offset
            # Only the first three unparsed characters are kept.
            self.content = expression.content[:3]

        def __str__(self):
            return 'Unexpected content at offset %i, "%s"' % (self.offset,
                                                              self.content)
class Context(dict):
    """
    This class holds variable values by subclassing dict, and while it
    truthfully reports True and False on

      name in context

    it returns the variable name itself on

      context["name"]

    to reflect the ambiguity between string literals and preprocessor
    variables.
    """

    def __getitem__(self, key):
        """
        Return the value stored for key, or key itself if it is not set.
        """
        # dict is named explicitly because super(self.__class__, self)
        # recurses forever as soon as Context is subclassed. Using get()
        # with the key as its own default also avoids a second lookup.
        return dict.get(self, key, key)