-
Notifications
You must be signed in to change notification settings - Fork 2
/
ast-parse.py
executable file
·291 lines (241 loc) · 9.3 KB
/
ast-parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/usr/bin/env python3
import os
import json
import sys
# Load the JSON AST dump. The encoding is given explicitly so the result
# does not depend on the platform's locale default.
with open('build/slurped-ast.json', encoding='utf-8') as f:
    ast_json = json.load(f)

# Pick the top-level node whose mangled name is "_main"; if several match,
# the last one wins.
main_node_json = None
for node in ast_json:
    # .get() because not every top-level node carries a "mangledName" key
    if node.get("mangledName") == "_main":
        main_node_json = node

# Everything below needs this node, so stop with a clear message instead of
# failing later on a None lookup.
if main_node_json is None:
    sys.exit("No node with mangledName '_main' found in build/slurped-ast.json")
# collect nodes ahead of time
def collect_nodes_by_id(node, ids_to_nodes):
    """Fill ``ids_to_nodes`` with every node of the tree, keyed by ``node["id"]``.

    Children listed under ``"inner"`` are registered before their parent.
    The mapping is mutated in place; nothing is returned.
    """
    children = node["inner"] if "inner" in node else ()
    for child in children:
        collect_nodes_by_id(child, ids_to_nodes)
    ids_to_nodes[node["id"]] = node
class Node:
    """Base wrapper around one node dict of the JSON AST.

    The constructor copies the fields used elsewhere in this script out of
    ``node_json``; a key that is absent yields ``None``. Subclasses override
    ``str_with_depth``; calling the base implementation exits the program.
    """

    def __init__(self, node_json):
        """Populate the attributes from ``node_json`` (the dict for one node)."""
        self.kind = node_json.get("kind")
        self.id = node_json.get("id")
        # Raw child dicts; the wrapped children live in ``inner_nodes``.
        self.inner_json = node_json.get("inner")
        self.loc = node_json.get("loc")
        self.mangledName = node_json.get("mangledName")
        # "type" is a nested object; keep only its "qualType" entry.
        self.type = node_json.get("type")
        if self.type:
            self.type = self.type.get("qualType")
        self.receiverKind = node_json.get("receiverKind")
        self.value = node_json.get("value")
        self.name = node_json.get("name")
        # "classType" is nested the same way as "type".
        self.classType = node_json.get("classType")
        if self.classType:
            self.classType = self.classType.get("qualType")
        self.selector = node_json.get("selector")
        # Starts empty; filled with wrapped child nodes after construction.
        self.inner_nodes = []

    def str_with_depth(self, depth = 0):
        """Describe the node at the given indent depth; subclasses must override."""
        sys.exit('Invalid call to Superclass "Node"')

    def __str__(self):
        """Return the depth-0 description of the node."""
        # The result has to be returned: __str__ yielding None is a TypeError.
        return self.str_with_depth(depth=0)
class StringLiteralNode(Node):
    """Node wrapper for the ``StringLiteral`` kind."""

    def expr(self):
        """Return the literal's ``value`` unchanged."""
        # TODO likely have to do encoding here for quote issues
        return self.value

    def str_with_depth(self, depth = 0):
        """Return ``kind`` and ``value`` on one line, indented by ``depth`` tabs."""
        pieces = ("\t" * depth, " ", self.kind, ": ", " ", self.value)
        return "".join(pieces)
class IntegerLiteralNode(Node):
def str_with_depth(self, depth = 0):
s = "\t" * depth + " " + self.kind + ": "
s += self.value
return s
def expr(self):
"""
NSString selectors
intValue
The integer value of the string.
integerValue
The NSInteger value of the string.
longLongValue
"""
ns_string_selector = ""
t = self.type
match t:
case "unsigned long":
ns_string_selector = 'integerValue'
case "unsigned long long":
ns_string_selector = 'longLongValue'
case _:
# this is expected to be "int" or "bool", warn if that's not correct
if self.type not in ["int", "bool"]:
print(f"[WARNING] Unexpected type for {self.__name__}: {t}")
ns_string_selector = 'intValue'
expr = f"FUNCTION('{self.value}','{ns_string_selector}')"
return expr
class FloatingLiteralNode(Node):
def str_with_depth(self, depth = 0):
s = "\t" * depth + " " + self.kind + ": "
s += self.value
return s
def expr(self):
ns_string_selector = ""
t = self.type
match t:
case "float":
ns_string_selector = 'floatValue'
case _:
# this is expected to be "int" or "bool", warn if that's not correct
if self.type not in ["int", "bool"]:
print(f"[WARNING] Unexpected type for {self.__name__}: {t}")
ns_string_selector = 'intValue'
expr = f"FUNCTION('{self.value}','{ns_string_selector}')"
return expr
class ObjCStringLiteralNode(Node):
    """Node wrapper for the ``ObjCStringLiteral`` kind."""

    def expr(self):
        """Return the expression of the first inner node."""
        # The first inner node is expected to be a StringLiteral; its
        # expression is passed up as-is.
        first_child = self.inner_nodes[0]
        return first_child.expr()

    def str_with_depth(self, depth = 0):
        """Return kind, type and the first child's value, indented by ``depth`` tabs."""
        literal_value = self.inner_nodes[0].value
        pieces = ("\t" * depth, " ", self.kind, ": ", self.type, " ", literal_value)
        return "".join(pieces)
class ObjCMessageExprNode(Node):
    """Node wrapper for the ``ObjCMessageExpr`` kind (a message send)."""

    def str_with_depth(self, depth = 0):
        """Return kind plus ``[classType ]selector``, indented by ``depth`` tabs."""
        class_and_selector = ""
        if self.receiverKind == "class":
            class_and_selector += self.classType + " "
        class_and_selector += self.selector
        s = "\t" * depth + " " + self.kind + ":"
        s += " " + class_and_selector
        return s

    def expr(self):
        """Return ``FUNCTION(<receiver>,'<selector>'[,<arg>...])``.

        There are two receiver forms:

        * class receiver: the receiver is generated as
          ``CAST('<classType>','Class')`` and every inner node is an argument;
        * otherwise (e.g. a chained call): the first inner node's expression
          is the receiver and the remaining inner nodes are the arguments.
        """
        if self.receiverKind == "class":
            receiver = f"CAST('{self.classType}','Class')"
            arguments = self.inner_nodes
        else:
            # The first inner node is the receiver; slicing leaves inner_nodes intact.
            receiver = self.inner_nodes[0].expr()
            arguments = self.inner_nodes[1:]
        rendered_arguments = "".join(f",{argument.expr()}" for argument in arguments)
        return f"FUNCTION({receiver},'{self.selector}'{rendered_arguments})"
class ImplicitCastExprNode(Node):
    """Node wrapper for the ``ImplicitCastExpr`` kind; transparent for expressions."""

    def expr(self):
        """Return the expression of the first inner node."""
        # The first inner node is expected to be the only one; its
        # expression is bubbled up unchanged.
        wrapped = self.inner_nodes[0]
        return wrapped.expr()

    def str_with_depth(self, depth = 0):
        """Return the node kind followed by a colon, indented by ``depth`` tabs."""
        return "".join(("\t" * depth, " ", self.kind, ":"))
class FunctionDeclNode(Node):
    """Node wrapper for the ``FunctionDecl`` kind."""

    def expr(self):
        """Return the expression of the first inner node."""
        # The first inner node's expression stands in for the whole function.
        body = self.inner_nodes[0]
        return body.expr()

    def str_with_depth(self, depth = 0):
        """Return the node kind followed by a colon, indented by ``depth`` tabs."""
        return "".join(("\t" * depth, " ", self.kind, ":"))
class CompoundStmtNode(Node):
    """Node wrapper for the ``CompoundStmt`` kind (a sequence of inner nodes)."""

    def expr(self):
        """Fold the inner nodes' expressions into nested FUNCTION() calls.

        The first inner node is wrapped on its own; each following inner node
        wraps the expression built so far together with its own expression.
        """
        statements = self.inner_nodes
        chained = f"FUNCTION(CAST('NSNull','Class'),'alloc',{statements[0].expr()})"
        for statement in statements[1:]:
            chained = f"FUNCTION(CAST('NSNull','Class'),'alloc', {chained}, {statement.expr()})"
        return chained

    def str_with_depth(self, depth = 0):
        """Return the node kind followed by a colon, indented by ``depth`` tabs."""
        return "".join(("\t" * depth, " ", self.kind, ":"))
def parse_nodes_recursively(node_json):
    """Wrap ``node_json`` and all of its ``"inner"`` descendants in Node objects.

    Args:
        node_json: dict for one AST node; must contain a ``"kind"`` key.

    Returns:
        The Node subclass instance for ``node_json``, with ``inner_nodes``
        holding the wrapped children in their original order.

    Raises:
        ValueError: if the node's kind has no wrapper class. The kind is
            also printed first.
    """
    kind = node_json["kind"]
    node_classes = {
        "StringLiteral": StringLiteralNode,
        "IntegerLiteral": IntegerLiteralNode,
        "FloatingLiteral": FloatingLiteralNode,
        "ObjCStringLiteral": ObjCStringLiteralNode,
        "ObjCMessageExpr": ObjCMessageExprNode,
        "ImplicitCastExpr": ImplicitCastExprNode,
        "FunctionDecl": FunctionDeclNode,
        "CompoundStmt": CompoundStmtNode,
    }
    node_class = node_classes.get(kind)
    if node_class is None:
        print("Unrecognized node kind during recursive parsing:", kind)
        # Fail explicitly rather than through an attribute lookup on None.
        raise ValueError(f"Unrecognized node kind: {kind}")

    node = node_class(node_json)
    # inner_json is None (or empty) for leaf nodes
    for inner_node_json in node.inner_json or ():
        node.inner_nodes.append(parse_nodes_recursively(inner_node_json))
    return node
# Need to use tail recursion to format printing correctly
# meaning not all nodes are available to look up values.
# Collect the nodes ahead of time and pass them in as a dictionary
def visualize_nodes_recursively(node, ids_to_nodes, depth = 0):
node_kind = node["kind"]
print("\t" * depth, node_kind + ":")
match node_kind:
case "StringLiteral":
print("\t" * (depth+1), node["value"])
case "IntegerLiteral":
print("\t" * (depth+1), node["value"])
case "ObjCStringLiteral":
t = node["type"]["qualType"]
v = ids_to_nodes[node["inner"][0]["id"]]["value"]
print("\t" * (depth+1), t, v)
case "ObjCMessageExpr":
class_and_selector = ""
if node["receiverKind"] == "class":
class_and_selector += node["classType"]["qualType"] + " "
class_and_selector += node["selector"]
print("\t" * (depth+1), class_and_selector)
case "ImplicitCastExpr":
pass
case "FunctionDecl":
pass
case "CompoundStmt":
pass
case _:
print("\t" * (depth+1), "unrecognized node kind:", node_kind)
print()
if "inner" in node:
for inner_node in node["inner"]:
visualize_nodes_recursively(inner_node, ids_to_nodes, depth+1)
def print_nodes_recursively(node, depth = 0):
    """Print ``str_with_depth`` for ``node`` and every descendant, parents first.

    Children are printed in ``inner_nodes`` order, each one level deeper
    than its parent.
    """
    pending = [(node, depth)]
    while pending:
        current, level = pending.pop()
        print(current.str_with_depth(depth=level))
        # Pushed in reverse so the first child is popped (and printed) first.
        pending.extend((child, level + 1) for child in reversed(current.inner_nodes))
def create_nsexpr_recursively(node):
    """Print ``kind`` and ``expr()`` for ``node``, then for each descendant in order."""
    kind, rendered = node.kind, node.expr()
    print(kind, rendered)
    for child in node.inner_nodes:
        create_nsexpr_recursively(child)
try:
    main_node = parse_nodes_recursively(main_node_json)
    # print the constructed expression(s)
    print(main_node.expr())
    # uncomment to print all node expressions in a parsable node structure
    ##############
    # print_nodes_recursively(main_node)
    # create_nsexpr_recursively(main_node)
    ##############
except Exception as err:
    # `Exception` rather than a bare except, so SystemExit and
    # KeyboardInterrupt still stop the script. The reason goes to stderr,
    # keeping stdout for the outline below.
    print(f"[ERROR] Could not build the expression: {err!r}", file=sys.stderr)
    # Fall back to an outline of the raw JSON that could not be parsed.
    ##############
    ids_to_nodes = {}
    collect_nodes_by_id(main_node_json, ids_to_nodes)
    print(f"collected {len(ids_to_nodes)} nodes ahead of time")
    visualize_nodes_recursively(main_node_json, ids_to_nodes)
    ##############