-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdependency_analyzer
executable file
·310 lines (227 loc) · 11 KB
/
dependency_analyzer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#!/usr/bin/env python
import os
import sys
from subprocess import check_output
import re
from chashutil import get_param_of
from chashutil import static_vars
from chashutil import read_info_files, read_info_files_functions_only
from chashutil import FUNCTION_PREFIX, VARIABLE_PREFIX, \
INFO_EXTENSION, OUTPUT_FLAG, HELP_FLAG
from clang_hash_global import get_global_hash
"""This script implements call graph mining and only works on the hashes of
functions. Other hashes are ignored.
"""
# Command-line flags understood by this script. According to usage(), the
# --dependents, --direct and --dependees flags are followed by a <symbol>
# argument; the remaining flags are given on their own.
CHANGED_FUNCS_FLAG = '--changed-functions'
IMPACTED_FUNCS_FLAG = '--impacted-functions'
DEPENDEES_FLAG = '--dependees'
DEPENDENTS_FLAG = '--dependents'
DIRECT_FLAG = '--direct'
CHASH_GLOBAL_FLAG = '--chash-global'
# One-line description per flag; usage() looks these up when printing the help.
HELP_TEXTS = {
CHANGED_FUNCS_FLAG : 'list of all functions that changed',
IMPACTED_FUNCS_FLAG : 'list of all functions that changed or a depend on a function that changed or is impacted',
DEPENDENTS_FLAG : 'list of symbols that depend on the symbol',
DIRECT_FLAG : 'list of symbols that the function calls directly (i.e. the cHash dependency list)',
DEPENDEES_FLAG : 'output list of symbols the symbol depends on',
CHASH_GLOBAL_FLAG : 'output list of symbols whose global hash changed'
}
def usage():
    """Print the command-line help to stdout.

    Every flag gets one entry consisting of its help text and an example
    invocation. Flags that expect a symbol are listed first, followed by
    the flags that are used on their own.
    """
    script = sys.argv[0]
    symbol_flags = (DEPENDENTS_FLAG, DIRECT_FLAG, DEPENDEES_FLAG)
    plain_flags = (CHANGED_FUNCS_FLAG, IMPACTED_FUNCS_FLAG, CHASH_GLOBAL_FLAG)

    print("Usage:")
    sections = (
        (symbol_flags, " %s\n\t%s %s <symbol>\n"),
        (plain_flags, " %s\n\t%s %s\n"),
    )
    for flags, template in sections:
        for option in flags:
            print(template % (HELP_TEXTS[option], script, option))
def get_dependents(symbol, call_graph):
    """Return the set of all symbols that depend on symbol.

    A symbol s depends on symbol if s calls it directly (symbol is listed in
    call_graph[s]) or indirectly through other callers, i.e. the result is
    the transitive closure of the "is called by" relation. symbol itself is
    part of the result only if it lies on a call cycle.

    Args:
        symbol: The symbol whose dependents are wanted. It does not have to
            be a key of call_graph; unknown symbols simply have no callers.
        call_graph: A dict mapping each symbol to the collection of symbols
            it calls directly.
    Returns:
        A new set with all direct and indirect dependents of symbol.
    """
    # Invert the graph once (callee -> direct callers) so that the search
    # below does not have to rescan the whole call graph for every symbol.
    callers_of = {}
    for caller, callees in call_graph.items():
        for callee in callees:
            callers_of.setdefault(callee, set()).add(caller)

    # Iterative worklist instead of recursion: every symbol is expanded at
    # most once per discovery, so deep call chains cannot exhaust the
    # recursion limit and shared sub-graphs are not walked repeatedly.
    dependents = set()
    worklist = [symbol]
    while worklist:
        current = worklist.pop()
        for caller in callers_of.get(current, ()):
            if caller not in dependents:
                dependents.add(caller)
                worklist.append(caller)
    return dependents
def get_dependees(symbol, call_graph):
    """Return the set of all symbols that symbol depends on.

    These are the symbols called by symbol directly or indirectly (the
    transitive closure of the call relation). Only symbols that are keys of
    call_graph are reported; callees without an entry are skipped. symbol
    itself is part of the result only if it lies on a call cycle.

    Args:
        symbol: The symbol to start from; must be a key of call_graph.
        call_graph: A dict mapping each symbol to the collection of symbols
            it calls directly.
    Returns:
        A new set with all direct and indirect dependees of symbol.
    Raises:
        KeyError: If symbol is not a key of call_graph.
    """
    # Iterative worklist with purely local state: a failed lookup of the root
    # symbol cannot leave anything behind that would distort later calls, and
    # deep call chains cannot exhaust the recursion limit.
    dependees = set()
    worklist = [symbol]
    while worklist:
        current = worklist.pop()
        for callee in call_graph[current]:
            if callee in call_graph and callee not in dependees:
                dependees.add(callee)
                worklist.append(callee)
    return dependees
def get_dependency_graph_of(symbol, call_graph):
    """Extract the part of the call graph that is rooted at symbol.

    The result maps symbol, plus every symbol reported by get_dependees for
    it, to that symbol's entry in call_graph.
    """
    members = get_dependees(symbol, call_graph)
    members.add(symbol)  # the root belongs to its own sub-graph
    subgraph = {}
    for member in members:
        subgraph[member] = call_graph[member]
    return subgraph
def get_changed_functions_from_commit(path, rev = "HEAD"):
    """Return the names of the functions touched by a git commit.

    Runs `git show <rev>` on the repository at path and inspects the hunk
    header lines ("@@ -a,b +c,d @@ <context>"). From the context text after
    the second "@@" the first identifier that is followed by an opening
    parenthesis is taken as the function name. This is a heuristic on git's
    hunk headers, not a real parse of the changed code.

    Args:
        path: Directory of the working tree; its ".git" sub-directory is
            passed to git as --git-dir.
        rev: The revision to inspect.
    Returns:
        A set of function names found in the hunk headers.
    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    git_dir = os.path.join(path, '.git')
    diff = check_output(['git', '--git-dir', git_dir, 'show', rev])
    # On Python 3 check_output returns bytes; on Python 2 the result already
    # is a str and is used unchanged.
    if not isinstance(diff, str):
        diff = diff.decode('utf-8', 'replace')

    # TODO: this is odd, it has been seen to return 'get_record_from'
    name_before_paren = re.compile(r'.*?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(')

    fns = set()
    for line in diff.split("\n"):
        if not line.startswith("@@"):
            continue
        parts = line.split("@@", 2)
        if len(parts) < 3:
            # Starts with "@@" but has no closing "@@": not a hunk header.
            continue
        m = name_before_paren.search(parts[2])
        if m:
            fns.add(m.group(1))
    return fns
def get_changed_functions(local_hashes, prev_local_hashes): # TODO: rename get_changed_elements
    """Classify symbols by comparing the current hashes with the previous ones.

    Args:
        local_hashes: A dict mapping symbol -> hash for the current state.
        prev_local_hashes: A dict mapping symbol -> hash for the previous state.
    Returns:
        A tuple of three sets (modified, added, deleted):
        modified holds symbols present in both dicts whose hash differs,
        added holds symbols only present in local_hashes,
        deleted holds symbols only present in prev_local_hashes.
    """
    now = set(local_hashes.keys())
    before = set(prev_local_hashes.keys())

    added = now - before
    deleted = before - now
    # Only symbols that exist on both sides can count as modified.
    modified = set(
        name for name in now & before
        if local_hashes[name] != prev_local_hashes[name]
    )
    return (modified, added, deleted)
def get_impacted_funcs(changed_functions, call_graph):
    """Collect every function that is affected by a set of changes.

    Args:
        changed_functions: A set of all functions that changed.
        call_graph: A dict with all dependencies.
    Returns:
        A set containing the changed functions themselves plus everything
        get_dependents reports for each of them.
    """
    impacted = set(changed_functions)
    dependents_per_change = [
        get_dependents(changed, call_graph) for changed in changed_functions
    ]
    for dependents in dependents_per_change:
        impacted.update(dependents)
    return impacted
def get_global_hashes(local_hashes, used_definitions):
    """Compute the global hashes for all symbols that have a local hash.

    Calls get_global_hash once per key of local_hashes, handing it one shared
    result dict, and returns that dict. The return values of the individual
    calls are ignored, so get_global_hash is presumably expected to store its
    results in the dict it is given (verify in clang_hash_global).
    """
    collected = {}
    for name in local_hashes:
        get_global_hash(name, collected, local_hashes, used_definitions)
    return collected
def get_fake_global_hash(symbol, global_hashes, local_hashes, used_definitions):
    """Return the fake global hash of symbol and memoize it in global_hashes.

    The fake global hash is 1 if symbol itself has the local hash 1 or if any
    definition it uses, directly or indirectly, has the local hash 1;
    otherwise it is 0. Symbols without a local hash are assumed to be library
    functions: they have no hash and are not followed any further.

    The search is iterative and keeps all of its state local, so cyclic
    dependencies are handled without recursion and without state shared with
    other functions. A 0 is only memoized once everything reachable from the
    symbol has been inspected, so cycles cannot produce a premature 0.

    Args:
        symbol: The symbol to compute the hash for.
        global_hashes: A dict symbol -> fake global hash; used as cache and
            updated in place.
        local_hashes: A dict symbol -> 1 (changed) or 0 (unchanged).
        used_definitions: A dict symbol -> collection of symbols it uses.
    Returns:
        None if symbol has no local hash, otherwise 0 or 1.
    """
    # If a symbol does not have a local hash, we assume it is a library
    # function and therefore does not require a local hash.
    if symbol not in local_hashes:
        return None

    if symbol in global_hashes:
        return global_hashes[symbol]

    visited = set([symbol])
    pending = [symbol]
    changed = False
    while pending and not changed:
        current = pending.pop()
        if local_hashes[current] == 1:
            changed = True
        elif current in global_hashes:
            # Already known: a non-zero entry settles the question, a zero
            # entry means nothing reachable from there changed, so there is
            # no need to expand it again.
            changed = global_hashes[current] != 0
        else:
            for used_def in used_definitions.get(current, ()):
                if used_def in local_hashes and used_def not in visited:
                    visited.add(used_def)
                    pending.append(used_def)

    if changed:
        global_hashes[symbol] = 1
    else:
        # Nothing reachable from symbol changed, hence the same holds for
        # every symbol that was visited on the way.
        for seen in visited:
            global_hashes[seen] = 0
    return global_hashes[symbol]
def get_fake_global_hashes(changed_functions, used_definitions):
    """Compute fake global hashes for all keys of used_definitions.

    Every key of used_definitions gets the fake local hash 1 if it is in
    changed_functions and 0 otherwise. get_fake_global_hash is then called
    for each of them with one shared result dict, which is returned.
    """
    fake_local = {}
    for name in used_definitions.keys():
        fake_local[name] = 1 if name in changed_functions else 0

    fake_global = {}
    for name in fake_local:
        get_fake_global_hash(name, fake_global, fake_local, used_definitions)
    return fake_global
def get_impacted_funcs_fake_hash(changed_functions, call_graph):
    """Determine the impacted functions via fake global hashes.

    Alternative to get_impacted_funcs, intended to be more efficient: returns
    the set of all symbols whose fake global hash is non-zero.
    """
    hashes = get_fake_global_hashes(changed_functions, call_graph)
    return set(name for name, value in hashes.items() if value != 0)
################################################################################
# Root directory of the analysed project.
# NOTE(review): this is an absolute path into one user's home directory, so the
# script only runs unmodified on that machine; a TODO in the main section below
# mentions taking the path from the command line instead.
PATH_TO_PROJECT = "/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua/"
# Path to the .o.info files of the current commit
CURRENT_HASH_FILES = PATH_TO_PROJECT + "cur_info/"
# Path to .o.info files of the previous commit
PREVIOUS_HASH_FILES = PATH_TO_PROJECT + "prev_info/"
if __name__ == "__main__":
    # Command-line entry point: exactly one analysis mode is selected by the
    # first matching flag; without arguments or with the help flag the usage
    # text is printed.
    args = sys.argv
    argc = len(args)

    if argc > 1 and HELP_FLAG not in args:
        # TODO: get the working directory / project path from the command
        # line. The former idea of using the 4th parameter as path
        # (args[3] + os.chdir) does not hold anymore.

        # get data
        if CHASH_GLOBAL_FLAG in args:
            local_hashes, used_definitions = read_info_files(CURRENT_HASH_FILES)
            prev_hashes, prev_used_definitions = read_info_files(PREVIOUS_HASH_FILES)

            # TODO: create more modes, perhaps make this flag a little switch (e.g. -c)
            # TODO: also create all the FLAGs below for cHash-global
            # Print all symbols whose global hash changed
            global_hashes = get_global_hashes(local_hashes, used_definitions)
            prev_global_hashes = get_global_hashes(prev_hashes, prev_used_definitions)
            changed_global_hashes = get_changed_functions(global_hashes, prev_global_hashes)
            print(changed_global_hashes)
            sys.exit(0)

        # else, work with call graph based analysis
        local_hashes, call_graph = read_info_files_functions_only(CURRENT_HASH_FILES)
        prev_hashes, prev_call_graph = read_info_files_functions_only(PREVIOUS_HASH_FILES)

        if DEPENDENTS_FLAG in args:
            # get_param_of presumably returns the argument that follows the
            # flag -- confirm in chashutil.
            symbol_to_check = get_param_of(DEPENDENTS_FLAG)
            dependents = get_dependents(symbol_to_check, call_graph)
            print(dependents)

        elif DIRECT_FLAG in args:
            symbol = get_param_of(DIRECT_FLAG)
            # Explicit check instead of an assert: asserts vanish under -O
            # and a traceback is not a helpful answer to a mistyped symbol.
            if symbol not in call_graph:
                sys.stderr.write("%s: unknown symbol '%s'\n" % (args[0], symbol))
                sys.exit(1)
            print(call_graph[symbol])

        elif DEPENDEES_FLAG in args:
            symbol_to_check = get_param_of(DEPENDEES_FLAG)
            # get_dependees looks the symbol up in the call graph, so reject
            # unknown symbols here with a readable message.
            if symbol_to_check not in call_graph:
                sys.stderr.write("%s: unknown symbol '%s'\n" % (args[0], symbol_to_check))
                sys.exit(1)
            dependees = get_dependees(symbol_to_check, call_graph)
            print(dependees)

        elif CHANGED_FUNCS_FLAG in args:
            changed_functions = get_changed_functions(local_hashes, prev_hashes)
            # Show the new and the old hash of every modified function.
            for func in changed_functions[0]:
                print("%s   %s   %s" % (func, local_hashes[func], prev_hashes[func]))
            print("Calced from hashes:")
            print(changed_functions)

            changed_from_commit = get_changed_functions_from_commit(PATH_TO_PROJECT)
            print("Got from commit:")
            print(changed_from_commit)

        elif IMPACTED_FUNCS_FLAG in args:
            changed_funcs = get_changed_functions(local_hashes, prev_hashes)
            # modified, added and deleted functions all count as changed
            changed_funcs_set = changed_funcs[0] | changed_funcs[1] | changed_funcs[2]
            impacted_funcs = get_impacted_funcs(changed_funcs_set, call_graph)
            print(impacted_funcs)

            changed_from_commit = get_changed_functions_from_commit(PATH_TO_PROJECT)
            impacted_from_commit = get_impacted_funcs(changed_from_commit, call_graph)
            print(impacted_from_commit)

        else:
            # None of the known flags was given: show the help instead of
            # silently doing nothing.
            usage()

        # Maybe also offer as an option: print the changes in the dependencies
        # (added/deleted), i.e. new dependencies/removed dependencies.

        # Need the following experiment:
        # 1. build, save hashes, build, save hashes
        # 2. run dependency_analyzer, feed paths to both build's hash files
        # TODO: check if local hash changed, then say changed/not changed based on call graph
        # -> ??? same as cHash? This somehow still has to be turned into an experiment...
        # TODO: apply the approach from Zhang/Li/Sun 2012!
        # IMPORTANT: in the experiment there is no need to first do a "safe"
        # fresh build and only then the real one; it is enough to clean, build,
        # check out the parent, clean and build.
    else:
        usage()