Skip to content

Commit

Permalink
Fix the CFGENode serialization heisenbug.
Browse files Browse the repository at this point in the history
  • Loading branch information
ltfish committed Apr 10, 2019
1 parent ccc62bd commit 29fad8e
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 15 deletions.
5 changes: 3 additions & 2 deletions angr/analyses/cfg/cfg_emulated.py
Original file line number Diff line number Diff line change
Expand Up @@ -3033,6 +3033,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None,

# Determine if this is a SimProcedure, and further, if this is a syscall
syscall = None
is_syscall = False
if sim_successors.sort == 'SimProcedure':
is_simprocedure = True
if sa['is_syscall'] is True:
Expand All @@ -3058,7 +3059,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None,
simprocedure_name=simproc_name,
syscall_name=syscall,
no_ret=no_ret,
syscall=syscall,
is_syscall=is_syscall,
function_address=sim_successors.addr,
block_id=block_id,
depth=depth,
Expand All @@ -3072,7 +3073,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None,
self.model,
callstack_key=call_stack.stack_suffix(self.context_sensitivity_level),
input_state=None,
syscall=syscall,
is_syscall=is_syscall,
function_address=func_addr,
block_id=block_id,
depth=depth,
Expand Down
14 changes: 14 additions & 0 deletions angr/knowledge_plugins/cfg/cfg_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,19 @@ def copy(self):
cm.cfgs = self.cfgs.copy()
return cm

#
# Pickling
#

def __getstate__(self):
    """
    Produce the state that gets pickled for this object.

    Only the ``_kb`` and ``cfgs`` attributes are captured; ``__setstate__`` reads back exactly these two keys.

    :return: A dict with the keys ``'_kb'`` and ``'cfgs'``.
    :rtype: dict
    """
    return dict(_kb=self._kb, cfgs=self.cfgs)

def __setstate__(self, state):
    """
    Restore this object from a previously pickled state.

    :param dict state: A mapping holding the ``'_kb'`` and ``'cfgs'`` entries.
    :return: None
    """
    # Attributes are assigned in this order, one at a time, so a missing key raises KeyError
    # only after the earlier attributes have already been set.
    for attr in ('_kb', 'cfgs'):
        setattr(self, attr, state[attr])


KnowledgeBasePlugin.register_default("cfgs", CFGManager)
99 changes: 92 additions & 7 deletions angr/knowledge_plugins/cfg/cfg_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,29 +56,38 @@ def __init__(self,
soot_block=None,
instruction_addrs=None,
thumb=False,
byte_string=None):
byte_string=None,
is_syscall=None,
name=None):
"""
Note: simprocedure_name is not used to recreate the SimProcedure object. It's only there for better
__repr__.
"""

self.addr = addr
self.simprocedure_name = simprocedure_name
self.size = size
self.simprocedure_name = simprocedure_name
self.no_ret = no_ret
self._cfg_model = cfg
self.function_address = function_address
self.block_id = block_id # type: int or tuple
self.thumb = thumb
self.byte_string = byte_string # type: None or bytes

if isinstance(addr, SootAddressDescriptor):
self._name = None
if name is not None:
self._name = name
elif isinstance(addr, SootAddressDescriptor):
self._name = repr(addr)
else:
self._name = simprocedure_name
self.instruction_addrs = list(instruction_addrs) if instruction_addrs is not None else []

self.is_syscall = bool(self.simprocedure_name and self._cfg_model.project.simos.is_syscall_addr(addr))
if is_syscall is not None:
self.is_syscall = is_syscall
else:
self.is_syscall = bool(self.simprocedure_name and self._cfg_model.project.simos.is_syscall_addr(addr))

if not instruction_addrs and not self.is_simprocedure:
# We have to collect instruction addresses by ourselves
if irsb is not None:
Expand Down Expand Up @@ -176,6 +185,43 @@ def parse_from_cmessage(cls, cmsg, cfg=None): # pylint:disable=arguments-differ
)
return obj

#
# Pickling
#

def __getstate__(self):
    """
    Collect the attributes of this node that are written out when it is pickled.

    Each entry of the returned dict is keyed by the name of the attribute it was read from.

    :return: The picklable state of this node.
    :rtype: dict
    """
    persisted = (
        'addr',
        'size',
        'simprocedure_name',
        'no_ret',
        'function_address',
        'block_id',
        'thumb',
        'byte_string',
        '_name',
        'instruction_addrs',
        'is_syscall',
        'has_return',
    )
    return {attr: getattr(self, attr) for attr in persisted}

def __setstate__(self, state):
    """
    Rebuild this node from a dict produced by ``__getstate__``.

    The node is re-initialized through ``__init__`` and ``has_return`` is then assigned from the state.

    :param dict state: The pickled state.
    :return: None
    """
    addr = state['addr']
    size = state['size']

    init_kwargs = {
        key: state[key]
        for key in ('simprocedure_name', 'no_ret', 'function_address', 'block_id', 'thumb', 'byte_string')
    }
    # The display name is stored under '_name' but handed to the constructor as `name`.
    init_kwargs['name'] = state['_name']
    init_kwargs['instruction_addrs'] = state['instruction_addrs']
    init_kwargs['is_syscall'] = state['is_syscall']

    # The third positional argument is passed as None (presumably the `cfg` slot -- confirm against __init__).
    self.__init__(addr, size, None, **init_kwargs)

    # has_return is not passed to the constructor in this call, so it is restored afterwards.
    self.has_return = state['has_return']

#
# Methods
#
Expand All @@ -192,6 +238,8 @@ def copy(self):
instruction_addrs=self.instruction_addrs,
thumb=self.thumb,
byte_string=self.byte_string,
is_syscall=self.is_syscall,
name=self._name
)
return c

Expand Down Expand Up @@ -273,11 +321,13 @@ def __init__(self,
instruction_addrs=None,
thumb=False,
byte_string=None,
is_syscall=None,
name=None,
# CFGENode specific
input_state=None,
final_states=None,
syscall_name=None,
looping_times=0,
syscall=None,
depth=None,
callstack_key=None,
creation_failure_info=None,
Expand All @@ -292,12 +342,13 @@ def __init__(self,
instruction_addrs=instruction_addrs,
thumb=thumb,
byte_string=byte_string,
is_syscall=is_syscall,
name=name,
)

self.input_state = input_state
self.syscall_name = syscall_name
self.looping_times = looping_times
self.syscall = syscall
self.depth = depth

self.creation_failure_info = None
Expand Down Expand Up @@ -358,6 +409,40 @@ def __eq__(self, other):
def __hash__(self):
return hash((self.callstack_key, self.addr, self.looping_times, self.simprocedure_name, self.creation_failure_info))

#
# Pickling
#

def __getstate__(self):
    """
    Extend the parent class's pickled state with the fields specific to this node type.

    ``input_state`` and ``final_states`` are not added here.

    :return: The picklable state of this node.
    :rtype: dict
    """
    state = super().__getstate__()
    state.update({
        'syscall_name': self.syscall_name,
        'looping_times': self.looping_times,
        'depth': self.depth,
        'creation_failure_info': self.creation_failure_info,
        # Stored under the private name although it is read through `callstack_key`.
        '_callstack_key': self.callstack_key,
        'return_target': self.return_target,
    })
    return state

def __setstate__(self, state):
    """
    Rebuild this node from a dict produced by ``__getstate__``.

    The node is re-initialized through ``__init__``. ``has_return`` and ``return_target`` are present in the
    pickled state but are not passed to the constructor in this call, so they are assigned explicitly
    afterwards; otherwise they would be left at whatever ``__init__`` sets and be lost across a pickle
    round trip.

    :param dict state: The pickled state.
    :return: None
    """
    # The third positional argument is passed as None (presumably the `cfg` slot -- confirm against __init__).
    self.__init__(state['addr'],
                  state['size'],
                  None,
                  simprocedure_name=state['simprocedure_name'],
                  no_ret=state['no_ret'],
                  function_address=state['function_address'],
                  block_id=state['block_id'],
                  instruction_addrs=state['instruction_addrs'],
                  thumb=state['thumb'],
                  byte_string=state['byte_string'],
                  is_syscall=state['is_syscall'],
                  name=state['_name'],
                  syscall_name=state['syscall_name'],
                  looping_times=state['looping_times'],
                  depth=state['depth'],
                  callstack_key=state['_callstack_key'],
                  creation_failure_info=state['creation_failure_info'],
                  )
    # Restore the serialized attributes that the constructor call above does not receive.
    self.has_return = state['has_return']
    self.return_target = state['return_target']

def copy(self):
return CFGENode(
self.addr,
Expand All @@ -374,7 +459,7 @@ def copy(self):
input_state=self.input_state,
syscall_name=self.syscall_name,
looping_times=self.looping_times,
syscall=self.syscall,
is_syscall=self.is_syscall,
depth=self.depth,
final_states=self.final_states[::],
callstack_key=self.callstack_key,
Expand Down
13 changes: 7 additions & 6 deletions tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def internaltest_cfg(p):

state.seek(0)
cfg2 = pickle.load(state)
nose.tools.assert_equal(set(cfg.nodes()), set(cfg2.nodes()))
nose.tools.assert_equal(set(cfg.model.nodes()), set(cfg2.model.nodes()))
nose.tools.assert_equal(cfg.unresolvables, cfg2.unresolvables)
nose.tools.assert_set_equal(set(cfg.deadends), set(cfg2.deadends))

Expand All @@ -45,7 +45,7 @@ def internaltest_cfgfast(p):

state.seek(0)
cfg2 = pickle.load(state)
nose.tools.assert_equal(set(cfg.nodes()), set(cfg2.nodes()))
nose.tools.assert_equal(set(cfg.model.nodes()), set(cfg2.model.nodes()))

def internaltest_project(fpath):
tpath = tempfile.mktemp()
Expand All @@ -63,16 +63,17 @@ def internaltest_project(fpath):
simgr.run(n=10)
assert len(simgr.errored) == 0


def test_analyses():
p = angr.Project(os.path.join(internaltest_location, 'i386/fauxware'), load_options={'auto_load_libs': False})
cfg = p.analyses.CFG()
cfb = p.analyses.CFB(cfg)
vrf = p.analyses.VariableRecoveryFast(p.kb.functions['main'])

assert len(p.kb.functions) > 0
assert len(pickle.loads(pickle.dumps(p.kb)).functions) > 0
assert len(pickle.loads(pickle.dumps(p.kb, -1)).functions) > 0

state = pickle.dumps((p,cfg,cfb,vrf))
state = pickle.dumps((p,cfg,cfb,vrf), -1)
del p
del cfg
del cfb
Expand All @@ -86,9 +87,8 @@ def test_analyses():
assert cfg.kb is not None
assert len(p.kb.functions) > 0

def test_serialization():
test_analyses()

def test_serialization():
for d in internaltest_arch:
for f in internaltest_files:
fpath = os.path.join(internaltest_location, d,f)
Expand All @@ -102,4 +102,5 @@ def test_serialization():
internaltest_vfg(p, cfg)

if __name__ == '__main__':
test_analyses()
test_serialization()

0 comments on commit 29fad8e

Please sign in to comment.