From 29fad8e263a6b5c9ee741625a72bf868180e159f Mon Sep 17 00:00:00 2001 From: Fish Date: Wed, 10 Apr 2019 16:39:22 +0800 Subject: [PATCH] Fix the CFGENode serialization heisenbug. --- angr/analyses/cfg/cfg_emulated.py | 5 +- angr/knowledge_plugins/cfg/cfg_manager.py | 14 ++++ angr/knowledge_plugins/cfg/cfg_node.py | 102 +++++++++++++++++++++-- tests/test_serialization.py | 13 +-- 4 files changed, 119 insertions(+), 15 deletions(-) diff --git a/angr/analyses/cfg/cfg_emulated.py b/angr/analyses/cfg/cfg_emulated.py index 9942d51f222..0e5bdfc442a 100644 --- a/angr/analyses/cfg/cfg_emulated.py +++ b/angr/analyses/cfg/cfg_emulated.py @@ -3033,6 +3033,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None, # Determine if this is a SimProcedure, and further, if this is a syscall syscall = None + is_syscall = False if sim_successors.sort == 'SimProcedure': is_simprocedure = True if sa['is_syscall'] is True: @@ -3058,7 +3059,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None, simprocedure_name=simproc_name, syscall_name=syscall, no_ret=no_ret, - syscall=syscall, + is_syscall=is_syscall, function_address=sim_successors.addr, block_id=block_id, depth=depth, @@ -3072,7 +3073,7 @@ def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None, self.model, callstack_key=call_stack.stack_suffix(self.context_sensitivity_level), input_state=None, - syscall=syscall, + is_syscall=is_syscall, function_address=func_addr, block_id=block_id, depth=depth, diff --git a/angr/knowledge_plugins/cfg/cfg_manager.py b/angr/knowledge_plugins/cfg/cfg_manager.py index 09a76af3965..0da72ead10a 100644 --- a/angr/knowledge_plugins/cfg/cfg_manager.py +++ b/angr/knowledge_plugins/cfg/cfg_manager.py @@ -42,5 +42,19 @@ def copy(self): cm.cfgs = self.cfgs.copy() return cm + # + # Pickling + # + + def __getstate__(self): + return { + '_kb': self._kb, + 'cfgs': self.cfgs, + } + + def __setstate__(self, state): + self._kb = 
state['_kb'] + self.cfgs = state['cfgs'] + KnowledgeBasePlugin.register_default("cfgs", CFGManager) diff --git a/angr/knowledge_plugins/cfg/cfg_node.py b/angr/knowledge_plugins/cfg/cfg_node.py index 10bf60453b5..7a96d9ec406 100644 --- a/angr/knowledge_plugins/cfg/cfg_node.py +++ b/angr/knowledge_plugins/cfg/cfg_node.py @@ -56,15 +56,17 @@ def __init__(self, soot_block=None, instruction_addrs=None, thumb=False, - byte_string=None): + byte_string=None, + is_syscall=None, + name=None): """ Note: simprocedure_name is not used to recreate the SimProcedure object. It's only there for better __repr__. """ self.addr = addr - self.simprocedure_name = simprocedure_name self.size = size + self.simprocedure_name = simprocedure_name self.no_ret = no_ret self._cfg_model = cfg self.function_address = function_address @@ -72,13 +74,20 @@ def __init__(self, self.thumb = thumb self.byte_string = byte_string # type: None or bytes - if isinstance(addr, SootAddressDescriptor): + self._name = None + if name is not None: + self._name = name + elif isinstance(addr, SootAddressDescriptor): self._name = repr(addr) else: self._name = simprocedure_name self.instruction_addrs = list(instruction_addrs) if instruction_addrs is not None else [] - self.is_syscall = bool(self.simprocedure_name and self._cfg_model.project.simos.is_syscall_addr(addr)) + if is_syscall is not None: + self.is_syscall = is_syscall + else: + self.is_syscall = bool(self.simprocedure_name and self._cfg_model.project.simos.is_syscall_addr(addr)) + if not instruction_addrs and not self.is_simprocedure: # We have to collect instruction addresses by ourselves if irsb is not None: @@ -176,6 +185,43 @@ def parse_from_cmessage(cls, cmsg, cfg=None): # pylint:disable=arguments-differ ) return obj + # + # Pickling + # + + def __getstate__(self): + s = { + 'addr': self.addr, + 'size': self.size, + 'simprocedure_name': self.simprocedure_name, + 'no_ret': self.no_ret, + 'function_address': self.function_address, + 'block_id': 
self.block_id, + 'thumb': self.thumb, + 'byte_string': self.byte_string, + '_name': self._name, + 'instruction_addrs': self.instruction_addrs, + 'is_syscall': self.is_syscall, + 'has_return': self.has_return, + } + return s + + def __setstate__(self, state): + self.__init__(state['addr'], + state['size'], + None, + simprocedure_name=state['simprocedure_name'], + no_ret=state['no_ret'], + function_address=state['function_address'], + block_id=state['block_id'], + thumb=state['thumb'], + byte_string=state['byte_string'], + name=state['_name'], + instruction_addrs=state['instruction_addrs'], + is_syscall=state['is_syscall'], + ) + self.has_return = state['has_return'] + # # Methods # @@ -192,6 +238,8 @@ def copy(self): instruction_addrs=self.instruction_addrs, thumb=self.thumb, byte_string=self.byte_string, + is_syscall=self.is_syscall, + name=self._name ) return c @@ -273,11 +321,13 @@ def __init__(self, instruction_addrs=None, thumb=False, byte_string=None, + is_syscall=None, + name=None, + # CFGENode specific input_state=None, final_states=None, syscall_name=None, looping_times=0, - syscall=None, depth=None, callstack_key=None, creation_failure_info=None, @@ -292,12 +342,13 @@ def __init__(self, instruction_addrs=instruction_addrs, thumb=thumb, byte_string=byte_string, + is_syscall=is_syscall, + name=name, ) self.input_state = input_state self.syscall_name = syscall_name self.looping_times = looping_times - self.syscall = syscall self.depth = depth self.creation_failure_info = None @@ -358,6 +409,43 @@ def __eq__(self, other): def __hash__(self): return hash((self.callstack_key, self.addr, self.looping_times, self.simprocedure_name, self.creation_failure_info)) + # + # Pickling + # + + def __getstate__(self): + s = super().__getstate__() + s['syscall_name'] = self.syscall_name + s['looping_times'] = self.looping_times + s['depth'] = self.depth + s['creation_failure_info'] = self.creation_failure_info + s['_callstack_key'] = self.callstack_key + s['return_target'] 
= self.return_target + return s + + def __setstate__(self, state): + self.__init__(state['addr'], + state['size'], + None, + simprocedure_name=state['simprocedure_name'], + no_ret=state['no_ret'], + function_address=state['function_address'], + block_id=state['block_id'], + instruction_addrs=state['instruction_addrs'], + thumb=state['thumb'], + byte_string=state['byte_string'], + is_syscall=state['is_syscall'], + name=state['_name'], + syscall_name=state['syscall_name'], + looping_times=state['looping_times'], + depth=state['depth'], + callstack_key=state['_callstack_key'], + creation_failure_info=state['creation_failure_info'] + ) + # has_return and return_target are not __init__ arguments; restore them explicitly. + self.has_return = state['has_return'] + self.return_target = state['return_target'] + def copy(self): return CFGENode( self.addr, @@ -374,7 +462,7 @@ def copy(self): input_state=self.input_state, syscall_name=self.syscall_name, looping_times=self.looping_times, - syscall=self.syscall, + is_syscall=self.is_syscall, depth=self.depth, final_states=self.final_states[::], callstack_key=self.callstack_key, diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 9cb97c4bffd..cdc6fd15bd1 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -25,7 +25,7 @@ def internaltest_cfg(p): state.seek(0) cfg2 = pickle.load(state) - nose.tools.assert_equal(set(cfg.nodes()), set(cfg2.nodes())) + nose.tools.assert_equal(set(cfg.model.nodes()), set(cfg2.model.nodes())) nose.tools.assert_equal(cfg.unresolvables, cfg2.unresolvables) nose.tools.assert_set_equal(set(cfg.deadends), set(cfg2.deadends)) @@ -45,7 +45,7 @@ def internaltest_cfgfast(p): state.seek(0) cfg2 = pickle.load(state) - nose.tools.assert_equal(set(cfg.nodes()), set(cfg2.nodes())) + nose.tools.assert_equal(set(cfg.model.nodes()), set(cfg2.model.nodes())) def internaltest_project(fpath): tpath = tempfile.mktemp() @@ -63,6 +63,7 @@ def internaltest_project(fpath): simgr.run(n=10) assert len(simgr.errored) == 0 + def test_analyses(): p = angr.Project(os.path.join(internaltest_location, 'i386/fauxware'), load_options={'auto_load_libs': 
False}) cfg = p.analyses.CFG() @@ -70,9 +71,9 @@ def test_analyses(): vrf = p.analyses.VariableRecoveryFast(p.kb.functions['main']) assert len(p.kb.functions) > 0 - assert len(pickle.loads(pickle.dumps(p.kb)).functions) > 0 + assert len(pickle.loads(pickle.dumps(p.kb, -1)).functions) > 0 - state = pickle.dumps((p,cfg,cfb,vrf)) + state = pickle.dumps((p,cfg,cfb,vrf), -1) del p del cfg del cfb @@ -86,9 +87,8 @@ def test_analyses(): assert cfg.kb is not None assert len(p.kb.functions) > 0 -def test_serialization(): - test_analyses() +def test_serialization(): for d in internaltest_arch: for f in internaltest_files: fpath = os.path.join(internaltest_location, d,f) @@ -102,4 +102,5 @@ def test_serialization(): internaltest_vfg(p, cfg) if __name__ == '__main__': + test_analyses() test_serialization()