diff --git a/angr/analyses/decompiler/structuring/phoenix.py b/angr/analyses/decompiler/structuring/phoenix.py index cfb03ff6a62..1e556b942c7 100644 --- a/angr/analyses/decompiler/structuring/phoenix.py +++ b/angr/analyses/decompiler/structuring/phoenix.py @@ -13,7 +13,7 @@ from angr.utils.graph import GraphUtils from ....knowledge_plugins.cfg import IndirectJumpType from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR -from ....utils.graph import dominates, inverted_idoms, to_acyclic_graph +from ....utils.graph import dominates, to_acyclic_graph from ..sequence_walker import SequenceWalker from ..utils import ( remove_last_statement, @@ -1913,21 +1913,16 @@ def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: Opt other_edges = [] idoms = networkx.immediate_dominators(full_graph, head) if networkx.is_directed_acyclic_graph(full_graph): - _, inv_idoms = inverted_idoms(full_graph) acyclic_graph = full_graph else: acyclic_graph = to_acyclic_graph(full_graph, loop_heads=[head]) - _, inv_idoms = inverted_idoms(acyclic_graph) for src, dst in acyclic_graph.edges: if src is dst: continue - if not graph.has_edge(src, dst): - # the edge might be from full_graph but not in graph - continue - if not dominates(idoms, src, dst) and not dominates(inv_idoms, dst, src): + if not dominates(idoms, src, dst) and not dominates(idoms, dst, src): if (src.addr, dst.addr) not in self.whitelist_edges: all_edges_wo_dominance.append((src, dst)) - elif not dominates(idoms, src, dst) and dominates(inv_idoms, dst, src): + elif not dominates(idoms, src, dst): if (src.addr, dst.addr) not in self.whitelist_edges: secondary_edges.append((src, dst)) else: @@ -1935,7 +1930,7 @@ def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: Opt other_edges.append((src, dst)) ordered_nodes = GraphUtils.quasi_topological_sort_nodes(acyclic_graph, loop_heads=[head]) - node_seq = {nn: idx for (idx, nn) in enumerate(ordered_nodes)} + node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order if all_edges_wo_dominance: all_edges_wo_dominance = self._chick_order_edges(all_edges_wo_dominance, node_seq) @@ -2008,7 +2003,8 @@ def _virtualize_edge(self, graph, full_graph, src, dst): ) new_src = SequenceNode(src.addr, nodes=[src, goto_node]) - graph.remove_edge(src, dst) + if graph.has_edge(src, dst): + graph.remove_edge(src, dst) if new_src is not None: self.replace_nodes(graph, src, new_src) if full_graph is not None: diff --git a/angr/analyses/propagator/propagator.py b/angr/analyses/propagator/propagator.py index cb0a46509f5..fe02a05f7a7 100644 --- a/angr/analyses/propagator/propagator.py +++ b/angr/analyses/propagator/propagator.py @@ -257,6 +257,7 @@ def _run_on_node(self, node, state): # make a copy of the state if it's not the initial state state = state.copy() state._equivalence.clear() + state.init_replacements() else: # clear self._initial_state so that we *do not* run this optimization again! self._initial_state = None diff --git a/angr/knowledge_plugins/propagations/states.py b/angr/knowledge_plugins/propagations/states.py index 93a1d5c05dd..edc4f69650a 100644 --- a/angr/knowledge_plugins/propagations/states.py +++ b/angr/knowledge_plugins/propagations/states.py @@ -235,6 +235,9 @@ def merge(self, *others): return state, merge_occurred + def init_replacements(self): + self._replacements = defaultdict(dict) + def add_replacement(self, codeloc, old: CodeLocation, new): """ Add a replacement record: Replacing expression `old` with `new` at program location `codeloc`. diff --git a/tests/test_decompiler.py b/tests/test_decompiler.py index 3422a1f59e7..6c06f61a86a 100644 --- a/tests/test_decompiler.py +++ b/tests/test_decompiler.py @@ -2014,7 +2014,9 @@ def test_decompiling_ls_print_many_per_line(self, decompiler_options=None): @structuring_algo("phoenix") def test_decompiling_who_scan_entries(self, decompiler_options=None): - # order of edge virtualization matters. suboptimal order will lead to more gotos. + # order of edge virtualization matters. the default edge virtualization order (post-ordering) will lead to two + # gotos. virtualizing 0x401361 -> 0x4012b5 will lead to only one goto (because it's the edge that the + # compiler's optimizations created). bin_path = os.path.join(test_location, "x86_64", "decompiler", "who.o") proj = angr.Project(bin_path, auto_load_libs=False) @@ -2024,7 +2026,13 @@ def test_decompiling_who_scan_entries(self, decompiler_options=None): self._print_decompilation_result(d) # it should make somewhat sense - assert d.codegen.text.count("goto ") == 1 + assert d.codegen.text.count("goto ") == 2 + + # a bug in propagator was leading to the removal of the comparison at 0x4012b8 + lines = d.codegen.text.split("\n") + label_4012b8_index = lines.index("LABEL_4012b8:") + assert label_4012b8_index != -1 + assert lines[label_4012b8_index + 1].endswith("== 2)") @structuring_algo("phoenix") def test_decompiling_tr_build_spec_list(self, decompiler_options=None): @@ -2050,9 +2058,9 @@ def test_decompiling_tr_build_spec_list(self, decompiler_options=None): self._print_decompilation_result(d) assert d.codegen.text.count("goto ") == 3 - assert d.codegen.text.count("goto LABEL_400d08;") == 2 - # goto 400e40 this is the fake goto that can be eliminated if cross-jumping reverter is present - assert d.codegen.text.count("goto LABEL_400e40;") == 1 + assert d.codegen.text.count("goto LABEL_400d08;") == 1 + assert d.codegen.text.count("goto LABEL_400d2a;") == 1 + assert d.codegen.text.count("goto LABEL_400e1c;") == 1 @structuring_algo("phoenix") def test_decompiling_sha384sum_digest_bsd_split_3(self, decompiler_options=None): @@ -2077,8 +2085,8 @@ def test_decompiling_sha384sum_digest_bsd_split_3(self, decompiler_options=None) ) self._print_decompilation_result(d) - # there should only be two or even fewer gotos - assert d.codegen.text.count("goto ") == 2 + # there should only be three or even fewer gotos + assert d.codegen.text.count("goto ") == 3 @for_all_structuring_algos def test_eliminating_stack_canary_reused_stack_chk_fail_call(self, decompiler_options=None):