Skip to content

Commit

Permalink
Using branch probability to guide critical edge splitting.
Browse files Browse the repository at this point in the history
Summary:
The original heuristic to break critical edge during machine sink is relatively conservertive: when there is only one instruction sinkable to the critical edge, it is likely that the machine sink pass will not break the critical edge. This leads to many speculative instructions executed at runtime. However, with profile info, we could model the splitting benefits: if the critical edge has 50% taken rate, it would always be beneficial to split the critical edge to avoid the speculated runtime instructions. This patch uses profile to guide critical edge splitting in machine sink pass.

The performance impact on speccpu2006 on Intel sandybridge machines:

spec/2006/fp/C++/444.namd                  25.3  +0.26%
spec/2006/fp/C++/447.dealII               45.96  -0.10%
spec/2006/fp/C++/450.soplex               41.97  +1.49%
spec/2006/fp/C++/453.povray               36.83  -0.96%
spec/2006/fp/C/433.milc                   23.81  +0.32%
spec/2006/fp/C/470.lbm                    41.17  +0.34%
spec/2006/fp/C/482.sphinx3                48.13  +0.69%
spec/2006/int/C++/471.omnetpp             22.45  +3.25%
spec/2006/int/C++/473.astar               21.35  -2.06%
spec/2006/int/C++/483.xalancbmk           36.02  -2.39%
spec/2006/int/C/400.perlbench              33.7  -0.17%
spec/2006/int/C/401.bzip2                  22.9  +0.52%
spec/2006/int/C/403.gcc                   32.42  -0.54%
spec/2006/int/C/429.mcf                   39.59  +0.19%
spec/2006/int/C/445.gobmk                 26.98  -0.00%
spec/2006/int/C/456.hmmer                 24.52  -0.18%
spec/2006/int/C/458.sjeng                 28.26  +0.02%
spec/2006/int/C/462.libquantum            55.44  +3.74%
spec/2006/int/C/464.h264ref               46.67  -0.39%

geometric mean                                   +0.20%

Manually checked 473 and 471 to verify the diff is in the noise range.

Reviewers: rengolin, davidxl

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D24818

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284541 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
danielcdh committed Oct 18, 2016
1 parent 9f38b99 commit 6e98f1c
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 131 deletions.
17 changes: 17 additions & 0 deletions lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
Expand Down Expand Up @@ -60,6 +61,15 @@ UseBlockFreqInfo("machine-sink-bfi",
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
"machine-sink-split-probability-threshold",
cl::desc(
"Percentage threshold for splitting single-instruction critical edge. "
"If the branch threshold is higher than this threshold, we allow "
"speculative execution of up to 1 instruction to avoid branching to "
"splitted critical edge"),
cl::init(40), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
Expand All @@ -74,6 +84,7 @@ namespace {
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;

// Remember which edges have been considered for breaking.
Expand Down Expand Up @@ -105,6 +116,7 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
Expand Down Expand Up @@ -283,6 +295,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

bool EverMadeChange = false;
Expand Down Expand Up @@ -383,6 +396,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;

if (MBPI->getEdgeProbability(From, To) <=
BranchProbability(SplitEdgeProbabilityThreshold, 100))
return true;

// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
Expand Down
30 changes: 13 additions & 17 deletions test/CodeGen/ARM/atomic-cmpxchg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,14 @@ entry:
; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0
; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]]
; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]]
; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0
; CHECK-ARMV6-NEXT: bxne lr
; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-ARMV6-NEXT: mov [[RES]], #1
; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0
; CHECK-ARMV6-NEXT: bne [[TRY]]
; CHECK-ARMV6-NEXT: [[END]]:
; CHECK-ARMV6-NEXT: mov r0, [[RES]]
; CHECK-ARMV6-NEXT: bx lr
; CHECK-ARMV6-NEXT: moveq [[RES]], #1
; CHECK-ARMV6-NEXT: bxeq lr
; CHECK-ARMV6-NEXT: b [[TRY]]

; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
Expand All @@ -64,20 +62,18 @@ entry:
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
; CHECK-ARMV7-NEXT: .fnstart
; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
; CHECK-ARMV7-NEXT: bne [[TRY]]
; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
; CHECK-ARMV7-NEXT: [[FAIL]]:
; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
; CHECK-ARMV7-NEXT: bxeq lr
; CHECK-ARMV7-NEXT: [[TRY]]:
; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: mov [[RES]], #0
; CHECK-ARMV7-NEXT: [[END]]:
; CHECK-ARMV7-NEXT: mov r0, [[RES]]
; CHECK-ARMV7-NEXT: bx lr

; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
Expand Down
21 changes: 9 additions & 12 deletions test/CodeGen/ARM/code-placement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb

bb:
; CHECK: LBB0_2:
; CHECK: bne LBB0_2
; CHECK-NOT: b LBB0_2
; CHECK: LBB0_[[LABEL:[0-9]]]:
; CHECK: bne LBB0_[[LABEL]]
; CHECK-NOT: b LBB0_[[LABEL]]
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
Expand All @@ -34,14 +34,13 @@ bb2:
define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
entry:
; CHECK-LABEL: t2:
; CHECK: beq LBB1_[[RET:.]]
%0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1]
br i1 %0, label %bb5, label %bb.nph15

; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
bb1: ; preds = %bb2.preheader, %bb1
; CHECK: LBB1_[[BB1:.]]: @ %bb1
; CHECK: bne LBB1_[[BB1]]
; CHECK: LBB1_[[BB3:.]]: @ %bb3
; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
; CHECK: blt LBB1_[[BB3]]
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
Expand All @@ -53,9 +52,9 @@ bb1: ; preds = %bb2.preheader, %bb1
br i1 %exitcond, label %bb3, label %bb1

bb3: ; preds = %bb1, %bb2.preheader
; CHECK: LBB1_[[BB3:.]]: @ %bb3
; CHECK: bne LBB1_[[PREHDR]]
; CHECK-NOT: b LBB1_
; CHECK: LBB1_[[BB1:.]]: @ %bb1
; CHECK: bne LBB1_[[BB1]]
; CHECK: b LBB1_[[BB3]]
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
Expand All @@ -71,8 +70,6 @@ bb2.preheader: ; preds = %bb3, %bb.nph15
%sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
br i1 %4, label %bb1, label %bb3

; CHECK: LBB1_[[RET]]: @ %bb5
; CHECK: pop
bb5: ; preds = %bb3, %entry
%sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %sum.1.lcssa
Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/X86/block-placement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,12 @@ define void @fpcmp_unanalyzable_branch(i1 %cond) {
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK: # BB#0: # %entry
; CHECK: # BB#1: # %entry.if.then_crit_edge
; CHECK: .LBB10_4: # %if.then
; CHECK: .LBB10_5: # %if.end
; CHECK: .LBB10_5: # %if.then
; CHECK: .LBB10_6: # %if.end
; CHECK: # BB#3: # %exit
; CHECK: jne .LBB10_4
; CHECK-NEXT: jnp .LBB10_5
; CHECK-NEXT: jmp .LBB10_4
; CHECK-NEXT: jnp .LBB10_6
; CHECK: jmp .LBB10_5

entry:
; Note that this branch must be strongly biased toward
Expand Down
Loading

0 comments on commit 6e98f1c

Please sign in to comment.