Skip to content

Commit

Permalink
CodeGen: Allow small copyable blocks to "break" the CFG.
Browse files Browse the repository at this point in the history
When choosing the best successor for a block, ordinarily we would have preferred
a block that preserves the CFG unless there is a strong probability in favor of
the other direction. For small blocks that can be duplicated we now skip that
requirement as well.

Differential revision: https://reviews.llvm.org/D27742

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291609 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Kyle Butt committed Jan 10, 2017
1 parent d8c5040 commit ada6595
Show file tree
Hide file tree
Showing 58 changed files with 472 additions and 212 deletions.
59 changes: 52 additions & 7 deletions lib/CodeGen/MachineBlockPlacement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildCFGChains();
void optimizeBranches();
void alignBlocks();
bool shouldTailDuplicate(MachineBasicBlock *BB);
bool canTailDuplicateUnplacedPreds(
MachineBasicBlock *BB, MachineBasicBlock *Succ,
BlockChain &Chain, const BlockFilterSet *BlockFilter);

public:
static char ID; // Pass identification, replacement for typeid
Expand Down Expand Up @@ -561,6 +565,45 @@ getAdjustedProbability(BranchProbability OrigProb,
return SuccProb;
}

/// Decide whether \p BB is a candidate for tail duplication.
/// \p BB Block to check.
bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
  // A block with a single successor creates no additional fallthrough
  // opportunity, so there is nothing to gain from duplicating it.
  // TODO: When conditional exits are analyzable, allow them to be duplicated.
  if (BB->succ_size() == 1)
    return false;

  // Defer the remaining profitability/legality checks to the tail duplicator.
  bool IsSimple = TailDup.isSimpleBB(BB);
  return TailDup.shouldTailDuplicate(IsSimple, *BB);
}

/// When the option TailDupPlacement is on, this method checks if the
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
/// into all of its unplaced, unfiltered predecessors, that are not BB. In
/// addition we keep a set of blocks that have been tail-duplicated into and
/// allow those blocks to be unplaced as well. This allows the creation of a
/// second (larger) spine and a short fallthrough spine.
/// We also identify blocks with the CFG that would have been produced by
/// tail-duplication and lay them out in the same manner.
bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
    MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
    const BlockFilterSet *BlockFilter) {
  if (!shouldTailDuplicate(Succ))
    return false;

  // Every predecessor that is unplaced and unfiltered (and is not BB itself)
  // must admit tail duplication of Succ; a single failure vetoes the whole
  // transformation.
  for (MachineBasicBlock *Pred : Succ->predecessors()) {
    bool Exempt = (Pred == BB) ||
                  (BlockFilter && !BlockFilter->count(Pred)) ||
                  (BlockToChain[Pred] == &Chain);
    if (Exempt)
      continue;
    if (!TailDup.canTailDuplicate(Succ, Pred))
      return false;
  }
  return true;
}

/// When the option OutlineOptionalBranches is on, this method
/// checks if the fallthrough candidate block \p Succ (of block
/// \p BB) also has other unscheduled predecessor blocks which
Expand Down Expand Up @@ -634,6 +677,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
if (SuccChain.UnscheduledPredecessors == 0)
return false;

// As a heuristic, if we can duplicate the block into all its unscheduled
// predecessors, we return false.
if (TailDupPlacement
&& canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter))
return false;

// There are two basic scenarios here:
// -------------------------------------
// Case 1: triangular shape CFG (if-then):
Expand Down Expand Up @@ -1908,13 +1957,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
DuplicatedToLPred = false;
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
<< BB->getNumber() << "\n");
bool IsSimple = TailDup.isSimpleBB(BB);
// Blocks with single successors don't create additional fallthrough
// opportunities. Don't duplicate them. TODO: When conditional exits are
// analyzable, allow them to be duplicated.
if (!IsSimple && BB->succ_size() == 1)
return false;
if (!TailDup.shouldTailDuplicate(IsSimple, *BB))

if (!shouldTailDuplicate(BB))
return false;
// This has to be a callback because none of it can be done after
// BB is deleted.
Expand Down Expand Up @@ -1967,6 +2011,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);

SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
bool IsSimple = TailDup.isSimpleBB(BB);
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
&DuplicatedPreds, &RemovalCallbackRef);

Expand Down
7 changes: 6 additions & 1 deletion test/CodeGen/AArch64/addsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,17 @@ test4:

test5:
; CHECK: cmn {{w[0-9]+}}, #444
; CHECK: b.gt [[RET]]
; CHECK: b.le [[TEST6:.?LBB[0-9]+_[0-9]+]]
%newval5 = add i32 %val, 4
store i32 %newval5, i32* @var_i32
%cmp_neg_uge = icmp sgt i32 %val2, -444
br i1 %cmp_neg_uge, label %ret, label %test6

; CHECK: {{^}}[[RET]]:
; CHECK: ret
; CHECK: {{^}}[[TEST6]]:
; CHECK: ret

test6:
%newval6 = add i32 %val, 5
store i32 %newval6, i32* @var_i32
Expand Down
22 changes: 12 additions & 10 deletions test/CodeGen/AArch64/arm64-atomic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
; CHECK-NEXT: ret
; CHECK-NEXT: [[FAILBB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: [[EXITBB]]:
; CHECK-NEXT: ret
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
Expand All @@ -27,10 +27,12 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
; CHECK-NEXT: mov x0, x[[ADDR]]
; CHECK-NEXT: ret
; CHECK-NEXT: [[FAILBB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: [[EXITBB]]:
; CHECK-NEXT: mov x0, x[[ADDR]]
; CHECK-NEXT: ret
%new = load i32, i32* %pnew
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
Expand All @@ -41,15 +43,15 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_rel:
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
; CHECK-NEXT: cmp [[RESULT]], w1
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
; CHECK-NEXT: ret
; CHECK-NEXT: [[FAILBB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: [[EXITBB]]:
; CHECK-NEXT: ret
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
Expand All @@ -64,10 +66,10 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
; CHECK-NEXT: ret
; CHECK-NEXT: [[FAILBB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: [[EXITBB]]:
; CHECK-NEXT: ret
%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
%val = extractvalue { i64, i1 } %pair, 0
ret i64 %val
Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/AArch64/arm64-ccmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ if.end: ; preds = %if.then, %lor.lhs.f
; CHECK: cmp w0, #1
; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
; CHECK: ccmp [[DIVRES]], #16, #0, ge
; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
; CHECK: bl _foo
; CHECK: [[BLOCK]]:
; CHECK: b.le [[BLOCK:LBB[0-9_]+]]
; CHECK: orr w0, wzr, #0x7
; CHECK: [[BLOCK]]:
; CHECK: bl _foo
define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
entry:
%cmp = icmp sgt i32 %a, 0
Expand All @@ -135,7 +135,7 @@ if.end:
; CHECK: cmp
; CHECK-NOT: b.
; CHECK: fccmp {{.*}}, #8, ge
; CHECK: b.lt
; CHECK: b.ge
define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
entry:
%cmp = icmp sgt i32 %a, 0
Expand Down
14 changes: 5 additions & 9 deletions test/CodeGen/AArch64/arm64-shrink-wrapping.ll
Original file line number Diff line number Diff line change
Expand Up @@ -346,19 +346,15 @@ entry:
; CHECK-NEXT: sub w1, w1, #1
; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
; DISABLE-NEXT: b [[IFEND_LABEL]]
;
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE: lsl w0, w1, #1
;
; CHECK: [[IFEND_LABEL]]:
; CHECK-NEXT: [[IFEND_LABEL]]:
; Epilogue code.
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE_NEXT: ret
; CHECK: [[ELSE_LABEL]]: ; %if.else
; CHECK-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
entry:
%ap = alloca i8*, align 8
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/AArch64/compare-branch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ test4:
%val4 = load volatile i64, i64* @var64
%tst4 = icmp ne i64 %val4, 0
br i1 %tst4, label %end, label %test5, !prof !1
; CHECK: cbnz {{x[0-9]+}}, .LBB
; CHECK: cbz {{x[0-9]+}}, .LBB

test5:
store volatile i64 %val4, i64* @var64
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/AArch64/logical_shifted_reg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ test2:

test3:
; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
; CHECK: b.gt .L
; CHECK: b.le .L
%asr_op = ashr i64 %val2, 12
%asr_and = and i64 %asr_op, %val1
%tst3 = icmp sgt i64 %asr_and, 0
Expand Down
18 changes: 9 additions & 9 deletions test/CodeGen/AArch64/tbz-tbnz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end

; CHECK: sub [[CMP:w[0-9]+]], w0, #12
; CHECK: tbz [[CMP]], #31
; CHECK: tbnz [[CMP]], #31

if.then:
call void @t()
Expand All @@ -28,7 +28,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end

; CHECK: sub [[CMP:x[0-9]+]], x0, #12
; CHECK: tbz [[CMP]], #63
; CHECK: tbnz [[CMP]], #63

if.then:
call void @t()
Expand Down Expand Up @@ -118,7 +118,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end

; CHECK: sub [[CMP:w[0-9]+]], w0, #12
; CHECK: tbz [[CMP]], #31
; CHECK: tbnz [[CMP]], #31

if.then:
call void @t()
Expand Down Expand Up @@ -178,7 +178,7 @@ define void @test9(i64 %val1) {
br i1 %tst, label %if.then, label %if.end

; CHECK-NOT: cmp
; CHECK: tbz x0, #63
; CHECK: tbnz x0, #63

if.then:
call void @t()
Expand All @@ -194,7 +194,7 @@ define void @test10(i64 %val1) {
br i1 %tst, label %if.then, label %if.end

; CHECK-NOT: cmp
; CHECK: tbz x0, #63
; CHECK: tbnz x0, #63

if.then:
call void @t()
Expand All @@ -209,7 +209,7 @@ define void @test11(i64 %val1, i64* %ptr) {

; CHECK: ldr [[CMP:x[0-9]+]], [x1]
; CHECK-NOT: cmp
; CHECK: tbz [[CMP]], #63
; CHECK: tbnz [[CMP]], #63

%val = load i64, i64* %ptr
%tst = icmp slt i64 %val, 0
Expand All @@ -229,7 +229,7 @@ define void @test12(i64 %val1) {
br i1 %tst, label %if.then, label %if.end

; CHECK-NOT: cmp
; CHECK: tbz x0, #63
; CHECK: tbnz x0, #63

if.then:
call void @t()
Expand All @@ -247,7 +247,7 @@ define void @test13(i64 %val1, i64 %val2) {

; CHECK: orr [[CMP:x[0-9]+]], x0, x1
; CHECK-NOT: cmp
; CHECK: tbz [[CMP]], #63
; CHECK: tbnz [[CMP]], #63

if.then:
call void @t()
Expand All @@ -262,7 +262,7 @@ define void @test14(i1 %cond) {
br i1 %cond, label %if.end, label %if.then

; CHECK-NOT: and
; CHECK: tbnz w0, #0
; CHECK: tbz w0, #0

if.then:
call void @t()
Expand Down
16 changes: 16 additions & 0 deletions test/CodeGen/AMDGPU/branch-relaxation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,12 @@ loop:
; GCN-NEXT: ;;#ASMEND

; GCN-NEXT: [[BB3]]: ; %bb3
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: v_nop_e64
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: v_nop_e64
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define void @expand_requires_expand(i32 %cond0) #0 {
bb0:
Expand All @@ -356,6 +362,12 @@ bb2:
br label %bb3

bb3:
; These NOPs prevent tail-duplication-based outlining
; from firing, which defeats the need to expand the branches and this test.
call void asm sideeffect
"v_nop_e64", ""() #0
call void asm sideeffect
"v_nop_e64", ""() #0
ret void
}

Expand Down Expand Up @@ -385,6 +397,7 @@ bb3:

; GCN-NEXT: [[ENDIF]]: ; %endif
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_sleep 5
; GCN-NEXT: s_endpgm
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
entry:
Expand All @@ -402,6 +415,9 @@ if_uniform:
br label %endif

endif:
; layout can remove the split branch if it can copy the return block.
; This call makes the return block long enough that it doesn't get copied.
call void @llvm.amdgcn.s.sleep(i32 5);
ret void
}

Expand Down
5 changes: 4 additions & 1 deletion test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ bb5: ; preds = %bb3, %bb1
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_ret_noloop:
; GCN: s_cbranch_scc1
; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]]
; GCN: s_endpgm

; GCN: {{^}}[[BODY]]:
; GCN: s_endpgm
; GCN: .Lfunc_end1
define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
Expand Down
Loading

0 comments on commit ada6595

Please sign in to comment.