Skip to content

Commit

Permalink
Revert r287553: [CodeGenPrep] Skip merging empty case blocks
Browse files Browse the repository at this point in the history
It results in assertions in lib/Analysis/BlockFrequencyInfoImpl.cpp line
670 ("Expected irreducible CFG").


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288052 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
jsonn committed Nov 28, 2016
1 parent 2e72c7b commit 46cc792
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 291 deletions.
167 changes: 32 additions & 135 deletions lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
Expand Down Expand Up @@ -126,11 +124,6 @@ static cl::opt<bool> ProfileGuidedSectionPrefix(
"profile-guided-section-prefix", cl::Hidden, cl::init(true),
cl::desc("Use profile info to add section prefix for hot/cold functions"));

static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
"(frequency of destination block) is greater than this ratio"));

namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
Expand All @@ -143,8 +136,6 @@ class TypePromotionTransaction;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;

/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
Expand Down Expand Up @@ -191,11 +182,8 @@ class TypePromotionTransaction;
private:
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
bool optimizeInst(Instruction *I, bool& ModifiedDT);
bool optimizeMemoryInst(Instruction *I, Value *Addr,
Expand Down Expand Up @@ -243,8 +231,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
BFI.reset();
BPI.reset();

ModifiedDT = false;
if (TM)
Expand Down Expand Up @@ -397,38 +383,6 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
return Changed;
}

/// Find a destination block from BB if BB is mergeable empty block.
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isUnconditional())
return nullptr;

// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
BasicBlock::iterator BBI = BI->getIterator();
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
if (BBI == BB->begin())
break;
--BBI;
}
if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
return nullptr;
}

// Do not break infinite loops.
BasicBlock *DestBB = BI->getSuccessor(0);
if (DestBB == BB)
return nullptr;

if (!canMergeBlocks(BB, DestBB))
DestBB = nullptr;

return DestBB;
}

/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
Expand All @@ -447,103 +401,46 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
BasicBlock *BB = &*I++;
BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
if (!DestBB ||
!isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
continue;

eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}

bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BasicBlock *DestBB,
bool isPreheader) {
// Do not delete loop preheaders if doing so would create a critical edge.
// Loop preheaders can be good locations to spill registers. If the
// preheader is deleted and we create a critical edge, registers may be
// spilled in the loop body instead.
if (!DisablePreheaderProtect && isPreheader &&
!(BB->getSinglePredecessor() &&
BB->getSinglePredecessor()->getSingleSuccessor()))
return false;

// Try to skip merging if the unique predecessor of BB is terminated by a
// switch or indirect branch instruction, and BB is used as an incoming block
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
// add COPY instructions in the predecessor of BB instead of BB (if it is not
// merged). Note that the critical edge created by merging such blocks wont be
// split in MachineSink because the jump table is not analyzable. By keeping
// such empty block (BB), ISel will place COPY instructions in BB, not in the
// predecessor of BB.
BasicBlock *Pred = BB->getUniquePredecessor();
if (!Pred ||
!(isa<SwitchInst>(Pred->getTerminator()) ||
isa<IndirectBrInst>(Pred->getTerminator())))
return true;

if (BB->getTerminator() != BB->getFirstNonPHI())
return true;

// We use a simple cost heuristic which determine skipping merging is
// profitable if the cost of skipping merging is less than the cost of
// merging : Cost(skipping merging) < Cost(merging BB), where the
// Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
// the Cost(merging BB) is Freq(Pred) * Cost(Copy).
// Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
// Freq(Pred) / Freq(BB) > 2.
// Note that if there are multiple empty blocks sharing the same incoming
// value for the PHIs in the DestBB, we consider them together. In such
// case, Cost(merging BB) will be the sum of their frequencies.

if (!isa<PHINode>(DestBB->begin()))
return true;

if (!BFI) {
BPI.reset(new BranchProbabilityInfo(*BB->getParent(), *LI));
BFI.reset(new BlockFrequencyInfo(*BB->getParent(), *BPI, *LI));
}

BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;

// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
++PI) {
BasicBlock *DestBBPred = *PI;
if (DestBBPred == BB)
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isUnconditional())
continue;

bool HasAllSameValue = true;
BasicBlock::const_iterator DestBBI = DestBB->begin();
while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
if (DestPN->getIncomingValueForBlock(BB) !=
DestPN->getIncomingValueForBlock(DestBBPred)) {
HasAllSameValue = false;
break;
// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
BasicBlock::iterator BBI = BI->getIterator();
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
if (BBI == BB->begin())
break;
--BBI;
}
if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
continue;
}
if (HasAllSameValue)
SameIncomingValueBBs.insert(DestBBPred);
}

// See if all BB's incoming values are same as the value from Pred. In this
// case, no reason to skip merging because COPYs are expected to be place in
// Pred already.
if (SameIncomingValueBBs.count(Pred))
return true;
// Do not break infinite loops.
BasicBlock *DestBB = BI->getSuccessor(0);
if (DestBB == BB)
continue;

for (auto SameValueBB : SameIncomingValueBBs)
if (SameValueBB->getUniquePredecessor() == Pred &&
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
BBFreq += BFI->getBlockFreq(SameValueBB);
if (!canMergeBlocks(BB, DestBB))
continue;

return PredFreq.getFrequency() <=
BBFreq.getFrequency() * FreqRatioToSkipMerge;
// Do not delete loop preheaders if doing so would create a critical edge.
// Loop preheaders can be good locations to spill registers. If the
// preheader is deleted and we create a critical edge, registers may be
// spilled in the loop body instead.
if (!DisablePreheaderProtect && Preheaders.count(BB) &&
!(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
continue;

eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}

/// Return true if we can merge BB into DestBB if there is a single
Expand Down
3 changes: 1 addition & 2 deletions test/CodeGen/X86/phi-immediate-factoring.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats -cgp-freq-ratio-to-skip-merge=10 2>&1 | grep "Number of blocks eliminated" | grep 6
; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
; PR1296

Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/X86/ragreedy-hoist-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ for.cond357:
br label %for.cond357

sw.bb474:
; CHECK: sw.bb474
; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, sw.bb474 is still cold.
; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: land.rhs485
%cmp476 = icmp eq i8 undef, 0
br i1 %cmp476, label %if.end517, label %do.body479.preheader

do.body479.preheader:
; CHECK: do.body479.preheader
; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold.
; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: land.rhs485
%cmp4833314 = icmp eq i8 undef, 0
br i1 %cmp4833314, label %if.end517, label %land.rhs485

Expand Down
6 changes: 3 additions & 3 deletions test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ return:
; ARM64-LABEL: @widen_switch_i16(
; ARM64: %0 = zext i16 %trunc to i32
; ARM64-NEXT: switch i32 %0, label %sw.default [
; ARM64-NEXT: i32 1, label %sw.bb0
; ARM64-NEXT: i32 1, label %return
; ARM64-NEXT: i32 65535, label %sw.bb1
}

Expand Down Expand Up @@ -58,7 +58,7 @@ return:
; ARM64-LABEL: @widen_switch_i17(
; ARM64: %0 = zext i17 %trunc to i32
; ARM64-NEXT: switch i32 %0, label %sw.default [
; ARM64-NEXT: i32 10, label %sw.bb0
; ARM64-NEXT: i32 10, label %return
; ARM64-NEXT: i32 131071, label %sw.bb1
}

Expand Down Expand Up @@ -89,7 +89,7 @@ return:
; ARM64-LABEL: @widen_switch_i16_sext(
; ARM64: %0 = sext i2 %a to i32
; ARM64-NEXT: switch i32 %0, label %sw.default [
; ARM64-NEXT: i32 1, label %sw.bb0
; ARM64-NEXT: i32 1, label %return
; ARM64-NEXT: i32 -1, label %sw.bb1
}

6 changes: 3 additions & 3 deletions test/Transforms/CodeGenPrepare/X86/widen_switch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ return:
; X86-LABEL: @widen_switch_i16(
; X86: %trunc = trunc i32 %a to i16
; X86-NEXT: switch i16 %trunc, label %sw.default [
; X86-NEXT: i16 1, label %sw.bb0
; X86-NEXT: i16 1, label %return
; X86-NEXT: i16 -1, label %sw.bb1
}

Expand Down Expand Up @@ -58,7 +58,7 @@ return:
; X86-LABEL: @widen_switch_i17(
; X86: %0 = zext i17 %trunc to i32
; X86-NEXT: switch i32 %0, label %sw.default [
; X86-NEXT: i32 10, label %sw.bb0
; X86-NEXT: i32 10, label %return
; X86-NEXT: i32 131071, label %sw.bb1
}

Expand Down Expand Up @@ -89,7 +89,7 @@ return:
; X86-LABEL: @widen_switch_i16_sext(
; X86: %0 = sext i2 %a to i8
; X86-NEXT: switch i8 %0, label %sw.default [
; X86-NEXT: i8 1, label %sw.bb0
; X86-NEXT: i8 1, label %return
; X86-NEXT: i8 -1, label %sw.bb1
}

Loading

0 comments on commit 46cc792

Please sign in to comment.