Skip to content

Commit

Permalink
[JumpThreading] Thread through guards
Browse files Browse the repository at this point in the history
Summary:
This patch allows JumpThreading to also thread through guards.
Virtually, guard(cond) is equivalent to the following construction:

  if (cond) { do something } else {deoptimize}

Yet it is not explicitly converted into IFs before lowering.
This patch enables early threading through guards in simple cases.
Currently it covers the following situation:

  if (cond1) {
    // code A
  } else {
    // code B
  }
  // code C
  guard(cond2)
  // code D

If there is implication cond1 => cond2 or !cond1 => cond2, we can transform
this construction into the following:

  if (cond1) {
    // code A
    // code C
  } else {
    // code B
    // code C
    guard(cond2)
  }
  // code D

This removes the guard from one of the execution branches.

Patch by Max Kazantsev!

Reviewers: reames, apilipenko, igor-laevsky, anna, sanjoy

Reviewed By: sanjoy

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D29620

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294617 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
sanjoy committed Feb 9, 2017
1 parent 162e055 commit f206bf2
Show file tree
Hide file tree
Showing 6 changed files with 383 additions and 15 deletions.
5 changes: 5 additions & 0 deletions include/llvm/Transforms/Scalar/JumpThreading.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueHandle.h"

namespace llvm {
Expand Down Expand Up @@ -62,6 +63,7 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = false;
bool HasGuards = false;
#ifdef NDEBUG
SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
Expand Down Expand Up @@ -122,6 +124,9 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);

bool ProcessGuards(BasicBlock *BB);
bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI);

private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix);
Expand Down
10 changes: 10 additions & 0 deletions include/llvm/Transforms/Utils/Cloning.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,16 @@ Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
void remapInstructionsInBlocks(const SmallVectorImpl<BasicBlock *> &Blocks,
ValueToValueMapTy &VMap);

/// Split the edge between PredBB and BB and duplicate the non-Phi instructions
/// of BB, from its beginning up to (but not including) the StopAt instruction,
/// into the newly created split block.
///
/// Phi nodes of BB are not duplicated. Instead, each Phi is mapped to its
/// incoming value for PredBB, so cloned instructions that used a Phi end up
/// using the corresponding Phi input directly.
///
/// \param BB           Block whose leading instructions are duplicated.
/// \param PredBB       Predecessor of BB; the edge PredBB -> BB is split.
/// \param StopAt       Instruction of BB at which cloning stops; it is not
///                     cloned itself.
/// \param ValueMapping Filled with the mapping from the original values of BB
///                     (Phis and cloned instructions) to their replacements
///                     in the split block.
/// \returns The split block, which is given the name of PredBB with a
///          ".split" suffix.
BasicBlock *
DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
ValueToValueMapTy &ValueMapping);
} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
167 changes: 152 additions & 15 deletions lib/Transforms/Scalar/JumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
Expand Down Expand Up @@ -169,6 +171,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// When profile data is available, we need to update edge weights after
// successful jump threading, which requires both BPI and BFI being available.
HasProfileData = HasProfileData_;
auto *GuardDecl = F.getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
if (HasProfileData) {
BPI = std::move(BPI_);
BFI = std::move(BFI_);
Expand Down Expand Up @@ -238,26 +243,31 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}

/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it. Stop scanning the block when passing the threshold.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
Instruction *StopAt,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I(BB->getFirstNonPHI());

// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.

unsigned Bonus = 0;
const TerminatorInst *BBTerm = BB->getTerminator();
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to happen.
if (isa<SwitchInst>(BBTerm))
Bonus = 6;

// The same holds for indirect branches, but slightly more so.
if (isa<IndirectBrInst>(BBTerm))
Bonus = 8;
if (BB->getTerminator() == StopAt) {
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to
// happen.
if (isa<SwitchInst>(StopAt))
Bonus = 6;

// The same holds for indirect branches, but slightly more so.
if (isa<IndirectBrInst>(StopAt))
Bonus = 8;
}

// Bump the threshold up so the early exit from the loop doesn't skip the
// terminator-based Size adjustment at the end.
Expand All @@ -266,7 +276,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
for (; &*I != StopAt; ++I) {

// Stop scanning the block if we've reached the threshold.
if (Size > Threshold)
Expand Down Expand Up @@ -712,6 +722,10 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (TryToUnfoldSelectInCurrBB(BB))
return true;

// Look if we can propagate guards to predecessors.
if (HasGuards && ProcessGuards(BB))
return true;

// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;

Expand Down Expand Up @@ -1466,7 +1480,8 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}

unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
unsigned JumpThreadCost =
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
Expand Down Expand Up @@ -1754,7 +1769,8 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
return false;
}

unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
unsigned DuplicationCost =
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
Expand Down Expand Up @@ -2019,3 +2035,124 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {

return false;
}

/// Try to propagate a guard from the current BB into one of its predecessors
/// when the other branch of execution implies that the condition of this
/// guard is always true. Currently we only process the simplest case, which
/// looks like:
///
///   Start:
///     %cond = ...
///     br i1 %cond, label %T1, label %F1
///   T1:
///     br label %Merge
///   F1:
///     br label %Merge
///   Merge:
///     %condGuard = ...
///     call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
///
/// where either cond or !cond implies condGuard. In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to only one of them.
///
/// \param BB The candidate merge block (the bottom of the diamond).
/// \returns true if a guard of BB was threaded (the IR was changed).
bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
  using namespace PatternMatch;

  // We only want to deal with exactly two predecessor edges.
  auto PI = pred_begin(BB), PE = pred_end(BB);
  if (PI == PE)
    return false;
  BasicBlock *Pred1 = *PI++;
  if (PI == PE)
    return false;
  BasicBlock *Pred2 = *PI++;
  if (PI != PE)
    return false;

  // Both edges may originate from the same block (e.g. a conditional branch
  // whose two targets are BB). That is not a diamond: the parent's terminator
  // then says nothing about which edge into BB is taken, and it may even be an
  // unconditional branch, which ThreadGuard cannot handle.
  if (Pred1 == Pred2)
    return false;

  // Both predecessors must hang off the same single parent block.
  // TODO: Look up deeper than to immediate predecessor?
  BasicBlock *Parent = Pred1->getSinglePredecessor();
  if (!Parent || Parent != Pred2->getSinglePredecessor())
    return false;

  // Only a branch at the top of the diamond is supported.
  auto *BI = dyn_cast<BranchInst>(Parent->getTerminator());
  if (!BI)
    return false;

  // Try to thread one of the guards of the block; stop at the first success
  // because ThreadGuard rewrites BB and invalidates this iteration.
  for (auto &I : *BB)
    if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
      if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
        return true;

  return false;
}

/// Try to propagate the guard from BB, which is the lower block of a diamond,
/// to one of its branches, in case the diamond's condition implies the guard's
/// condition.
///
/// \param BB    The merge block of the diamond; it contains \p Guard.
/// \param Guard The guard intrinsic call to thread.
/// \param BI    The branch terminating the block at the top of the diamond.
///              Its two successors are expected to be the predecessors of BB.
/// \returns true if the guard was threaded (the IR was changed), false if
///          nothing was done.
bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
                                    BranchInst *BI) {
  // An unconditional branch has no condition to reason about (and asking for
  // one is invalid), so there is nothing to thread on.
  if (!BI->isConditional())
    return false;

  Value *GuardCond = Guard->getArgOperand(0);
  Value *BranchCond = BI->getCondition();
  BasicBlock *TrueDest = BI->getSuccessor(0);
  BasicBlock *FalseDest = BI->getSuccessor(1);

  auto &DL = BB->getModule()->getDataLayout();
  bool TrueDestIsSafe = false;
  bool FalseDestIsSafe = false;

  // True dest is safe if BranchCond => GuardCond.
  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
  if (Impl && *Impl)
    TrueDestIsSafe = true;
  else {
    // False dest is safe if !BranchCond => GuardCond.
    Impl =
        isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
    if (Impl && *Impl)
      FalseDestIsSafe = true;
  }

  if (!TrueDestIsSafe && !FalseDestIsSafe)
    return false;

  // At most one of the flags is set here, so these pick the two different
  // successors of the branch.
  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;

  ValueToValueMapTy UnguardedMapping, GuardedMapping;
  Instruction *AfterGuard = Guard->getNextNode();
  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
  if (Cost > BBDupThreshold)
    return false;

  // Duplicate all instructions before the guard and the guard itself to the
  // branch where implication is not proved.
  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredGuardedBlock, AfterGuard, GuardedMapping);
  assert(GuardedBlock && "Could not create the guarded block?");
  // Duplicate all instructions before the guard in the unguarded branch.
  // Since we have successfully duplicated the guarded block and this block
  // has fewer instructions, we expect it to succeed.
  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredUnguardedBlock, Guard, UnguardedMapping);
  assert(UnguardedBlock && "Could not create the unguarded block?");
  DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
               << GuardedBlock->getName() << "\n");

  // Some instructions before the guard may still have uses. For them, we need
  // to create Phi nodes merging their copies in both guarded and unguarded
  // branches. Those instructions that have no uses can be just removed.
  SmallVector<Instruction *, 4> ToRemove;
  for (auto It = BB->begin(); &*It != AfterGuard; ++It)
    if (!isa<PHINode>(&*It))
      ToRemove.push_back(&*It);

  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
  assert(InsertionPoint && "Empty block?");
  // Substitute with Phis & remove. Walk backwards so that an instruction is
  // erased only after the later instructions in this range that may use it.
  for (auto *Inst : reverse(ToRemove)) {
    if (!Inst->use_empty()) {
      PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
      NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
      NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
      NewPN->insertBefore(InsertionPoint);
      Inst->replaceAllUsesWith(NewPN);
    }
    Inst->eraseFromParent();
  }
  return true;
}
37 changes: 37 additions & 0 deletions lib/Transforms/Utils/CloneFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,3 +747,40 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,

return NewLoop;
}

/// \brief Clone the leading non-Phi instructions of BB, up to but excluding
/// StopAt, into a fresh block created by splitting the PredBB -> BB edge.
/// ValueMapping receives the original -> replacement mapping; the new block
/// is returned.
BasicBlock *
llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
                                          Instruction *StopAt,
                                          ValueToValueMapTy &ValueMapping) {
  // Operands of the copies have to be redirected from values defined in BB to
  // their counterparts in the new block. Start by resolving every PHI node of
  // BB to the value it takes when control arrives from PredBB.
  BasicBlock::iterator Cursor = BB->begin();
  while (PHINode *Phi = dyn_cast<PHINode>(Cursor)) {
    ValueMapping[Phi] = Phi->getIncomingValueForBlock(PredBB);
    ++Cursor;
  }

  BasicBlock *SplitBB = SplitEdge(PredBB, BB);
  SplitBB->setName(PredBB->getName() + ".split");
  Instruction *SplitTerm = SplitBB->getTerminator();

  // Copy each remaining instruction in front of the split block's terminator,
  // recording the copy so that later copies can refer to it.
  for (; &*Cursor != StopAt; ++Cursor) {
    Instruction *Orig = &*Cursor;
    Instruction *Copy = Orig->clone();
    Copy->setName(Orig->getName());
    Copy->insertBefore(SplitTerm);
    ValueMapping[Orig] = Copy;

    // Rewrite operands that refer to values already present in the mapping.
    for (auto &Op : Copy->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;
      auto Mapped = ValueMapping.find(OpInst);
      if (Mapped != ValueMapping.end())
        Op.set(Mapped->second);
    }
  }

  return SplitBB;
}
Loading

0 comments on commit f206bf2

Please sign in to comment.