Skip to content

Commit

Permalink
[MIR] Add simple PRE pass to MachineCSE
Browse files Browse the repository at this point in the history
This is the second part of the commit fixing PR38917 (hoisting
partitially redundant machine instruction). Most of PRE (partitial
redundancy elimination) and CSE work is done on LLVM IR, but some of
redundancy arises during DAG legalization. Machine CSE is not enough
to deal with it. This simple PRE implementation works a little bit
intricately: it passes before CSE, looking for partitial redundancy
and transforming it to fully redundancy, anticipating that the next
CSE step will eliminate this created redundancy. If CSE doesn't
eliminate this, than created instruction will remain dead and eliminated
later by Remove Dead Machine Instructions pass.

The third part of the commit is supposed to refactor MachineCSE,
to make it more clear and to merge MachinePRE with MachineCSE,
so one need no rely on further Remove Dead pass to clear instrs
not eliminated by CSE.

First step: https://reviews.llvm.org/D54839

Fixes llvm.org/PR38917

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361356 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
anton-afanasyev committed May 22, 2019
1 parent 41c36f8 commit ae84d50
Show file tree
Hide file tree
Showing 9 changed files with 1,511 additions and 2,084 deletions.
122 changes: 113 additions & 9 deletions lib/CodeGen/MachineCSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
Expand Down Expand Up @@ -49,6 +50,8 @@ using namespace llvm;

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPREs, "Number of partial redundant expression"
" transformed to fully redundant");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
Expand Down Expand Up @@ -84,6 +87,7 @@ namespace {

void releaseMemory() override {
ScopeMap.clear();
PREMap.clear();
Exps.clear();
}

Expand All @@ -98,6 +102,7 @@ namespace {

unsigned LookAheadLimit = 0;
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> PREMap;
ScopedHTType VNT;
SmallVector<MachineInstr *, 64> Exps;
unsigned CurrVN = 0;
Expand All @@ -116,13 +121,17 @@ namespace {
PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
bool ProcessBlock(MachineBasicBlock *MBB);
bool ProcessBlockCSE(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);

bool isPRECandidate(MachineInstr *MI);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
};

} // end anonymous namespace
Expand Down Expand Up @@ -405,9 +414,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
MachineBasicBlock *CSBB, MachineInstr *MI) {
// FIXME: Heuristics that works around the lack the live range splitting.

// If CSReg is used at all uses of Reg, CSE should not increase register
Expand All @@ -433,7 +443,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
if (TII->isAsCheapAsAMove(*MI)) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
Expand Down Expand Up @@ -488,7 +497,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
ScopeMap.erase(SI);
}

bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
bool Changed = false;

SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
Expand Down Expand Up @@ -598,7 +607,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");

if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
Expand Down Expand Up @@ -738,14 +747,106 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
Changed |= ProcessBlock(MBB);
Changed |= ProcessBlockCSE(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
ExitScopeIfDone(Node, OpenChildren);
}

return Changed;
}

// We use stronger checks for PRE candidate rather than for CSE ones to embrace
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
// to exclude instrs created by PRE that won't be CSEed later.
bool MachineCSE::isPRECandidate(MachineInstr *MI) {
if (!isCSECandidate(MI) ||
MI->isNotDuplicable() ||
MI->isAsCheapAsAMove() ||
MI->getNumDefs() != 1 ||
MI->getNumExplicitDefs() != 1)
return false;

for (auto def: MI->defs())
if (!TRI->isVirtualRegister(def.getReg()))
return false;

for (auto use: MI->uses())
if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
return false;

return true;
}

bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;

if (!isPRECandidate(MI))
continue;

if (!PREMap.count(MI)) {
PREMap[MI] = MBB;
continue;
}

auto MBB1 = PREMap[MI];
assert(!DT->properlyDominates(MBB, MBB1) &&
"MBB cannot properly dominate MBB1 while DFS through dominators tree!");
auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);

// Two instrs are partial redundant if their basic blocks are reachable
// from one to another but one doesn't dominate another.
if (CMBB != MBB1) {
auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
if (BB != nullptr && BB1 != nullptr &&
(isPotentiallyReachable(BB1, BB) ||
isPotentiallyReachable(BB, BB1))) {

assert(MI->getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
unsigned VReg = MI->getOperand(0).getReg();
unsigned NewReg = MRI->cloneVirtualRegister(VReg);
if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
continue;
MachineInstr &NewMI = TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
NewMI.getOperand(0).setReg(NewReg);

PREMap[MI] = CMBB;
++NumPREs;
Changed = true;
}
}
}
return Changed;
}

// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it to full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate this, than created instruction will remain dead
// and eliminated later by Remove Dead Machine Instructions pass.
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
SmallVector<MachineDomTreeNode*, 32> BBs;

PREMap.clear();
bool Changed = false;
BBs.push_back(DT->getRootNode());
do {
auto Node = BBs.pop_back_val();
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
for (MachineDomTreeNode *Child : Children)
BBs.push_back(Child);

MachineBasicBlock *MBB = Node->getBlock();
Changed |= ProcessBlockPRE(DT, MBB);

} while (!BBs.empty());

return Changed;
}

bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand All @@ -756,5 +857,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
return PerformCSE(DT->getRootNode());
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);
ChangedCSE = PerformCSE(DT->getRootNode());
return ChangedPRE || ChangedCSE;
}
3 changes: 1 addition & 2 deletions test/CodeGen/Mips/internalfunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ if.then: ; preds = %entry
if.end: ; preds = %entry, %if.then
; CHECK: lw $[[R2:[0-9]+]], %got(sf2)
; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R1]])
%tobool3 = icmp ne i32 %a0, 0
%tmp4 = load void (...)*, void (...)** @gf1, align 4
%cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
Expand Down
Loading

0 comments on commit ae84d50

Please sign in to comment.