Skip to content

Commit

Permalink
Remove ARM isel hacks that fold large immediates into a pair of add, …
Browse files Browse the repository at this point in the history
…sub, and,

and xor. The 32-bit move immediates can be hoisted out of loops by machine
LICM but the isel hacks were preventing them.

Instead, let peephole optimization pass recognize registers that are defined by
immediates and the ARM target hook will fold the immediates in.

Other changes include 1) do not fold and / xor into cmp to isel TST / TEQ
instructions if there are multiple uses. This happens when the 'and' is live
out, machine sink would have sinked the computation and that ends up pessimizing
code. The peephole pass would recognize situations where the 'and' can be
toggled to define CPSR and eliminate the comparison anyway.

2) Move peephole pass to after machine LICM, sink, and CSE to avoid blocking
important optimizations.

rdar://8663787, rdar://8241368


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119548 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Evan Cheng committed Nov 17, 2010
1 parent f2dc4aa commit c4af463
Show file tree
Hide file tree
Showing 18 changed files with 269 additions and 55 deletions.
1 change: 1 addition & 0 deletions include/llvm/Target/Target.td
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ class Instruction {
bit isBranch = 0; // Is this instruction a branch instruction?
bit isIndirectBranch = 0; // Is this instruction an indirect branch?
bit isCompare = 0; // Is this instruction a comparison instruction?
bit isMoveImm = 0; // Is this instruction a move immediate instruction?
bit isBarrier = 0; // Can control flow fall through this instruction?
bit isCall = 0; // Is this instruction a call instruction?
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
Expand Down
11 changes: 9 additions & 2 deletions include/llvm/Target/TargetInstrDesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,14 @@ namespace TID {
Terminator,
Branch,
IndirectBranch,
Predicable,
NotDuplicable,
Compare,
MoveImm,
DelaySlot,
FoldableAsLoad,
MayLoad,
MayStore,
Predicable,
NotDuplicable,
UnmodeledSideEffects,
Commutable,
ConvertibleTo3Addr,
Expand Down Expand Up @@ -352,6 +353,12 @@ class TargetInstrDesc {
return Flags & (1 << TID::Compare);
}

/// isMoveImmediate - Return true if this instruction is a move immediate
/// (including conditional moves) instruction.
bool isMoveImmediate() const {
return Flags & (1 << TID::MoveImm);
}

/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
Expand Down
7 changes: 7 additions & 0 deletions include/llvm/Target/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,13 @@ class TargetInstrInfo {
const MachineRegisterInfo *MRI) const {
return false;
}

/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {
return false;
}

/// getNumMicroOps - Return the number of u-operations the given machine
/// instruction will be decoded to on the target cpu.
Expand Down
4 changes: 3 additions & 1 deletion lib/CodeGen/LLVMTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createDeadMachineInstructionElimPass());
printAndVerify(PM, "After codegen DCE pass");

PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");

PM.add(createPeepholeOptimizerPass());
printAndVerify(PM, "After codegen peephole optimization pass");
}

// Pre-ra tail duplication.
Expand Down
62 changes: 62 additions & 0 deletions lib/CodeGen/PeepholeOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

Expand All @@ -56,6 +58,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),

STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumEliminated, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate foled");

namespace {
class PeepholeOptimizer : public MachineFunctionPass {
Expand Down Expand Up @@ -85,6 +88,12 @@ namespace {
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
}

Expand Down Expand Up @@ -257,6 +266,49 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
return false;
}

bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isMoveImmediate())
return false;
if (TID.getNumDefs() != 1)
return false;
unsigned Reg = MI->getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
ImmDefMIs.insert(std::make_pair(Reg, MI));
ImmDefRegs.insert(Reg);
return true;
}

return false;
}

/// FoldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end());
if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
++NumImmFold;
return true;
}
}
return false;
}

bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
Expand All @@ -269,9 +321,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;

SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;

bool SeenMoveImm = false;
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();

for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
Expand All @@ -283,8 +341,12 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {

if (MI->getDesc().isCompare()) {
Changed |= OptimizeCmpInstr(MI, MBB);
} else if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
if (SeenMoveImm)
Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
}
}
Expand Down
97 changes: 97 additions & 0 deletions lib/Target/ARM/ARMBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,103 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
return false;
}

bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
MachineInstr *DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
unsigned DefOpc = DefMI->getOpcode();
if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
return false;
if (!DefMI->getOperand(1).isImm())
// Could be t2MOVi32imm <ga:xx>
return false;

if (!MRI->hasOneNonDBGUse(Reg))
return false;

unsigned UseOpc = UseMI->getOpcode();
unsigned NewUseOpc;
uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
uint32_t SOImmValV1, SOImmValV2;
bool Commute = false;
switch (UseOpc) {
default: return false;
case ARM::SUBrr:
case ARM::ADDrr:
case ARM::ORRrr:
case ARM::EORrr:
case ARM::t2SUBrr:
case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
Commute = UseMI->getOperand(2).getReg() != Reg;
switch (UseOpc) {
default: break;
case ARM::SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::SUBri;
// Fallthrough
}
case ARM::ADDrr:
case ARM::ORRrr:
case ARM::EORrr: {
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
}
case ARM::t2SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::t2SUBri;
// Fallthrough
}
case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
}
break;
}
}
}
}

unsigned OpIdx = Commute ? 2 : 1;
unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
*UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
UseMI->setDesc(get(NewUseOpc));
UseMI->getOperand(1).setReg(NewReg);
UseMI->getOperand(1).setIsKill();
UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
DefMI->eraseFromParent();
return true;
}

unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
Expand Down
5 changes: 5 additions & 0 deletions lib/Target/ARM/ARMBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,11 @@ class ARMBaseInstrInfo : public TargetInstrInfoImpl {
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;

/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const;

virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const;

Expand Down
Loading

0 comments on commit c4af463

Please sign in to comment.