[FastISel][X86] Add MachineMemOperand to load/store instructions.
This commit adds MachineMemOperands to load and store instructions. This allows
the peephole optimizer to fold load instructions. Unfortunately, the peephole
optimizer currently doesn't run at -O0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210858 91177308-0d34-0410-b5e6-96231b3b80d8
ributzka committed Jun 12, 2014
1 parent cccc317 commit 4eddf94
Showing 4 changed files with 127 additions and 39 deletions.
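
In machine-IR terms, the change boils down to one pattern: whenever FastISel emits a machine load or store, it now also builds a MachineMemOperand describing the original IR access and attaches it to the instruction. Below is a minimal sketch of that pattern, assuming the usual FastISel members (FuncInfo, DL, TII, AM, Opc, ResultReg) are in scope as in the diff that follows:

MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    MachinePointerInfo(LI->getPointerOperand()), // which IR memory is touched
    MachineMemOperand::MOLoad,                   // the access is a load
    DL.getTypeStoreSize(LI->getType()),          // access size in bytes
    LI->getAlignment());                         // alignment recorded in the IR
MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
addFullAddress(MIB, AM);                         // X86 addressing-mode operands
MIB->addMemOperand(*FuncInfo.MF, MMO);           // make the access visible to later passes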
3 changes: 3 additions & 0 deletions include/llvm/CodeGen/FastISel.h
@@ -377,6 +377,9 @@ class FastISel {
/// Test whether the given value has exactly one use.
bool hasTrivialKill(const Value *V) const;

/// \brief Create a machine mem operand from the given instruction.
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const;

private:
bool SelectBinaryOp(const User *I, unsigned ISDOpcode);

44 changes: 44 additions & 0 deletions lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1737,3 +1737,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
}

MachineMemOperand *
FastISel::createMachineMemOperandFor(const Instruction *I) const {
const Value *Ptr;
Type *ValTy;
unsigned Alignment;
unsigned Flags;
bool IsVolatile;

if (const auto *LI = dyn_cast<LoadInst>(I)) {
Alignment = LI->getAlignment();
IsVolatile = LI->isVolatile();
Flags = MachineMemOperand::MOLoad;
Ptr = LI->getPointerOperand();
ValTy = LI->getType();
} else if (const auto *SI = dyn_cast<StoreInst>(I)) {
Alignment = SI->getAlignment();
IsVolatile = SI->isVolatile();
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
ValTy = SI->getValueOperand()->getType();
} else {
return nullptr;
}

bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);

if (Alignment == 0) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlignment(ValTy);

unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);

if (IsVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (IsNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
if (IsInvariant)
Flags |= MachineMemOperand::MOInvariant;

return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
Alignment, TBAAInfo, Ranges);
}
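
A target's FastISel subclass can now route every selected load or store through this helper and hand the result to its emit routines; the X86 hunks below do exactly that. As a hedged sketch of the call pattern (MyTargetFastISel and emitLoad are illustrative names; only createMachineMemOperandFor is from this commit):

bool MyTargetFastISel::selectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);
  if (LI->isAtomic())               // atomic loads still need special handling
    return false;
  // One call captures the flags (volatile/nontemporal/invariant), size,
  // alignment, TBAA and range metadata of the IR load.
  MachineMemOperand *MMO = createMachineMemOperandFor(LI);
  unsigned ResultReg = 0;
  if (!emitLoad(LI->getType(), LI->getPointerOperand(), MMO, ResultReg))
    return false;
  UpdateValueMap(I, ResultReg);     // publish the vreg holding the loaded value
  return true;
}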
107 changes: 68 additions & 39 deletions lib/Target/X86/X86FastISel.cpp
@@ -78,12 +78,14 @@ class X86FastISel final : public FastISel {
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
unsigned &ResultReg);

bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
bool Aligned = false);
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
const X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);

bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -180,7 +182,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
MachineMemOperand *MMO, unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -228,18 +230,21 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}

ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc), ResultReg), AM);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
addFullAddress(MIB, AM);
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// Ptr and a displacement offset, or a GlobalAddress, i.e. GV. Return true if
/// it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
const X86AddressMode &AM, bool Aligned) {
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
const X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -249,7 +254,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
TII.get(X86::AND8ri), AndResult)
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
@@ -288,13 +294,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
break;
}

addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);

return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
const X86AddressMode &AM, bool Aligned) {
const X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
@@ -317,10 +328,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}

if (Opc) {
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc)), AM)
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
CI->getZExtValue());
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
: CI->getZExtValue());
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
}
@@ -329,7 +342,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
if (ValReg == 0)
return false;

return X86FastEmitStore(VT, ValReg, AM, Aligned);
bool ValKill = hasTrivialKill(Val);
return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -740,19 +754,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;

unsigned SABIAlignment =
DL.getABITypeAlignment(S->getValueOperand()->getType());
bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
const Value *Val = S->getValueOperand();
const Value *Ptr = S->getPointerOperand();

MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
return false;

unsigned Alignment = S->getAlignment();
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = ABIAlignment;
bool Aligned = Alignment >= ABIAlignment;

X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(1), AM))
if (!X86SelectAddress(Ptr, AM))
return false;

return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
@@ -887,25 +906,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
bool X86FastISel::X86SelectLoad(const Instruction *I) {
const LoadInst *LI = cast<LoadInst>(I);

// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
if (LI->isAtomic())
return false;

MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
return false;

const Value *Ptr = LI->getPointerOperand();

X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(0), AM))
if (!X86SelectAddress(Ptr, AM))
return false;

unsigned ResultReg = 0;
if (X86FastEmitLoad(VT, AM, ResultReg)) {
UpdateValueMap(I, ResultReg);
return true;
}
return false;
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
return false;

UpdateValueMap(I, ResultReg);
return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
@@ -1624,8 +1647,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
}

unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
RV &= X86FastEmitStore(VT, Reg, DestAM);
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
assert(RV && "Failed to emit load or store??");

unsigned Size = VT.getSizeInBits()/8;
@@ -2322,7 +2345,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
return false;
} else {
if (!X86FastEmitStore(ArgVT, Arg, AM))
if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
return false;
}
}
@@ -2719,22 +2742,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {

bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
const Value *Ptr = LI->getPointerOperand();
X86AddressMode AM;
if (!X86SelectAddress(LI->getOperand(0), AM))
if (!X86SelectAddress(Ptr, AM))
return false;

const X86InstrInfo &XII = (const X86InstrInfo&)TII;

unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();

if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = DL.getABITypeAlignment(LI->getType());

SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);

MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
if (!Result) return false;
if (!Result)
return false;

Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
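
Keeping the MMO on the folded instruction is what makes the fold auditable afterwards: any later pass can still query the access's size, alignment, and volatility. As an illustration of the kind of check this enables (a sketch, not code from this commit), a folding heuristic could reject unsafe loads like so:

static bool isSafeLoadToFold(const MachineInstr *LoadMI) {
  // No memory operands means the memory behavior is unknown; stay conservative.
  if (LoadMI->memoperands_empty())
    return false;
  for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(),
                                  E = LoadMI->memoperands_end();
       I != E; ++I)
    if ((*I)->isVolatile())  // volatile accesses must not be duplicated or moved
      return false;
  return true;
}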
12 changes: 12 additions & 0 deletions test/CodeGen/X86/fast-isel-fold-mem.ll
@@ -0,0 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s

define i64 @fold_load(i64* %a, i64 %b) {
; CHECK-LABEL: fold_load
; CHECK: addq (%rdi), %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = load i64* %a, align 8
%2 = add i64 %1, %b
ret i64 %2
}
