Skip to content

Commit

Permalink
R600: Add new functions for splitting vector loads and stores.
Browse files Browse the repository at this point in the history
These will be used in future patches and shouldn't change anything yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213877 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Jul 24, 2014
1 parent 519212b commit cb1ac70
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 26 deletions.
149 changes: 129 additions & 20 deletions lib/Target/R600/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1011,28 +1011,32 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
return SDValue();
}

SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemVT = Load->getMemoryVT();
EVT MemEltVT = MemVT.getVectorElementType();

EVT LoadVT = Op.getValueType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT EltVT = LoadVT.getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();

unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
SmallVector<SDValue, 8> Chains;

SDLoc SL(Op);
unsigned MemEltSize = MemEltVT.getStoreSize();
MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());

for (unsigned i = 0, e = NumElts; i != e; ++i) {
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
DAG.getConstant(i * MemEltSize, PtrVT));

SDValue NewLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
Load->getChain(), Ptr,
MachinePointerInfo(Load->getMemOperand()->getValue()),
SrcValue.getWithOffset(i * MemEltSize),
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
Load->getAlignment());
Loads.push_back(NewLoad.getValue(0));
Expand All @@ -1047,6 +1051,55 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
return DAG.getMergeValues(Ops, SL);
}

/// Split a vector load into two loads, each covering half of the vector.
///
/// \param Op  The load to split; it must be a LoadSDNode producing a vector.
/// \param DAG The DAG in which the replacement nodes are created.
/// \returns Merged values holding the CONCAT_VECTORS of the two half loads and
///          a TokenFactor joining their chains. A load of exactly two elements
///          is handed to ScalarizeVectorLoad instead.
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // If this is a 2 element vector, we really want to scalarize and not create
  // weird 1 element vectors.
  if (VT.getVectorNumElements() == 2)
    return ScalarizeVectorLoad(Op, DAG);

  LoadSDNode *Load = cast<LoadSDNode>(Op);
  SDValue BasePtr = Load->getBasePtr();
  EVT PtrVT = BasePtr.getValueType();
  EVT MemVT = Load->getMemoryVT();
  SDLoc SL(Op);
  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());

  // Only the split *types* are needed here. The halves are produced by the
  // two new loads below, so the loaded value itself is never split.
  EVT LoVT, HiVT;
  EVT LoMemVT, HiMemVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);

  // The high half starts Size bytes past the base pointer, so it is only
  // guaranteed the alignment common to the base alignment and that offset:
  // the lowest set bit of (BaseAlign | Size).
  const unsigned Size = LoMemVT.getStoreSize();
  const unsigned BaseAlign = Load->getAlignment();
  const unsigned AlignBits = BaseAlign | Size;
  const unsigned HiAlign = AlignBits & (~AlignBits + 1);

  SDValue LoLoad
    = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
                     Load->getChain(), BasePtr,
                     SrcValue,
                     LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
                     BaseAlign);

  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(Size, PtrVT));

  SDValue HiLoad
    = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
                     Load->getChain(), HiPtr,
                     SrcValue.getWithOffset(Size),
                     HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
                     HiAlign);

  // Result 0 is the reassembled vector, result 1 the combined chain.
  SDValue Ops[] = {
    DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
    DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
                LoLoad.getValue(1), HiLoad.getValue(1))
  };

  return DAG.getMergeValues(Ops, SL);
}

SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
Expand Down Expand Up @@ -1105,8 +1158,8 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
Store->getAlignment());
}

SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
EVT EltVT = Store->getValue().getValueType().getVectorElementType();
Expand All @@ -1116,21 +1169,77 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,

SmallVector<SDValue, 8> Chains;

unsigned EltSize = MemEltVT.getStoreSize();
MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());

for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Store->getValue(), DAG.getConstant(i, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
Store->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
PtrVT));
Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
MachinePointerInfo(Store->getMemOperand()->getValue()),
MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
Store->getAlignment()));
Store->getValue(),
DAG.getConstant(i, MVT::i32));

SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
SDValue NewStore =
DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
SrcValue.getWithOffset(i * EltSize),
MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
Store->getAlignment());
Chains.push_back(NewStore);
}

return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
}

/// Split a vector store into two stores, each covering half of the vector.
///
/// \param Op  The store to split; it must be a StoreSDNode storing a vector.
/// \param DAG The DAG in which the replacement nodes are created.
/// \returns A TokenFactor joining the chains of the low and high half stores.
///          A store of exactly two elements is handed to ScalarizeVectorStore
///          instead.
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  SDValue Val = Store->getValue();
  EVT VT = Val.getValueType();

  // If this is a 2 element vector, we really want to scalarize and not create
  // weird 1 element vectors.
  if (VT.getVectorNumElements() == 2)
    return ScalarizeVectorStore(Op, DAG);

  EVT MemVT = Store->getMemoryVT();
  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  SDLoc SL(Op);

  EVT LoVT, HiVT;
  EVT LoMemVT, HiMemVT;
  SDValue Lo, Hi;

  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);

  // The high half starts Size bytes past the base pointer, so it is only
  // guaranteed the alignment common to the base alignment and that offset:
  // the lowest set bit of (BaseAlign | Size).
  const unsigned Size = LoMemVT.getStoreSize();
  const unsigned BaseAlign = Store->getAlignment();
  const unsigned AlignBits = BaseAlign | Size;
  const unsigned HiAlign = AlignBits & (~AlignBits + 1);

  EVT PtrVT = BasePtr.getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(Size, PtrVT));

  // The memory flags are passed volatile first, then non-temporal, matching
  // the argument order used for the vector loads in this file.
  MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
  SDValue LoStore
    = DAG.getTruncStore(Chain, SL, Lo,
                        BasePtr,
                        SrcValue,
                        LoMemVT,
                        Store->isVolatile(),
                        Store->isNonTemporal(),
                        BaseAlign);
  SDValue HiStore
    = DAG.getTruncStore(Chain, SL, Hi,
                        HiPtr,
                        SrcValue.getWithOffset(Size),
                        HiMemVT,
                        Store->isVolatile(),
                        Store->isNonTemporal(),
                        HiAlign);

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
}


SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
Expand Down Expand Up @@ -1227,7 +1336,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
return SplitVectorStore(Op, DAG);
return ScalarizeVectorStore(Op, DAG);
}

EVT MemVT = Store->getMemoryVT();
Expand Down
14 changes: 12 additions & 2 deletions lib/Target/R600/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,19 @@ class AMDGPUTargetLowering : public TargetLowering {

virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
/// \brief Split a vector load into multiple scalar loads.
SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;

/// \brief Split a vector load into a scalar load of each component.
SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;

/// \brief Split a vector load into 2 loads of half the vector.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

/// \brief Split a vector store into a scalar store of each component.
SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;

/// \brief Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
Expand Down
2 changes: 1 addition & 1 deletion lib/Target/R600/R600ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1543,7 +1543,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const

if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
ScalarizeVectorLoad(Op, DAG),
Chain
};
return DAG.getMergeValues(MergedValues, DL);
Expand Down
6 changes: 3 additions & 3 deletions lib/Target/R600/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
break;
// fall-through
case AMDGPUAS::LOCAL_ADDRESS:
return SplitVectorLoad(Op, DAG);
return ScalarizeVectorLoad(Op, DAG);
}
}

Expand Down Expand Up @@ -1073,7 +1073,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {

if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
return SplitVectorStore(Op, DAG);
return ScalarizeVectorStore(Op, DAG);
return SDValue();
}

Expand All @@ -1082,7 +1082,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Ret;

if (VT.isVector() && VT.getVectorNumElements() >= 8)
return SplitVectorStore(Op, DAG);
return ScalarizeVectorStore(Op, DAG);

if (VT == MVT::i1)
return DAG.getTruncStore(Store->getChain(), DL,
Expand Down

0 comments on commit cb1ac70

Please sign in to comment.