Skip to content

Commit

Permalink
Implement AArch64 post-index vector load/store multiple N-element str…
Browse files Browse the repository at this point in the history
…ucture class SIMD(lselem-post).

This commit includes the following 14 instructions:
4 ld1 insts: post-index load multiple 1-element structures to 1/2/3/4 sequential registers.
ld2/ld3/ld4: post-index load multiple N-element structures to N sequential registers (N = 2, 3, 4).
4 st1 insts: post-index store multiple 1-element structures from 1/2/3/4 sequential registers.
st2/st3/st4: post-index store multiple N-element structures from N sequential registers (N = 2, 3, 4).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194043 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Hao Liu authored and Hao Liu committed Nov 5, 2013
1 parent 8263dcd commit 591c2f7
Show file tree
Hide file tree
Showing 11 changed files with 1,425 additions and 15 deletions.
204 changes: 191 additions & 13 deletions lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,12 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
SDNode* Select(SDNode*);
private:
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode);
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
const uint16_t *Opcode);

/// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes);
SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
const uint16_t *Opcodes);

// Form pairs of consecutive 64-bit/128-bit registers.
SDNode *createDPairNode(SDValue V0, SDValue V1);
Expand Down Expand Up @@ -485,7 +487,88 @@ SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
Ops);
}

/// Translate a post-index ("WB") VLD/VST opcode of the `_fixed` flavour into
/// the otherwise-identical `_register` flavour, which is what callers select
/// when the address increment is not a constant.
///
/// \param Opc  Opcode to translate.
/// \returns    The matching `_register` opcode, or \p Opc itself when it is
///             not one of the `_fixed` opcodes listed below.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
// Emits a single switch case mapping AArch64::<NAME>_fixed onto
// AArch64::<NAME>_register. Local to this function; undefined again below.
#define VLDST_FIXED_TO_REGISTER(NAME)                                          \
  case AArch64::NAME##_fixed:                                                  \
    return AArch64::NAME##_register;

  switch (Opc) {
  // Post-index LD1, one register.
  VLDST_FIXED_TO_REGISTER(LD1WB_8B)
  VLDST_FIXED_TO_REGISTER(LD1WB_4H)
  VLDST_FIXED_TO_REGISTER(LD1WB_2S)
  VLDST_FIXED_TO_REGISTER(LD1WB_1D)
  VLDST_FIXED_TO_REGISTER(LD1WB_16B)
  VLDST_FIXED_TO_REGISTER(LD1WB_8H)
  VLDST_FIXED_TO_REGISTER(LD1WB_4S)
  VLDST_FIXED_TO_REGISTER(LD1WB_2D)

  // Post-index LD2 (the 1D entry is the two-register LD1 form).
  VLDST_FIXED_TO_REGISTER(LD2WB_8B)
  VLDST_FIXED_TO_REGISTER(LD2WB_4H)
  VLDST_FIXED_TO_REGISTER(LD2WB_2S)
  VLDST_FIXED_TO_REGISTER(LD1WB2V_1D)
  VLDST_FIXED_TO_REGISTER(LD2WB_16B)
  VLDST_FIXED_TO_REGISTER(LD2WB_8H)
  VLDST_FIXED_TO_REGISTER(LD2WB_4S)
  VLDST_FIXED_TO_REGISTER(LD2WB_2D)

  // Post-index LD3 (the 1D entry is the three-register LD1 form).
  VLDST_FIXED_TO_REGISTER(LD3WB_8B)
  VLDST_FIXED_TO_REGISTER(LD3WB_4H)
  VLDST_FIXED_TO_REGISTER(LD3WB_2S)
  VLDST_FIXED_TO_REGISTER(LD1WB3V_1D)
  VLDST_FIXED_TO_REGISTER(LD3WB_16B)
  VLDST_FIXED_TO_REGISTER(LD3WB_8H)
  VLDST_FIXED_TO_REGISTER(LD3WB_4S)
  VLDST_FIXED_TO_REGISTER(LD3WB_2D)

  // Post-index LD4 (the 1D entry is the four-register LD1 form).
  VLDST_FIXED_TO_REGISTER(LD4WB_8B)
  VLDST_FIXED_TO_REGISTER(LD4WB_4H)
  VLDST_FIXED_TO_REGISTER(LD4WB_2S)
  VLDST_FIXED_TO_REGISTER(LD1WB4V_1D)
  VLDST_FIXED_TO_REGISTER(LD4WB_16B)
  VLDST_FIXED_TO_REGISTER(LD4WB_8H)
  VLDST_FIXED_TO_REGISTER(LD4WB_4S)
  VLDST_FIXED_TO_REGISTER(LD4WB_2D)

  // Post-index ST1, one register.
  VLDST_FIXED_TO_REGISTER(ST1WB_8B)
  VLDST_FIXED_TO_REGISTER(ST1WB_4H)
  VLDST_FIXED_TO_REGISTER(ST1WB_2S)
  VLDST_FIXED_TO_REGISTER(ST1WB_1D)
  VLDST_FIXED_TO_REGISTER(ST1WB_16B)
  VLDST_FIXED_TO_REGISTER(ST1WB_8H)
  VLDST_FIXED_TO_REGISTER(ST1WB_4S)
  VLDST_FIXED_TO_REGISTER(ST1WB_2D)

  // Post-index ST2 (the 1D entry is the two-register ST1 form).
  VLDST_FIXED_TO_REGISTER(ST2WB_8B)
  VLDST_FIXED_TO_REGISTER(ST2WB_4H)
  VLDST_FIXED_TO_REGISTER(ST2WB_2S)
  VLDST_FIXED_TO_REGISTER(ST1WB2V_1D)
  VLDST_FIXED_TO_REGISTER(ST2WB_16B)
  VLDST_FIXED_TO_REGISTER(ST2WB_8H)
  VLDST_FIXED_TO_REGISTER(ST2WB_4S)
  VLDST_FIXED_TO_REGISTER(ST2WB_2D)

  // Post-index ST3 (the 1D entry is the three-register ST1 form).
  VLDST_FIXED_TO_REGISTER(ST3WB_8B)
  VLDST_FIXED_TO_REGISTER(ST3WB_4H)
  VLDST_FIXED_TO_REGISTER(ST3WB_2S)
  VLDST_FIXED_TO_REGISTER(ST1WB3V_1D)
  VLDST_FIXED_TO_REGISTER(ST3WB_16B)
  VLDST_FIXED_TO_REGISTER(ST3WB_8H)
  VLDST_FIXED_TO_REGISTER(ST3WB_4S)
  VLDST_FIXED_TO_REGISTER(ST3WB_2D)

  // Post-index ST4 (the 1D entry is the four-register ST1 form).
  VLDST_FIXED_TO_REGISTER(ST4WB_8B)
  VLDST_FIXED_TO_REGISTER(ST4WB_4H)
  VLDST_FIXED_TO_REGISTER(ST4WB_2S)
  VLDST_FIXED_TO_REGISTER(ST1WB4V_1D)
  VLDST_FIXED_TO_REGISTER(ST4WB_16B)
  VLDST_FIXED_TO_REGISTER(ST4WB_8H)
  VLDST_FIXED_TO_REGISTER(ST4WB_4S)
  VLDST_FIXED_TO_REGISTER(ST4WB_2D)

  default:
    break;
  }

#undef VLDST_FIXED_TO_REGISTER

  // Not an opcode this table knows about: hand it back untouched.
  return Opc;
}

SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
bool isUpdating,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");

Expand All @@ -510,7 +593,16 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned Opc = Opcodes[OpcodeIndex];

SmallVector<SDValue, 2> Ops;
Ops.push_back(N->getOperand(2)); // Push back the Memory Address
unsigned AddrOpIdx = isUpdating ? 1 : 2;
Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
Opc = getVLDSTRegisterUpdateOpcode(Opc);
Ops.push_back(Inc);
}

Ops.push_back(N->getOperand(0)); // Push back the Chain

std::vector<EVT> ResTys;
Expand All @@ -526,6 +618,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
ResTys.push_back(ResTy);
}

if (isUpdating)
ResTys.push_back(MVT::i64); // Type of the updated register
ResTys.push_back(MVT::Other); // Type of the Chain
SDLoc dl(N);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
Expand All @@ -548,19 +642,23 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// Update users of the Chain
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));

return NULL;
}

SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
bool isUpdating,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);

MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

unsigned Vec0Idx = 3;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
EVT VT = N->getOperand(Vec0Idx).getValueType();
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
Expand All @@ -582,11 +680,19 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned Opc = Opcodes[OpcodeIndex];

std::vector<EVT> ResTys;
if (isUpdating)
ResTys.push_back(MVT::i64);
ResTys.push_back(MVT::Other); // Type for the Chain

SmallVector<SDValue, 6> Ops;
Ops.push_back(N->getOperand(2)); // Push back the Memory Address
Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
Opc = getVLDSTRegisterUpdateOpcode(Opc);
Ops.push_back(Inc);
}
bool is64BitVector = VT.is64BitVector();

SDValue V0 = N->getOperand(Vec0Idx + 0);
Expand Down Expand Up @@ -768,6 +874,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
case AArch64ISD::NEON_LD1_UPD: {
static const uint16_t Opcodes[] = {
AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
};
return SelectVLD(Node, 1, true, Opcodes);
}
case AArch64ISD::NEON_LD2_UPD: {
static const uint16_t Opcodes[] = {
AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
AArch64::LD2WB_2S_fixed, AArch64::LD1WB2V_1D_fixed,
AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
};
return SelectVLD(Node, 2, true, Opcodes);
}
case AArch64ISD::NEON_LD3_UPD: {
static const uint16_t Opcodes[] = {
AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
AArch64::LD3WB_2S_fixed, AArch64::LD1WB3V_1D_fixed,
AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
};
return SelectVLD(Node, 3, true, Opcodes);
}
case AArch64ISD::NEON_LD4_UPD: {
static const uint16_t Opcodes[] = {
AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
AArch64::LD4WB_2S_fixed, AArch64::LD1WB4V_1D_fixed,
AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
};
return SelectVLD(Node, 4, true, Opcodes);
}
case AArch64ISD::NEON_ST1_UPD: {
static const uint16_t Opcodes[] = {
AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
};
return SelectVST(Node, 1, true, Opcodes);
}
case AArch64ISD::NEON_ST2_UPD: {
static const uint16_t Opcodes[] = {
AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
AArch64::ST2WB_2S_fixed, AArch64::ST1WB2V_1D_fixed,
AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
};
return SelectVST(Node, 2, true, Opcodes);
}
case AArch64ISD::NEON_ST3_UPD: {
static const uint16_t Opcodes[] = {
AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
AArch64::ST3WB_2S_fixed, AArch64::ST1WB3V_1D_fixed,
AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
};
return SelectVST(Node, 3, true, Opcodes);
}
case AArch64ISD::NEON_ST4_UPD: {
static const uint16_t Opcodes[] = {
AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
AArch64::ST4WB_2S_fixed, AArch64::ST1WB4V_1D_fixed,
AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
};
return SelectVST(Node, 4, true, Opcodes);
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
Expand All @@ -780,56 +958,56 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1_2S, AArch64::LD1_1D,
AArch64::LD1_16B, AArch64::LD1_8H,
AArch64::LD1_4S, AArch64::LD1_2D };
return SelectVLD(Node, 1, Opcodes);
return SelectVLD(Node, 1, false, Opcodes);
}
case Intrinsic::arm_neon_vld2: {
static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H,
AArch64::LD2_2S, AArch64::LD1_2V_1D,
AArch64::LD2_16B, AArch64::LD2_8H,
AArch64::LD2_4S, AArch64::LD2_2D };
return SelectVLD(Node, 2, Opcodes);
return SelectVLD(Node, 2, false, Opcodes);
}
case Intrinsic::arm_neon_vld3: {
static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H,
AArch64::LD3_2S, AArch64::LD1_3V_1D,
AArch64::LD3_16B, AArch64::LD3_8H,
AArch64::LD3_4S, AArch64::LD3_2D };
return SelectVLD(Node, 3, Opcodes);
return SelectVLD(Node, 3, false, Opcodes);
}
case Intrinsic::arm_neon_vld4: {
static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H,
AArch64::LD4_2S, AArch64::LD1_4V_1D,
AArch64::LD4_16B, AArch64::LD4_8H,
AArch64::LD4_4S, AArch64::LD4_2D };
return SelectVLD(Node, 4, Opcodes);
return SelectVLD(Node, 4, false, Opcodes);
}
case Intrinsic::arm_neon_vst1: {
static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H,
AArch64::ST1_2S, AArch64::ST1_1D,
AArch64::ST1_16B, AArch64::ST1_8H,
AArch64::ST1_4S, AArch64::ST1_2D };
return SelectVST(Node, 1, Opcodes);
return SelectVST(Node, 1, false, Opcodes);
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H,
AArch64::ST2_2S, AArch64::ST1_2V_1D,
AArch64::ST2_16B, AArch64::ST2_8H,
AArch64::ST2_4S, AArch64::ST2_2D };
return SelectVST(Node, 2, Opcodes);
return SelectVST(Node, 2, false, Opcodes);
}
case Intrinsic::arm_neon_vst3: {
static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H,
AArch64::ST3_2S, AArch64::ST1_3V_1D,
AArch64::ST3_16B, AArch64::ST3_8H,
AArch64::ST3_4S, AArch64::ST3_2D };
return SelectVST(Node, 3, Opcodes);
return SelectVST(Node, 3, false, Opcodes);
}
case Intrinsic::arm_neon_vst4: {
static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H,
AArch64::ST4_2S, AArch64::ST1_4V_1D,
AArch64::ST4_16B, AArch64::ST4_8H,
AArch64::ST4_4S, AArch64::ST4_2D };
return SelectVST(Node, 4, Opcodes);
return SelectVST(Node, 4, false, Opcodes);
}
}
break;
Expand Down
Loading

0 comments on commit 591c2f7

Please sign in to comment.