Implement AArch64 Neon instruction set Bitwise Extract.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194118 91177308-0d34-0410-b5e6-96231b3b80d8
Jiangning Liu committed Nov 6, 2013
1 parent 10bb82e commit 2581152
Showing 10 changed files with 456 additions and 100 deletions.
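At a high level, the new AArch64ISD::NEON_VEXTRACT node and the EXT instruction defined below are byte-granular: the two source registers are treated as one concatenated byte vector, and the result is a contiguous run of register-width bytes starting at the imm4 byte offset. The reference model below is a minimal sketch of that semantics and is not part of the commit; the ExtBytes name and the std::array representation are illustrative only.

#include <array>
#include <cstddef>
#include <cstdint>

// Byte-level model of EXT: take N contiguous bytes from the concatenation
// Vn:Vm, starting at byte offset Imm (N is the register width in bytes).
template <std::size_t N>
std::array<std::uint8_t, N> ExtBytes(const std::array<std::uint8_t, N> &Vn,
                                     const std::array<std::uint8_t, N> &Vm,
                                     unsigned Imm) {
  std::array<std::uint8_t, N> Result{};
  for (std::size_t I = 0; I != N; ++I) {
    std::size_t Src = Imm + I;
    // Bytes below N come from Vn; the run then continues into Vm.
    Result[I] = Src < N ? Vn[Src] : Vm[Src - N];
  }
  return Result;
}

For example, with N == 8 and Imm == 3 the result is bytes 3..7 of Vn followed by bytes 0..2 of Vm, which corresponds to the v8i8 shuffle mask <3,4,5,6,7,8,9,10> handled by the lowering change in AArch64ISelLowering.cpp below.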
199 changes: 113 additions & 86 deletions lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -907,6 +907,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_ST3_UPD";
case AArch64ISD::NEON_ST4_UPD:
return "AArch64ISD::NEON_ST4_UPD";
case AArch64ISD::NEON_VEXTRACT:
return "AArch64ISD::NEON_VEXTRACT";
default:
return NULL;
}
@@ -3797,7 +3799,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

SDValue
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
@@ -3811,101 +3813,126 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
ArrayRef<int> ShuffleMask = SVN->getMask();

unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 64) {
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
int Lane = SVN->getSplatIndex();
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;

// Test if V1 is a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
}
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::BUILD_VECTOR) {
bool IsScalarToVector = true;
for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
i != (unsigned)Lane) {
IsScalarToVector = false;
break;
}
if (IsScalarToVector)
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
V1.getOperand(Lane));
}
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i64));
}
// For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert
// by element from V2 to V1 .
// If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
// better choice to be inserted than V1 as less insert needed, so we count
// element to be inserted for both V1 and V2, and select less one as insert
// target.

// Collect elements need to be inserted and their index.
SmallVector<int, 8> NV1Elt;
SmallVector<int, 8> N1Index;
SmallVector<int, 8> NV2Elt;
SmallVector<int, 8> N2Index;
int Length = ShuffleMask.size();
int V1EltNum = V1.getValueType().getVectorNumElements();
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != I) {
NV1Elt.push_back(ShuffleMask[I]);
N1Index.push_back(I);
}
if (EltSize > 64)
return SDValue();

// If the element of shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
int Lane = SVN->getSplatIndex();
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;

// Test if V1 is a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
}
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != (I + V1EltNum)) {
NV2Elt.push_back(ShuffleMask[I]);
N2Index.push_back(I);
}
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::BUILD_VECTOR) {
bool IsScalarToVector = true;
for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
i != (unsigned)Lane) {
IsScalarToVector = false;
break;
}
if (IsScalarToVector)
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
V1.getOperand(Lane));
}
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i64));
}

// Decide which to be inserted. If all lanes mismatch, neither V1 nor V2
// will be inserted.
SDValue InsV = V1;
SmallVector<int, 8> InsMasks = NV1Elt;
SmallVector<int, 8> InsIndex = N1Index;
if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
if (NV1Elt.size() > NV2Elt.size()) {
InsV = V2;
InsMasks = NV2Elt;
InsIndex = N2Index;
int Length = ShuffleMask.size();
int V1EltNum = V1.getValueType().getVectorNumElements();

// If the number of v1 elements is the same as the number of shuffle mask
// element and the shuffle masks are sequential values, we can transform
// it into NEON_VEXTRACT.
if (V1EltNum == Length) {
// Check if the shuffle mask is sequential.
bool IsSequential = true;
int CurMask = ShuffleMask[0];
for (int I = 0; I < Length; ++I) {
if (ShuffleMask[I] != CurMask) {
IsSequential = false;
break;
}
} else {
InsV = DAG.getNode(ISD::UNDEF, dl, VT);
CurMask++;
}
if (IsSequential) {
assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
unsigned VecSize = EltSize * V1EltNum;
unsigned Index = (EltSize/8) * ShuffleMask[0];
if (VecSize == 64 || VecSize == 128)
return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
DAG.getConstant(Index, MVT::i64));
}
}

SDValue PassN;
// For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert
// by element from V2 to V1 .
// If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
// better choice to be inserted than V1 as less insert needed, so we count
// element to be inserted for both V1 and V2, and select less one as insert
// target.

// Collect elements need to be inserted and their index.
SmallVector<int, 8> NV1Elt;
SmallVector<int, 8> N1Index;
SmallVector<int, 8> NV2Elt;
SmallVector<int, 8> N2Index;
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != I) {
NV1Elt.push_back(ShuffleMask[I]);
N1Index.push_back(I);
}
}
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != (I + V1EltNum)) {
NV2Elt.push_back(ShuffleMask[I]);
N2Index.push_back(I);
}
}

for (int I = 0, E = InsMasks.size(); I != E; ++I) {
SDValue ExtV = V1;
int Mask = InsMasks[I];
if (Mask > V1EltNum) {
ExtV = V2;
Mask -= V1EltNum;
}
// Any value type smaller than i32 is illegal in AArch64, and this lower
// function is called after legalize pass, so we need to legalize
// the result here.
EVT EltVT;
if (VT.getVectorElementType().isFloatingPoint())
EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
else
EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;

PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
DAG.getConstant(Mask, MVT::i64));
PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN,
DAG.getConstant(InsIndex[I], MVT::i64));
// Decide which to be inserted. If all lanes mismatch, neither V1 nor V2
// will be inserted.
SDValue InsV = V1;
SmallVector<int, 8> InsMasks = NV1Elt;
SmallVector<int, 8> InsIndex = N1Index;
if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
if (NV1Elt.size() > NV2Elt.size()) {
InsV = V2;
InsMasks = NV2Elt;
InsIndex = N2Index;
}
return PassN;
} else {
InsV = DAG.getNode(ISD::UNDEF, dl, VT);
}

return SDValue();
for (int I = 0, E = InsMasks.size(); I != E; ++I) {
SDValue ExtV = V1;
int Mask = InsMasks[I];
if (Mask >= V1EltNum) {
ExtV = V2;
Mask -= V1EltNum;
}
// Any value type smaller than i32 is illegal in AArch64, and this lower
// function is called after legalize pass, so we need to legalize
// the result here.
EVT EltVT;
if (VT.getVectorElementType().isFloatingPoint())
EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
else
EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;

ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
DAG.getConstant(Mask, MVT::i64));
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
DAG.getConstant(InsIndex[I], MVT::i64));
}
return InsV;
}
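As a worked example of the sequential-mask path above: a v8i16 shuffle with mask <2,3,4,5,6,7,8,9> has EltSize == 16 and V1EltNum == Length == 8, so the lowering emits NEON_VEXTRACT with byte index (16/8) * 2 == 4. The standalone sketch below only restates that check outside of SelectionDAG; getExtByteIndex is a hypothetical helper, not code from this commit.

#include <cstdio>
#include <vector>

// Returns the EXT byte index for a shuffle mask, or -1 if the mask is not a
// sequential run over a 64- or 128-bit vector.
int getExtByteIndex(const std::vector<int> &Mask, unsigned EltBits,
                    unsigned NumElts) {
  if (Mask.size() != NumElts || EltBits % 8 != 0)
    return -1;
  for (unsigned I = 1; I != Mask.size(); ++I)
    if (Mask[I] != Mask[0] + (int)I)
      return -1;                            // not a sequential run
  unsigned VecBits = EltBits * NumElts;
  if (VecBits != 64 && VecBits != 128)
    return -1;
  return (EltBits / 8) * Mask[0];
}

int main() {
  std::vector<int> Mask = {2, 3, 4, 5, 6, 7, 8, 9};                  // v8i16
  std::printf("EXT byte index = %d\n", getExtByteIndex(Mask, 16, 8)); // 4
}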

AArch64TargetLowering::ConstraintType
3 changes: 3 additions & 0 deletions lib/Target/AArch64/AArch64ISelLowering.h
@@ -144,6 +144,9 @@ namespace AArch64ISD {
// Vector dup by lane
NEON_VDUPLANE,

// Vector extract
NEON_VEXTRACT,

// NEON loads with post-increment base updates:
NEON_LD1_UPD = ISD::FIRST_TARGET_MEMORY_OPCODE,
NEON_LD2_UPD,
18 changes: 18 additions & 0 deletions lib/Target/AArch64/AArch64InstrFormats.td
@@ -983,6 +983,24 @@ class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit> {
}

// Format AdvSIMD bitwise extract
class NeonI_BitExtract<bit q, bits<2> op2,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29-24} = 0b101110;
let Inst{23-22} = op2;
let Inst{21} = 0b0;
// Inherit Rm in 20-16
let Inst{15} = 0b0;
// imm4 in 14-11
let Inst{10} = 0b0;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
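For reference, the bit assignments above can be packed into a 32-bit encoding as in the sketch below; the encodeBitExtract helper is illustrative only and derived purely from the field layout of NeonI_BitExtract.

#include <cstdint>
#include <cstdio>

std::uint32_t encodeBitExtract(unsigned Q, unsigned Op2, unsigned Rm,
                               unsigned Imm4, unsigned Rn, unsigned Rd) {
  std::uint32_t Inst = 0;
  Inst |= (Q & 1u) << 30;        // Inst{30}    = q
  Inst |= 0x2Eu << 24;           // Inst{29-24} = 0b101110
  Inst |= (Op2 & 0x3u) << 22;    // Inst{23-22} = op2
  Inst |= (Rm & 0x1Fu) << 16;    // Inst{20-16} = Rm
  Inst |= (Imm4 & 0xFu) << 11;   // Inst{14-11} = imm4
  Inst |= (Rn & 0x1Fu) << 5;     // Inst{9-5}   = Rn
  Inst |= (Rd & 0x1Fu);          // Inst{4-0}   = Rd
  return Inst;                   // Inst{31}, {21}, {15}, {10} stay 0
}

int main() {
  // ext v0.16b, v1.16b, v2.16b, #3 -> Q=1, op2=00, Rm=2, imm4=3, Rn=1, Rd=0
  std::printf("0x%08x\n", encodeBitExtract(1, 0, 2, 3, 1, 0)); // 0x6e021820
}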

// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
68 changes: 62 additions & 6 deletions lib/Target/AArch64/AArch64InstrNEON.td
@@ -50,6 +50,9 @@ def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
[SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;

//===----------------------------------------------------------------------===//
// Multiclasses
@@ -1062,7 +1065,7 @@ def neon_uimm8_asmoperand : AsmOperandClass

def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm8_asmoperand;
let PrintMethod = "printNeonUImm8Operand";
let PrintMethod = "printUImmHexOperand";
}

def neon_uimm64_mask_asmoperand : AsmOperandClass
@@ -4430,31 +4433,43 @@ def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def neon_uimm0_bare : Operand<i64>,
ImmLeaf<i64, [{return Imm == 0;}]> {
let ParserMatchClass = neon_uimm0_asmoperand;
let PrintMethod = "printNeonUImm8OperandBare";
let PrintMethod = "printUImmBareOperand";
}

def neon_uimm1_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm1_asmoperand;
let PrintMethod = "printNeonUImm8OperandBare";
let PrintMethod = "printUImmBareOperand";
}

def neon_uimm2_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm2_asmoperand;
let PrintMethod = "printNeonUImm8OperandBare";
let PrintMethod = "printUImmBareOperand";
}

def neon_uimm3_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
let PrintMethod = "printNeonUImm8OperandBare";
let PrintMethod = "printUImmBareOperand";
}

def neon_uimm4_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
let PrintMethod = "printNeonUImm8OperandBare";
let PrintMethod = "printUImmBareOperand";
}

def neon_uimm3 : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
let PrintMethod = "printUImmHexOperand";
}

def neon_uimm4 : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
let PrintMethod = "printUImmHexOperand";
}

class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
@@ -4472,6 +4487,47 @@ class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
let Constraints = "$src = $Rd";
}

// Bitwise Extract
class NeonI_Extract<bit q, bits<2> op2, string asmop,
string OpS, RegisterOperand OpVPR, Operand OpImm>
: NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
(ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
", $Rm." # OpS # ", $Index",
[],
NoItinerary>{
bits<4> Index;
}

def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
VPR64, neon_uimm3> {
let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
}

def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
VPR128, neon_uimm4> {
let Inst{14-11} = Index;
}

class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
Operand OpImm>
: Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
(i64 OpImm:$Imm))),
(INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;

def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
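These patterns map every 64- and 128-bit element type onto the same byte-granular EXT, relying on the byte index computed in LowerVECTOR_SHUFFLE. The small check below is a sketch (assuming a little-endian host; names are illustrative, not from the commit) showing that for v4i16 a byte-level extract at offset 2 reproduces the element-level shuffle <1,2,3,4>.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

int main() {
  std::array<std::uint16_t, 4> A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
  std::array<std::uint16_t, 4> Expected = {1, 2, 3, 4}; // shuffle mask <1,2,3,4>

  // Reinterpret both inputs as bytes and take 8 bytes of A:B starting at
  // byte offset (16/8) * 1 == 2, mirroring the EXT lowering.
  std::array<std::uint8_t, 8> An, Bn, Out;
  std::memcpy(An.data(), A.data(), 8);
  std::memcpy(Bn.data(), B.data(), 8);
  for (std::size_t I = 0; I != 8; ++I) {
    std::size_t Src = 2 + I;
    Out[I] = Src < 8 ? An[Src] : Bn[Src - 8];
  }

  std::array<std::uint16_t, 4> Got;
  std::memcpy(Got.data(), Out.data(), 8);
  assert(Got == Expected && "byte-level EXT should match the element shuffle");
}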

// The followings are for instruction class (3V Elem)

// Variant 1
(Diff for the remaining 6 changed files not shown.)
