Skip to content

Commit

Permalink
[Arm64] Implement ASIMD Extract Insert ExtractVector64 ExtractVector1…
Browse files Browse the repository at this point in the history
…28 (dotnet#35030)

* Implements Extract, Insert, ExtractVector64 and ExtractVector128 intrinsics. 

* Implements a way to generate a fallback mechanism for intrinsics accepting an immediate operand when the operand is not constant.
 
* Renames the NoContainment flag to SupportsContainment on Arm64 (presumably, fewer intrinsics will support containment analysis than not, so it makes more sense for "no containment" to be the default behavior and for containment support to be opted into explicitly).

* Removes ival column from hwintrinsiclistarm64.h table and the corresponding field in HWIntrinsicInfo struct.
  • Loading branch information
echesakov authored Apr 22, 2020
1 parent 484c7ba commit 32dd7d4
Show file tree
Hide file tree
Showing 81 changed files with 29,470 additions and 576 deletions.
52 changes: 52 additions & 0 deletions src/coreclr/src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,58 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
regNumber offsReg,
HWIntrinsicSwitchCaseBody emitSwCase);
#endif // defined(TARGET_XARCH)

#ifdef TARGET_ARM64
// Helper used by the ARM64 code generator for hardware intrinsics that take an immediate operand.
// Callers drive it through EmitBegin() / EmitCaseEnd() and query Done() / ImmValue() in between.
// The constructor, EmitBegin() and EmitCaseEnd() are only declared here; their definitions live
// outside of this header.
// NOTE(review): presumably the intended usage is a loop of the form
//   for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { ... helper.ImmValue() ... }
// -- confirm against the call sites.
class HWIntrinsicImmOpHelper final
{
public:
    HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin);

    void EmitBegin();
    void EmitCaseEnd();

    // The value of the immediate operand that the current case should be generated for.
    int ImmValue() const
    {
        return immValue;
    }

    // Reports whether code generation has finished, i.e. whether the last call to EmitCaseEnd()
    // has already happened. This is the case exactly when immValue has reached immUpperBound.
    bool Done() const
    {
        const bool reachedUpperBound = (immUpperBound == immValue);
        return reachedUpperBound;
    }

private:
    emitter* GetEmitter() const
    {
        return codeGen->GetEmitter();
    }

    // Reports whether immOp is a non-contained immediate, in which case the value of the immediate
    // operand is enregistered in nonConstImmReg (nonConstImmReg is then something other than REG_NA).
    bool NonConstImmOp() const
    {
        const bool hasImmReg = (REG_NA != nonConstImmReg);
        return hasImmReg;
    }

    // Reports whether a non-constant immediate operand can only be 0 or 1 (immUpperBound is 2).
    // Must only be asked when the immediate operand is non-constant.
    bool TestImmOpZeroOrOne() const
    {
        assert(NonConstImmOp());
        const bool onlyZeroOrOne = (2 == immUpperBound);
        return onlyZeroOrOne;
    }

    CodeGen* const codeGen;         // code generator this helper emits through
    BasicBlock*    endLabel;
    BasicBlock*    nonZeroLabel;
    int            immValue;        // immediate value for the current case (see ImmValue())
    int            immUpperBound;   // Done() becomes true once immValue equals this
    regNumber      nonConstImmReg;  // REG_NA unless the immediate operand is enregistered
    regNumber      branchTargetReg;
};
#endif // TARGET_ARM64

#endif // FEATURE_HW_INTRINSICS

#if !defined(TARGET_64BIT)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7217,6 +7217,12 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);

// ext extract vector from pair of vectors
theEmitter->emitIns_R_R_R_I(INS_ext, EA_8BYTE, REG_V0, REG_V1, REG_V2, 3, INS_OPTS_8B);
theEmitter->emitIns_R_R_R_I(INS_ext, EA_8BYTE, REG_V4, REG_V5, REG_V6, 7, INS_OPTS_8B);
theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V8, REG_V9, REG_V10, 11, INS_OPTS_16B);
theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V12, REG_V13, REG_V14, 15, INS_OPTS_16B);

#endif // ALL_ARM64_EMITTER_UNIT_TESTS

#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3692,7 +3692,7 @@ class Compiler

GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand);
GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand, int immUpperBound);

#ifdef TARGET_XARCH
GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic,
Expand Down
45 changes: 45 additions & 0 deletions src/coreclr/src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,15 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isVectorRegister(id->idReg3()));
break;

case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
assert(isValidVectorDatasize(id->idOpSize()));
assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
assert(isValidVectorIndex(id->idOpSize(), EA_1BYTE, emitGetInsSC(id)));
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
assert(isVectorRegister(id->idReg3()));
break;

case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar)
assert(isValidGeneralDatasize(id->idOpSize()));
assert(isVectorRegister(id->idReg1()));
Expand Down Expand Up @@ -947,6 +956,7 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
// Tracked GC pointers cannot be placed into the SIMD registers.
return false;
Expand Down Expand Up @@ -2129,6 +2139,7 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
case IF_DV_3DI:
case IF_DV_3E:
case IF_DV_3F:
case IF_DV_3G:
case IF_DV_4A:
case IF_SN_0A:
case IF_SI_0A:
Expand Down Expand Up @@ -6058,6 +6069,17 @@ void emitter::emitIns_R_R_R_I(instruction ins,
fmt = IF_LS_3G;
break;

case INS_ext:
assert(isVectorRegister(reg1));
assert(isVectorRegister(reg2));
assert(isVectorRegister(reg3));
assert(isValidVectorDatasize(size));
assert(isValidArrangement(size, opt));
assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
assert(isValidVectorIndex(size, EA_1BYTE, imm));
fmt = IF_DV_3G;
break;

default:
unreached();
break;
Expand Down Expand Up @@ -10654,6 +10676,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
imm = emitGetInsSC(id);
code = emitInsCode(ins, fmt);
code |= insEncodeVectorsize(id->idOpSize()); // Q
code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
code |= ((code_t)imm << 11); // iiii
code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
code |= insEncodeReg_Vd(id->idReg1()); // ddddd
dst += emitOutput_Instr(dst, code);
break;

case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
code = emitInsCode(ins, fmt);
elemsize = id->idOpSize();
Expand Down Expand Up @@ -12270,6 +12303,13 @@ void emitter::emitDispIns(
emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
break;

case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg3(), id->idInsOpt(), true);
emitDispImm(emitGetInsSC(id), false);
break;

case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
emitDispReg(id->idReg1(), size, true);
emitDispReg(id->idReg2(), size, true);
Expand Down Expand Up @@ -13950,6 +13990,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_DV_3G: // ext
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar)
case IF_DV_2M: // (vector)
// abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt,
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/emitfmtsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ IF_DEF(DV_3D, IS_NONE, NONE) // DV_3D .........X.mmmmm ......nnnnnddddd
IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
IF_DEF(DV_3F, IS_NONE, NONE) // DV_3F ...........mmmmm ......nnnnnddddd Qd Sn Vm (Qd used as both source and destination)
IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)

IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar)

Expand Down
91 changes: 62 additions & 29 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
{NI_##id, name, InstructionSet_##isa, static_cast<int>(ival), static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
#include "hwintrinsiclistxarch.h"
#elif defined (TARGET_ARM64)
#define HARDWARE_INTRINSIC(isa, name, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{NI_##isa##_##name, #name, InstructionSet_##isa, static_cast<int>(ival), static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{NI_##isa##_##name, #name, InstructionSet_##isa, static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
#include "hwintrinsiclistarm64.h"
#else
#error Unsupported platform
Expand Down Expand Up @@ -59,8 +59,6 @@ var_types Compiler::getBaseTypeFromArgIfNeeded(NamedIntrinsic intrinsic,
CORINFO_SIG_INFO* sig,
var_types baseType)
{
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);

if (HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
{
CORINFO_ARG_LIST_HANDLE arg = sig->args;
Expand Down Expand Up @@ -223,7 +221,6 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
return NO_CLASS_HANDLE;
}

#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
// vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID):
//
Expand Down Expand Up @@ -284,7 +281,6 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
// If we see two (or more) different instructions we need the extra VNF_SimdType arg
return (diffInsCount >= 2);
}
#endif // FEATURE_HW_INTRINSICS

//------------------------------------------------------------------------
// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
Expand Down Expand Up @@ -533,15 +529,16 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE
// addRangeCheckIfNeeded: add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic
//
// Arguments:
// intrinsic -- intrinsic ID
// immOP -- the last operand of the intrinsic that points to the imm-arg
// mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsics
// intrinsic -- intrinsic ID
// immOp -- the immediate operand of the intrinsic
// mustExpand -- true if the compiler is compiling the fallback (GT_CALL) of this intrinsic
// immUpperBound -- upper bound for a value of the immediate operand (for a non-full-range imm-intrinsic)
//
// Return Value:
// add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic, which would throw ArgumentOutOfRangeException
// when the imm-argument is not in the valid range
//
GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand)
GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immUpperBound)
{
assert(immOp != nullptr);
// Full-range imm-intrinsics do not need the range-check
Expand All @@ -555,7 +552,7 @@ GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* immO
)
{
assert(!immOp->IsCnsIntOrI());
GenTree* upperBoundNode = gtNewIconNode(HWIntrinsicInfo::lookupImmUpperBound(intrinsic), TYP_INT);
GenTree* upperBoundNode = gtNewIconNode(immUpperBound, TYP_INT);
GenTree* index = nullptr;
if ((immOp->gtFlags & GTF_SIDE_EFFECT) != 0)
{
Expand Down Expand Up @@ -643,30 +640,48 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
assert(sizeBytes != 0);
}

// NOTE: The following code assumes that for all intrinsics
// taking an immediate operand, that operand will be last.
if (sig->numArgs > 0 && HWIntrinsicInfo::isImmOp(intrinsic, impStackTop().val))
baseType = getBaseTypeFromArgIfNeeded(intrinsic, clsHnd, sig, baseType);
unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);

GenTree* immOp = nullptr;

#ifdef TARGET_ARM64
if (intrinsic == NI_AdvSimd_Insert)
{
assert(sig->numArgs == 3);
immOp = impStackTop(1).val;
assert(HWIntrinsicInfo::isImmOp(intrinsic, immOp));
}
else
#endif
if ((sig->numArgs > 0) && HWIntrinsicInfo::isImmOp(intrinsic, impStackTop().val))
{
// NOTE: The following code assumes that for all intrinsics
// taking an immediate operand, that operand will be last.
immOp = impStackTop().val;
}

if (immOp != nullptr)
{
GenTree* lastOp = impStackTop().val;
// The imm-HWintrinsics that do not accept all imm8 values may throw
// ArgumentOutOfRangeException when the imm argument is not in the valid range
if (!HWIntrinsicInfo::HasFullRangeImm(intrinsic))
if (!HWIntrinsicInfo::HasFullRangeImm(intrinsic) && immOp->IsCnsIntOrI())
{
if (!mustExpand && lastOp->IsCnsIntOrI() &&
!HWIntrinsicInfo::isInImmRange(intrinsic, (int)lastOp->AsIntCon()->IconValue()))
const int ival = (int)immOp->AsIntCon()->IconValue();

if (!HWIntrinsicInfo::isInImmRange(intrinsic, ival, simdSize, baseType))
{
assert(!mustExpand);
// The imm-HWintrinsics that do not accept all imm8 values may throw
// ArgumentOutOfRangeException when the imm argument is not in the valid range
return nullptr;
}
}

if (!lastOp->IsCnsIntOrI())
else if (!immOp->IsCnsIntOrI())
{
if (HWIntrinsicInfo::NoJmpTableImm(intrinsic))
{
return impNonConstFallback(intrinsic, retType, baseType);
}

if (!mustExpand)
else if (!mustExpand)
{
// When the imm-argument is not a constant and we are not being forced to expand, we need to
// return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
Expand All @@ -687,13 +702,22 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// table-driven importer of simple intrinsics
if (impIsTableDrivenHWIntrinsic(intrinsic, category))
{
baseType = getBaseTypeFromArgIfNeeded(intrinsic, clsHnd, sig, baseType);
unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
bool isScalar = category == HW_Category_Scalar;
CORINFO_ARG_LIST_HANDLE argList = sig->args;
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass;

int immUpperBound = 0;

if (immOp != nullptr)
{
#if defined(TARGET_XARCH)
immUpperBound = HWIntrinsicInfo::lookupImmUpperBound(intrinsic);
#elif defined(TARGET_ARM64)
immUpperBound = HWIntrinsicInfo::lookupImmUpperBound(intrinsic, simdSize, baseType);
#endif
}

assert(numArgs >= 0);
if (!isScalar && ((HWIntrinsicInfo::lookupIns(intrinsic, baseType) == INS_invalid) ||
((simdSize != 8) && (simdSize != 16) && (simdSize != 32))))
Expand Down Expand Up @@ -742,7 +766,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);

op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immUpperBound);

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
Expand Down Expand Up @@ -775,8 +799,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
GenTree* op3 = getArgForHWIntrinsic(argType, argClass);

op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand);

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);

Expand All @@ -785,6 +807,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

#ifdef TARGET_ARM64
if (intrinsic == NI_AdvSimd_Insert)
{
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immUpperBound);
}
else
#endif
{
op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immUpperBound);
}

retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic)
: gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);

Expand Down
Loading

0 comments on commit 32dd7d4

Please sign in to comment.