Skip to content

Commit

Permalink
Expose various floating-point intrinsics for Avx512F and Avx512DQ (dotnet#85716)
Browse files Browse the repository at this point in the history

* Expose GetExponent and GetMantissa for Avx512F

* Expose Reciprocal14 and ReciprocalSqrt14 for Avx512F

* Expose RoundScale and Scale for Avx512F

* Expose Fixup for Avx512F + Range and Reduce for Avx512DQ

* Ensure the RMW handling for Fixup avoids allocating a register

* Ensure the NI_AVX512F_Fixup handling in isRMWHWIntrinsic compiles

* Ensure vrange is marked as INS_Flags_IsDstDstSrcAVXInstruction

* Apply formatting patch

* Ensure vfixupimm is correctly handled in the JIT

* Ensure FixupScalar only checks the first element when doing RMW validation
  • Loading branch information
tannergooding authored May 4, 2023
1 parent 6bae14c commit cb5fe56
Show file tree
Hide file tree
Showing 32 changed files with 2,627 additions and 719 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_R_R_RM(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3);
void genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
void genBaseIntrinsic(GenTreeHWIntrinsic* node);
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
Expand Down
157 changes: 157 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8715,6 +8715,109 @@ void emitter::emitIns_SIMD_R_R_S_R(
emitIns_R_S(ins, attr, targetReg, varx, offs);
}
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_A_I: emits a SIMD instruction whose sources are two registers, a memory operand described
//                         by a GenTreeIndir, and an immediate; the result is produced in a register
//
// Arguments:
//    ins       -- The instruction being emitted
//    attr      -- The emit attribute
//    targetReg -- The target register
//    op1Reg    -- The register of the first operand; it is copied into targetReg before `ins` is emitted
//    op2Reg    -- The register of the second operand
//    indir     -- The GenTreeIndir used for the memory address
//    ival      -- The immediate value
//
// Notes:
//    Requires that the SIMD encoding is in use (asserted).
//
void emitter::emitIns_SIMD_R_R_R_A_I(instruction   ins,
                                     emitAttr      attr,
                                     regNumber     targetReg,
                                     regNumber     op1Reg,
                                     regNumber     op2Reg,
                                     GenTreeIndir* indir,
                                     int           ival)
{
    assert(UseSimdEncoding());

    // `ins` is only handed targetReg, op2Reg and the memory operand below, so op1 has to
    // reach it through targetReg. Presumably the move is elided when targetReg == op1Reg.
    //
    // NOTE(review): if targetReg aliases op2Reg while differing from op1Reg, this copy
    // overwrites op2 before it is consumed -- presumably the register allocator prevents
    // that for callers of this helper; confirm.
    const bool allowMoveElision = true;
    emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, allowMoveElision);

    emitIns_R_R_A_I(ins, attr, targetReg, op2Reg, indir, ival, IF_RWR_RRD_ARD_CNS);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_C_I: emits a SIMD instruction whose sources are two registers, a memory operand described
//                         by a field handle + offset, and an immediate; the result is produced in a register
//
// Arguments:
//    ins       -- The instruction being emitted
//    attr      -- The emit attribute
//    targetReg -- The target register
//    op1Reg    -- The register of the first operand; it is copied into targetReg before `ins` is emitted
//    op2Reg    -- The register of the second operand
//    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
//    offs      -- The offset added to the memory address from fldHnd
//    ival      -- The immediate value
//
// Notes:
//    Requires that the SIMD encoding is in use (asserted).
//
void emitter::emitIns_SIMD_R_R_R_C_I(instruction          ins,
                                     emitAttr             attr,
                                     regNumber            targetReg,
                                     regNumber            op1Reg,
                                     regNumber            op2Reg,
                                     CORINFO_FIELD_HANDLE fldHnd,
                                     int                  offs,
                                     int                  ival)
{
    assert(UseSimdEncoding());

    // `ins` is only handed targetReg, op2Reg and the field-based memory operand below, so
    // op1 has to reach it through targetReg. Presumably the move is elided when
    // targetReg == op1Reg.
    //
    // NOTE(review): if targetReg aliases op2Reg while differing from op1Reg, this copy
    // overwrites op2 before it is consumed -- presumably the register allocator prevents
    // that for callers of this helper; confirm.
    const bool allowMoveElision = true;
    emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, allowMoveElision);

    emitIns_R_R_C_I(ins, attr, targetReg, op2Reg, fldHnd, offs, ival);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_R_I: emits a SIMD instruction whose sources are three registers and an immediate; the
//                         result is produced in a register
//
// Arguments:
//    ins       -- The instruction being emitted
//    attr      -- The emit attribute
//    targetReg -- The target register
//    op1Reg    -- The register of the first operand; it is copied into targetReg before `ins` is emitted
//    op2Reg    -- The register of the second operand
//    op3Reg    -- The register of the third operand
//    ival      -- The immediate value
//
// Notes:
//    Requires that the SIMD encoding is in use (asserted).
//
void emitter::emitIns_SIMD_R_R_R_R_I(instruction ins,
                                     emitAttr    attr,
                                     regNumber   targetReg,
                                     regNumber   op1Reg,
                                     regNumber   op2Reg,
                                     regNumber   op3Reg,
                                     int         ival)
{
    assert(UseSimdEncoding());

    // `ins` is only handed targetReg, op2Reg and op3Reg below, so op1 has to reach it
    // through targetReg. Presumably the move is elided when targetReg == op1Reg.
    //
    // NOTE(review): if targetReg aliases op2Reg or op3Reg while differing from op1Reg, this
    // copy overwrites that operand before it is consumed -- presumably the register
    // allocator prevents that for callers of this helper; confirm.
    const bool allowMoveElision = true;
    emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, allowMoveElision);

    emitIns_R_R_R_I(ins, attr, targetReg, op2Reg, op3Reg, ival);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_S_I: emits a SIMD instruction whose sources are two registers, a memory operand described
//                         by a variable index + offset, and an immediate; the result is produced in a register
//
// Arguments:
//    ins       -- The instruction being emitted
//    attr      -- The emit attribute
//    targetReg -- The target register
//    op1Reg    -- The register of the first operand; it is copied into targetReg before `ins` is emitted
//    op2Reg    -- The register of the second operand
//    varx      -- The variable index used for the memory address
//    offs      -- The offset added to the memory address from varx
//    ival      -- The immediate value
//
// Notes:
//    Requires that the SIMD encoding is in use (asserted).
//
void emitter::emitIns_SIMD_R_R_R_S_I(instruction ins,
                                     emitAttr    attr,
                                     regNumber   targetReg,
                                     regNumber   op1Reg,
                                     regNumber   op2Reg,
                                     int         varx,
                                     int         offs,
                                     int         ival)
{
    assert(UseSimdEncoding());

    // `ins` is only handed targetReg, op2Reg and the variable-based memory operand below,
    // so op1 has to reach it through targetReg. Presumably the move is elided when
    // targetReg == op1Reg.
    //
    // NOTE(review): if targetReg aliases op2Reg while differing from op1Reg, this copy
    // overwrites op2 before it is consumed -- presumably the register allocator prevents
    // that for callers of this helper; confirm.
    const bool allowMoveElision = true;
    emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, allowMoveElision);

    emitIns_R_R_S_I(ins, attr, targetReg, op2Reg, varx, offs, ival);
}
#endif // FEATURE_HW_INTRINSICS

/*****************************************************************************
Expand Down Expand Up @@ -18070,9 +18173,35 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vcvttpd2qq:
case INS_vcvttpd2uqq:
case INS_vcvtuqq2pd:
case INS_vfixupimmpd:
case INS_vfixupimmps:
case INS_vfixupimmsd:
case INS_vfixupimmss:
case INS_vgetexppd:
case INS_vgetexpps:
case INS_vgetexpsd:
case INS_vgetexpss:
case INS_vgetmantpd:
case INS_vgetmantps:
case INS_vgetmantsd:
case INS_vgetmantss:
case INS_vrangepd:
case INS_vrangeps:
case INS_vrangesd:
case INS_vrangess:
case INS_vreducepd:
case INS_vreduceps:
case INS_vreducesd:
case INS_vreducess:
case INS_vscalefpd:
case INS_vscalefps:
case INS_vscalefsd:
case INS_vscalefss:
{
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_4C;
break;
}

case INS_vpmovdb:
case INS_vpmovdw:
Expand Down Expand Up @@ -18142,13 +18271,41 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency += PERFSCORE_LATENCY_4C;
break;

case INS_vrcp14pd:
case INS_vrcp14ps:
case INS_vrcp14sd:
case INS_vrcp14ss:
case INS_vrsqrt14pd:
case INS_vrsqrt14sd:
case INS_vrsqrt14ps:
case INS_vrsqrt14ss:
{
if (opSize == EA_64BYTE)
{
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_8C;
}
else
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_4C;
}
break;
}

case INS_roundpd:
case INS_roundps:
case INS_roundsd:
case INS_roundss:
case INS_vrndscalepd:
case INS_vrndscaleps:
case INS_vrndscalesd:
case INS_vrndscaless:
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_8C;
break;
}

case INS_cvttsd2si:
case INS_cvtsd2si:
Expand Down
31 changes: 31 additions & 0 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,37 @@ void emitIns_SIMD_R_R_C_R(instruction ins,
int offs);
void emitIns_SIMD_R_R_S_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs);

// SIMD emit helpers for instructions that take a target register, two source registers
// (op1Reg, op2Reg), one further source operand and a trailing immediate (ival). The four
// variants differ only in how that further operand is described:
//   _A_I -- a memory address given by a GenTreeIndir
//   _C_I -- a memory address given by a field handle plus offset
//   _R_I -- a third register (op3Reg)
//   _S_I -- a memory address given by a variable index plus offset

// Variant whose last source operand is the address described by `indir`.
void emitIns_SIMD_R_R_R_A_I(instruction ins,
emitAttr attr,
regNumber targetReg,
regNumber op1Reg,
regNumber op2Reg,
GenTreeIndir* indir,
int ival);
// Variant whose last source operand is the address `fldHnd` + `offs`.
void emitIns_SIMD_R_R_R_C_I(instruction ins,
emitAttr attr,
regNumber targetReg,
regNumber op1Reg,
regNumber op2Reg,
CORINFO_FIELD_HANDLE fldHnd,
int offs,
int ival);
// Variant whose last source operand is the register `op3Reg`.
void emitIns_SIMD_R_R_R_R_I(instruction ins,
emitAttr attr,
regNumber targetReg,
regNumber op1Reg,
regNumber op2Reg,
regNumber op3Reg,
int ival);
// Variant whose last source operand is the address of variable `varx` + `offs`.
void emitIns_SIMD_R_R_R_S_I(instruction ins,
emitAttr attr,
regNumber targetReg,
regNumber op1Reg,
regNumber op2Reg,
int varx,
int offs,
int ival);
#endif // FEATURE_HW_INTRINSICS

enum EmitCallType
Expand Down
57 changes: 53 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19332,12 +19332,15 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
assert(comp != nullptr);

#if defined(TARGET_XARCH)
GenTreeHWIntrinsic* hwintrinsic = AsHWIntrinsic();
NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();

if (!comp->canUseVexEncoding())
{
return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
return HWIntrinsicInfo::HasRMWSemantics(intrinsicId);
}

switch (AsHWIntrinsic()->GetHWIntrinsicId())
switch (intrinsicId)
{
// TODO-XArch-Cleanup: Move this switch block to be table driven.

Expand Down Expand Up @@ -19365,6 +19368,50 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
return true;
}

case NI_AVX512F_Fixup:
case NI_AVX512F_FixupScalar:
case NI_AVX512F_VL_Fixup:
{
// We are actually only RMW in the case where the lookup table
// has any value that could result in `op1` being picked. So
// in the case `op3` is a constant and none of the nibbles are
// `0`, then we don't have to be RMW and can actually "drop" `op1`

GenTree* op3 = hwintrinsic->Op(3);

if (!op3->IsCnsVec())
{
return true;
}

GenTreeVecCon* vecCon = op3->AsVecCon();

var_types simdBaseType = hwintrinsic->GetSimdBaseType();
unsigned simdSize = hwintrinsic->GetSimdSize();
uint32_t count = simdSize / sizeof(uint32_t);
uint32_t incSize = (simdBaseType == TYP_FLOAT) ? 1 : 2;

if (intrinsicId == NI_AVX512F_FixupScalar)
{
// Upper elements come from op2
count = 1;
}

for (uint32_t i = 0; i < count; i += incSize)
{
uint32_t tbl = vecCon->gtSimdVal.u32[i];

if (((tbl & 0x0000000F) == 0) || ((tbl & 0x000000F0) == 0) || ((tbl & 0x00000F00) == 0) ||
((tbl & 0x0000F000) == 0) || ((tbl & 0x000F0000) == 0) || ((tbl & 0x00F00000) == 0) ||
((tbl & 0x0F000000) == 0) || ((tbl & 0xF0000000) == 0))
{
return true;
}
}

return false;
}

default:
{
return false;
Expand Down Expand Up @@ -20585,7 +20632,8 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
else if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Ceiling;
GenTree* op2 = gtNewIconNode(static_cast<int32_t>(FloatRoundingMode::ToPositiveInfinity));
return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize);
}
else
{
Expand Down Expand Up @@ -22156,7 +22204,8 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType
else if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Floor;
GenTree* op2 = gtNewIconNode(static_cast<int32_t>(FloatRoundingMode::ToNegativeInfinity));
return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize);
}
else
{
Expand Down
13 changes: 11 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1009,11 +1009,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
switch (numArgs)
{
case 0:
{
assert(!isScalar);
retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
break;
}

case 1:
{
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

if ((category == HW_Category_MemoryLoad) && op1->OperIs(GT_CAST))
Expand Down Expand Up @@ -1067,8 +1070,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
#endif // TARGET_XARCH

break;
}

case 2:
{
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
Expand Down Expand Up @@ -1121,8 +1126,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}
#endif
break;
}

case 3:
{
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
Expand Down Expand Up @@ -1164,9 +1171,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}
#endif
break;
}

#ifdef TARGET_ARM64
case 4:
{
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd);
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
Expand All @@ -1176,7 +1184,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
assert(!isScalar);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
break;
#endif
}

default:
break;
}
Expand Down
Loading

0 comments on commit cb5fe56

Please sign in to comment.