From 34fd21edebf010cfc9a9d685e4ad22d06f916fc1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 20 May 2020 16:32:50 -0700 Subject: [PATCH] Optimize ToVector128, ToVector128Unsafe and Vector128.GetLower() (#36732) * Optimize ToVector128 , ToVector128Unsafe and Vector128.GetLower() --- src/coreclr/src/jit/hwintrinsic.cpp | 2 +- src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp | 12 ++++++++++++ src/coreclr/src/jit/hwintrinsiclistarm64.h | 3 +++ .../src/System/Runtime/Intrinsics/Vector128.cs | 1 + .../src/System/Runtime/Intrinsics/Vector64.cs | 2 ++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index d0b2c1b53f338f..15c623ceca1515 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -610,7 +610,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType) (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= NI_Vector256_ToScalar)); #else assert((intrinsic >= NI_Vector64_AsByte && intrinsic <= NI_Vector64_AsUInt32) || - (intrinsic >= NI_Vector64_get_AllBitsSet && intrinsic <= NI_Vector64_ToScalar) || + (intrinsic >= NI_Vector64_get_AllBitsSet && intrinsic <= NI_Vector64_ToVector128Unsafe) || (intrinsic >= NI_Vector128_As && intrinsic <= NI_Vector128_AsUInt64) || (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_ToScalar)); #endif diff --git a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp index e2093a5b5f9510..7b16a566410b05 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp @@ -617,6 +617,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } break; + case NI_Vector64_ToVector128: + GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg); + break; + + case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_GetLower: + if (op1Reg != targetReg) + { + 
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg); + } + break; + case NI_Vector64_GetElement: case NI_Vector128_GetElement: case NI_Vector64_ToScalar: diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index 80a5f487e2ff42..5407ba7670ef9d 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -30,6 +30,8 @@ HARDWARE_INTRINSIC(Vector64, get_Count, HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector64, ToVector128Unsafe, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name 
SIMD size NumArg Instructions Category Flags @@ -53,6 +55,7 @@ HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 1 HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 5caec4828975ac..f2b4fe0ae6d9df 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1554,6 +1554,7 @@ public static Vector128 WithElement(this Vector128 vector, int index, T /// <param name="vector">The vector to get the lower 64-bits from.</param> /// <returns>The value of the lower 64-bits as a new <see cref="Vector64{T}" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [Intrinsic] public static Vector64 GetLower(this Vector128 vector) where T : struct { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 873056c31dca9b..d370a622ae0018 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -942,6 +942,7 @@ public static T ToScalar(this Vector64 vector) /// <param name="vector">The vector to extend.</param> /// <returns>A new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of <paramref name="vector" /> and the upper 64-bits initialized to zero.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [Intrinsic] public static Vector128 ToVector128(this Vector64 vector) where T : struct { @@ -957,6 +958,7 @@ public static Vector128 ToVector128(this Vector64 vector) /// <param name="vector">The vector to extend.</param> /// <returns>A new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of <paramref name="vector" /> and the upper 64-bits left uninitialized.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [Intrinsic] public static unsafe Vector128 ToVector128Unsafe(this Vector64 vector) where T : struct {