[Mono] Add SIMD intrinsics for Vector{64, 128} on Arm64 Part I (dotnet#64961)

* Add SIMD intrinsics for Arm64

* Add check for architecture

* Another one that currently only works on arm64

* Xor intrinsics only work on integer types

* Change the implementation for max/min

* Fix build on non-arm

* Fix build on mobile platforms

* Fix fmax and fmin on Amd64

* Only intrinsify Vector64 methods on Arm64
fanyang-mono authored Feb 14, 2022
1 parent 5844486 commit 135a638
Showing 3 changed files with 154 additions and 16 deletions.
70 changes: 57 additions & 13 deletions src/mono/mono/mini/mini-llvm.c
@@ -7713,29 +7713,73 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
break;
case OP_FMAX:
case OP_FMIN: {
#if defined(TARGET_X86) || defined(TARGET_AMD64)
LLVMValueRef args [] = { l, r };

#if defined(TARGET_X86) || defined(TARGET_AMD64)
LLVMTypeRef t = LLVMTypeOf (l);
unsigned int elems = LLVMGetVectorSize (t);
gboolean is_r4 = ins->inst_c1 == MONO_TYPE_R4;
if (ins->inst_c0 == OP_FMAX)
result = call_intrins (ctx, is_r4 ? INTRINS_SSE_MAXPS : INTRINS_SSE_MAXPD, args, dname);
else
result = call_intrins (ctx, is_r4 ? INTRINS_SSE_MINPS : INTRINS_SSE_MINPD, args, dname);
int iid = -1;
if (ins->inst_c0 == OP_FMAX) {
if (elems == 1)
iid = is_r4 ? INTRINS_SSE_MAXSS : INTRINS_SSE_MAXSD;
else
iid = is_r4 ? INTRINS_SSE_MAXPS : INTRINS_SSE_MAXPD;
} else {
if (elems == 1)
iid = is_r4 ? INTRINS_SSE_MINSS : INTRINS_SSE_MINSD;
else
iid = is_r4 ? INTRINS_SSE_MINPS : INTRINS_SSE_MINPD;
}
result = call_intrins (ctx, iid, args, dname);
#elif defined(TARGET_ARM64)
int instc0_arm64 = ins->inst_c0 == OP_FMAX ? INTRINS_AARCH64_ADV_SIMD_FMAX : INTRINS_AARCH64_ADV_SIMD_FMIN;
IntrinsicId iid = (IntrinsicId) instc0_arm64;
llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass);
result = call_overloaded_intrins (ctx, iid, ovr_tag, args, "");
#else
NOT_IMPLEMENTED;
#endif
break;
}
case OP_IMAX: {
gboolean is_unsigned = ins->inst_c1 == MONO_TYPE_U1 || ins->inst_c1 == MONO_TYPE_U2 || ins->inst_c1 == MONO_TYPE_U4 || ins->inst_c1 == MONO_TYPE_U8;
LLVMValueRef cmp = LLVMBuildICmp (builder, is_unsigned ? LLVMIntUGT : LLVMIntSGT, l, r, "");
result = LLVMBuildSelect (builder, cmp, l, r, "");
break;
}
case OP_IMAX:
case OP_IMIN: {
gboolean is_unsigned = ins->inst_c1 == MONO_TYPE_U1 || ins->inst_c1 == MONO_TYPE_U2 || ins->inst_c1 == MONO_TYPE_U4 || ins->inst_c1 == MONO_TYPE_U8;
LLVMValueRef cmp = LLVMBuildICmp (builder, is_unsigned ? LLVMIntULT : LLVMIntSLT, l, r, "");
LLVMIntPredicate op;
switch (ins->inst_c0) {
case OP_IMAX:
op = is_unsigned ? LLVMIntUGT : LLVMIntSGT;
break;
case OP_IMIN:
op = is_unsigned ? LLVMIntULT : LLVMIntSLT;
break;
default:
g_assert_not_reached ();
}
#if defined(TARGET_ARM64)
if ((ins->inst_c1 == MONO_TYPE_U8) || (ins->inst_c1 == MONO_TYPE_I8)) {
LLVMValueRef cmp = LLVMBuildICmp (builder, op, l, r, "");
result = LLVMBuildSelect (builder, cmp, l, r, "");
} else {
int instc0_arm64;
switch (ins->inst_c0) {
case OP_IMAX:
instc0_arm64 = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UMAX : INTRINS_AARCH64_ADV_SIMD_SMAX;
break;
case OP_IMIN:
instc0_arm64 = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UMIN : INTRINS_AARCH64_ADV_SIMD_SMIN;
break;
default:
g_assert_not_reached ();
}
IntrinsicId iid = (IntrinsicId) instc0_arm64;
LLVMValueRef args [] = { l, r };
llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass);
result = call_overloaded_intrins (ctx, iid, ovr_tag, args, "");
}
#else
LLVMValueRef cmp = LLVMBuildICmp (builder, op, l, r, "");
result = LLVMBuildSelect (builder, cmp, l, r, "");
#endif
break;
}

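A side note on the integer path above: on targets without a dedicated vector min/max instruction (and for the 64-bit element case on Arm64), OP_IMAX/OP_IMIN are lowered to an integer compare followed by a select. The following standalone LLVM-C sketch of that compare-and-select pattern is illustrative only and not part of the commit; it builds a function returning the lane-wise signed maximum of two <4 x i32> vectors, assuming the llvm-c headers and library are available.

#include <llvm-c/Core.h>

/* Illustrative sketch (not from the commit): lane-wise signed max of two
 * <4 x i32> vectors via icmp + select, mirroring the fallback emitted for
 * OP_IMAX/OP_IMIN in mini-llvm.c. */
static LLVMValueRef
build_vector_smax (LLVMModuleRef module)
{
	LLVMTypeRef vec4i32 = LLVMVectorType (LLVMInt32Type (), 4);
	LLVMTypeRef params [] = { vec4i32, vec4i32 };
	LLVMTypeRef fn_type = LLVMFunctionType (vec4i32, params, 2, 0);
	LLVMValueRef fn = LLVMAddFunction (module, "vector_smax", fn_type);

	LLVMBuilderRef builder = LLVMCreateBuilder ();
	LLVMPositionBuilderAtEnd (builder, LLVMAppendBasicBlock (fn, "entry"));

	LLVMValueRef l = LLVMGetParam (fn, 0);
	LLVMValueRef r = LLVMGetParam (fn, 1);
	/* Element-wise signed "greater than", then pick l or r per lane. */
	LLVMValueRef cmp = LLVMBuildICmp (builder, LLVMIntSGT, l, r, "");
	LLVMValueRef max = LLVMBuildSelect (builder, cmp, l, r, "");
	LLVMBuildRet (builder, max);

	LLVMDisposeBuilder (builder);
	return fn;
}
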
98 changes: 95 additions & 3 deletions src/mono/mono/mini/simd-intrinsics.c
@@ -564,6 +564,8 @@ type_to_extract_op (MonoTypeEnum type)

static guint16 sri_vector_methods [] = {
SN_Abs,
SN_Add,
SN_AndNot,
SN_As,
SN_AsByte,
SN_AsDouble,
@@ -575,6 +577,8 @@ static guint16 sri_vector_methods [] = {
SN_AsUInt16,
SN_AsUInt32,
SN_AsUInt64,
SN_BitwiseAnd,
SN_BitwiseOr,
SN_AsVector128,
SN_AsVector2,
SN_AsVector256,
@@ -585,16 +589,22 @@
SN_Create,
SN_CreateScalar,
SN_CreateScalarUnsafe,
SN_Divide,
SN_Floor,
SN_GetElement,
SN_GetLower,
SN_GetUpper,
SN_Max,
SN_Min,
SN_Multiply,
SN_Subtract,
SN_ToScalar,
SN_ToVector128,
SN_ToVector128Unsafe,
SN_ToVector256,
SN_ToVector256Unsafe,
SN_WithElement,
SN_Xor,
};

/* nint and nuint haven't been enabled yet for System.Runtime.Intrinsics.
@@ -657,6 +667,75 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
return NULL;
#endif
}
case SN_Add:
case SN_Max:
case SN_Min:
case SN_Multiply:
case SN_Subtract: {
int instc0 = -1;
if (arg0_type == MONO_TYPE_R4 || arg0_type == MONO_TYPE_R8) {
switch (id) {
case SN_Add:
instc0 = OP_FADD;
break;
case SN_Max:
instc0 = OP_FMAX;
break;
case SN_Min:
instc0 = OP_FMIN;
break;
case SN_Multiply:
instc0 = OP_FMUL;
break;
case SN_Subtract:
instc0 = OP_FSUB;
break;
default:
g_assert_not_reached ();
}
} else {
switch (id) {
case SN_Add:
instc0 = OP_IADD;
break;
case SN_Max:
instc0 = OP_IMAX;
break;
case SN_Min:
instc0 = OP_IMIN;
break;
case SN_Multiply:
instc0 = OP_IMUL;
break;
case SN_Subtract:
instc0 = OP_ISUB;
break;
default:
g_assert_not_reached ();
}
}
return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, instc0, arg0_type, fsig, args);
}
case SN_Divide: {
if ((arg0_type != MONO_TYPE_R4) && (arg0_type != MONO_TYPE_R8))
return NULL;
return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_FDIV, arg0_type, fsig, args);
}
case SN_AndNot:
#ifdef TARGET_ARM64
return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_BIC, -1, arg0_type, fsig, args);
#else
return NULL;
#endif
case SN_BitwiseAnd:
return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_IAND, arg0_type, fsig, args);
case SN_BitwiseOr:
return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_IOR, arg0_type, fsig, args);
case SN_Xor: {
if ((arg0_type == MONO_TYPE_R4) || (arg0_type == MONO_TYPE_R8))
return NULL;
return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_IXOR, arg0_type, fsig, args);
}
case SN_As:
case SN_AsByte:
case SN_AsDouble:
@@ -3107,17 +3186,30 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
if (m_class_get_nested_in (cmethod->klass))
class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass));

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
#if defined(TARGET_ARM64)
if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128") || !strcmp (class_name, "Vector64"))
return emit_sri_vector (cfg, cmethod, fsig, args);
}

if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector64`1") || !strcmp (class_name, "Vector128`1"))
if (!strcmp (class_name, "Vector128`1") || !strcmp (class_name, "Vector64`1"))
return emit_vector64_vector128_t (cfg, cmethod, fsig, args);
}
#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
#endif // defined(TARGET_ARM64)

// There aren't any SIMD intrinsics to replace Vector64 methods with on non-arm architectures
#if defined(TARGET_AMD64)
if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128"))
return emit_sri_vector (cfg, cmethod, fsig, args);
}

if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128`1"))
return emit_vector64_vector128_t (cfg, cmethod, fsig, args);
}
#endif // defined(TARGET_AMD64)

return emit_simd_intrinsics (class_ns, class_name, cfg, cmethod, fsig, args);
}
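For context on the SN_AndNot case above: Vector64.AndNot / Vector128.AndNot compute left & ~right per element, which is exactly the semantics of the AArch64 BIC ("bit clear") instruction, hence the mapping to OP_ARM64_BIC. A tiny scalar model of that operation (illustrative only, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* AndNot semantics: keep the bits of a that are not set in b. */
static uint32_t
andnot_u32 (uint32_t a, uint32_t b)
{
	return a & ~b;
}

int
main (void)
{
	/* Prints 0xffff00ff */
	printf ("0x%08x\n", andnot_u32 (0xffffffffu, 0x0000ff00u));
	return 0;
}
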
2 changes: 2 additions & 0 deletions src/mono/mono/mini/simd-methods.h
@@ -56,6 +56,8 @@ METHOD(AsVector2)
METHOD(AsVector256)
METHOD(AsVector3)
METHOD(AsVector4)
METHOD(BitwiseAnd)
METHOD(BitwiseOr)
METHOD(Create)
METHOD(CreateScalar)
METHOD(CreateScalarUnsafe)
