Skip to content

Commit

Permalink
[X86] Add ktest intrinsics to match gcc and icc.
Browse files Browse the repository at this point in the history
These aren't documented in the Intel Intrinsics Guide, but are supported by gcc and icc.

Includes these intrinsics:
_ktestc_mask8_u8, _ktestz_mask8_u8, _ktest_mask8_u8
_ktestc_mask16_u8, _ktestz_mask16_u8, _ktest_mask16_u8
_ktestc_mask32_u8, _ktestz_mask32_u8, _ktest_mask32_u8
_ktestc_mask64_u8, _ktestz_mask64_u8, _ktest_mask64_u8

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@341265 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Aug 31, 2018
1 parent 48b19b2 commit 0b68110
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 0 deletions.
8 changes: 8 additions & 0 deletions include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,14 @@ TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
Expand Down
44 changes: 44 additions & 0 deletions lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10075,6 +10075,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}

case X86::BI__builtin_ia32_ktestcqi:
case X86::BI__builtin_ia32_ktestzqi:
case X86::BI__builtin_ia32_ktestchi:
case X86::BI__builtin_ia32_ktestzhi:
case X86::BI__builtin_ia32_ktestcsi:
case X86::BI__builtin_ia32_ktestzsi:
case X86::BI__builtin_ia32_ktestcdi:
case X86::BI__builtin_ia32_ktestzdi: {
Intrinsic::ID IID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_ktestcqi:
IID = Intrinsic::x86_avx512_ktestc_b;
break;
case X86::BI__builtin_ia32_ktestzqi:
IID = Intrinsic::x86_avx512_ktestz_b;
break;
case X86::BI__builtin_ia32_ktestchi:
IID = Intrinsic::x86_avx512_ktestc_w;
break;
case X86::BI__builtin_ia32_ktestzhi:
IID = Intrinsic::x86_avx512_ktestz_w;
break;
case X86::BI__builtin_ia32_ktestcsi:
IID = Intrinsic::x86_avx512_ktestc_d;
break;
case X86::BI__builtin_ia32_ktestzsi:
IID = Intrinsic::x86_avx512_ktestz_d;
break;
case X86::BI__builtin_ia32_ktestcdi:
IID = Intrinsic::x86_avx512_ktestc_q;
break;
case X86::BI__builtin_ia32_ktestzdi:
IID = Intrinsic::x86_avx512_ktestz_q;
break;
}

unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
Function *Intr = CGM.getIntrinsic(IID);
return Builder.CreateCall(Intr, {LHS, RHS});
}

case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
Expand Down
36 changes: 36 additions & 0 deletions lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,42 @@ _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
{
return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
{
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kadd_mask32(__mmask32 __A, __mmask32 __B)
{
Expand Down
36 changes: 36 additions & 0 deletions lib/Headers/avx512dqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,42 @@ _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
Expand Down
68 changes: 68 additions & 0 deletions test/CodeGen/avx512bw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,74 @@ unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}

unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestz_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestz_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D));
}

unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestc_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D));
}

unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_ktest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktest_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}

unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestz_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D));
}

unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestc_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D));
}

unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_ktest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktest_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}

__mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_kadd_mask32
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
Expand Down
68 changes: 68 additions & 0 deletions test/CodeGen/avx512dq-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,74 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}

unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestz_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D));
}

unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestc_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D));
}

unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_ktest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktest_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}

unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D));
}

unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_ktestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D));
}

unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_ktest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
// CHECK: trunc i32 [[RES]] to i8
return _ktest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}

__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_kadd_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
Expand Down

0 comments on commit 0b68110

Please sign in to comment.