Skip to content

Commit

Permalink
[X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, a…
Browse files Browse the repository at this point in the history
…nd 64-bit mask registers.

This also adds a second intrinsic name for the 16-bit mask versions.

These intrinsics match gcc and icc. They just aren't published in the Intel Intrinsics Guide so I only recently found they existed.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@340719 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Aug 27, 2018
1 parent 6faba02 commit 119327d
Show file tree
Hide file tree
Showing 8 changed files with 427 additions and 12 deletions.
18 changes: 18 additions & 0 deletions include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512p
TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf")
TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf")

TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw")

TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
Expand Down Expand Up @@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx5
TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
Expand Down
44 changes: 32 additions & 12 deletions lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF,
}

static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
unsigned NumElts, ArrayRef<Value *> Ops,
ArrayRef<Value *> Ops,
bool InvertLHS = false) {
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);

Expand Down Expand Up @@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,

case X86::BI__builtin_ia32_kortestchi:
case X86::BI__builtin_ia32_kortestzhi: {
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
Value *C;
if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
Expand All @@ -10023,26 +10024,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}

case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
case X86::BI__builtin_ia32_kandsi:
case X86::BI__builtin_ia32_kanddi:
return EmitX86MaskLogic(*this, Instruction::And, Ops);
case X86::BI__builtin_ia32_kandnqi:
case X86::BI__builtin_ia32_kandnhi:
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
case X86::BI__builtin_ia32_kandnsi:
case X86::BI__builtin_ia32_kandndi:
return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
case X86::BI__builtin_ia32_korqi:
case X86::BI__builtin_ia32_korhi:
return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
case X86::BI__builtin_ia32_korsi:
case X86::BI__builtin_ia32_kordi:
return EmitX86MaskLogic(*this, Instruction::Or, Ops);
case X86::BI__builtin_ia32_kxnorqi:
case X86::BI__builtin_ia32_kxnorhi:
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
case X86::BI__builtin_ia32_kxnorsi:
case X86::BI__builtin_ia32_kxnordi:
return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
case X86::BI__builtin_ia32_kxorqi:
case X86::BI__builtin_ia32_kxorhi:
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
case X86::BI__builtin_ia32_knothi: {
Ops[0] = getMaskVecValue(*this, Ops[0], 16);
return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
Builder.getInt16Ty());
case X86::BI__builtin_ia32_kxorsi:
case X86::BI__builtin_ia32_kxordi:
return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
case X86::BI__builtin_ia32_knotqi:
case X86::BI__builtin_ia32_knothi:
case X86::BI__builtin_ia32_knotsi:
case X86::BI__builtin_ia32_knotdi: {
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
return Builder.CreateBitCast(Builder.CreateNot(Res),
Ops[0]->getType());
}

case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi: {
unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
uint32_t Indices[64];
Expand Down
72 changes: 72 additions & 0 deletions lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,78 @@ typedef unsigned long long __mmask64;
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))

static __inline __mmask32 __DEFAULT_FN_ATTRS
_knot_mask32(__mmask32 __M)
{
return __builtin_ia32_knotsi(__M);
}

static __inline __mmask64 __DEFAULT_FN_ATTRS
_knot_mask64(__mmask64 __M)
{
return __builtin_ia32_knotdi(__M);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kand_mask32(__mmask32 __A, __mmask32 __B)
{
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kandn_mask32(__mmask32 __A, __mmask32 __B)
{
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kor_mask32(__mmask32 __A, __mmask32 __B)
{
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kxnor_mask32(__mmask32 __A, __mmask32 __B)
{
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kxor_mask32(__mmask32 __A, __mmask32 __B)
{
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}

/* Integer compare */

#define _mm512_cmp_epi8_mask(a, b, p) \
Expand Down
38 changes: 38 additions & 0 deletions lib/Headers/avx512dqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,43 @@

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))

static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
return __builtin_ia32_knotqi(__M);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
Expand Down Expand Up @@ -1257,5 +1294,6 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__mmask8)(U))

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS

#endif
7 changes: 7 additions & 0 deletions lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -8369,6 +8369,13 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
Expand Down
Loading

0 comments on commit 119327d

Please sign in to comment.