Skip to content

Commit

Permalink
[AVX-512] Add unmasked version of shift by immediate and shift by sin…
Browse files Browse the repository at this point in the history
…gle element in XMM.

Summary:
This is the first step towards being able to add the avx512 shift by immediate intrinsics to InstCombineCalls where we aleady support the sse2 and avx2 intrinsics. We need to the unmasked versions so we can avoid having to teach InstCombineCalls that it would need to insert selects sometimes. Instead we'll just add the selects around the new instrinsics in the frontend.

This change should also enable the shift by i32 intrinsics to take a non-constant shift value just like the avx2 and sse intrinsics. This will enable us to fix PR30691 once we update clang.

Next I'll switch clang to use the new builtins. Then we'll come back to the backend and remove/autoupgrade the old intrinsics. Then I'll work on the same series for variable shifts.

Reviewers: RKSimon, zvi, delena

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D26333

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286711 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Nov 12, 2016
1 parent 328301f commit 7fc9a56
Show file tree
Hide file tree
Showing 5 changed files with 958 additions and 0 deletions.
70 changes: 70 additions & 0 deletions include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1812,6 +1812,76 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;

def int_x86_avx512_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;

def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrai_q_256 : GCCBuiltin<"__builtin_ia32_psraqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;

def int_x86_avx512_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx512_psll_d_512 : GCCBuiltin<"__builtin_ia32_pslld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_psll_q_512 : GCCBuiltin<"__builtin_ia32_psllq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx512_psrl_d_512 : GCCBuiltin<"__builtin_ia32_psrld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrl_q_512 : GCCBuiltin<"__builtin_ia32_psrlq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx512_psra_d_512 : GCCBuiltin<"__builtin_ia32_psrad512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_psra_q_512 : GCCBuiltin<"__builtin_ia32_psraq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;

def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pslli_d_512 : GCCBuiltin<"__builtin_ia32_pslldi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pslli_q_512 : GCCBuiltin<"__builtin_ia32_psllqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrli_w_512 : GCCBuiltin<"__builtin_ia32_psrlwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrai_w_512 : GCCBuiltin<"__builtin_ia32_psrawi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrai_d_512 : GCCBuiltin<"__builtin_ia32_psradi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrai_q_512 : GCCBuiltin<"__builtin_ia32_psraqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;

def int_x86_avx512_mask_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
Expand Down
22 changes: 22 additions & 0 deletions lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,28 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_w_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psrai_d_512, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_psrai_q_128, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_psrai_q_256, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_psrai_q_512, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_psrai_w_512, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_psrl_d_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_psrl_q_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_psrl_w_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0),
Expand Down
Loading

0 comments on commit 7fc9a56

Please sign in to comment.