Skip to content

Commit

Permalink
[X86] Remove masking from 512-bit PSHUFB intrinsics in preparation for being able to constant fold it in InstCombineCalls like we do for 128/256-bit.
Browse files Browse the repository at this point in the history

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289344 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Dec 10, 2016
1 parent 6f7d674 commit df9e980
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 32 deletions.
7 changes: 3 additions & 4 deletions include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1328,10 +1328,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_pshuf_b_512 :
GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
def int_x86_avx512_pshuf_b_512 :
GCCBuiltin<"__builtin_ia32_pshufb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_shuf_f32x4_256 :
Expand Down
5 changes: 3 additions & 2 deletions lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.mask.pshuf.b.128" || // Added in 4.0
Name == "avx512.mask.pshuf.b.256" || // Added in 4.0
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Expand Down Expand Up @@ -1451,6 +1450,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");

Expand Down
3 changes: 1 addition & 2 deletions lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1055,8 +1055,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
Expand Down Expand Up @@ -1456,6 +1454,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
Expand Down
24 changes: 24 additions & 0 deletions test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -968,3 +968,27 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1,
ret <32 x i16> %res4
}

declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls the legacy masked intrinsic llvm.x86.avx512.mask.pshuf.b.512 twice with
; the same operands %x0, %x1, %x2: once with the variable mask %x3 and once
; with an all-ones mask (i64 -1), then returns the byte-wise sum of both
; results. The check lines expect one unmasked vpshufb, one vpshufb masked by
; %k1 that writes into zmm2, and a vpaddb combining them.
; NOTE(review): this test sits in the "-upgrade" file, so it presumably
; verifies that the old masked intrinsic name is still accepted via
; auto-upgrade — confirm against the RUN lines, which are not visible here.
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm3
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm3
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
; Variable-mask call: %x3 supplies the 64-bit mask.
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
; All-ones mask call: i64 -1 sets every mask bit.
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Adding the two results keeps both calls live in the returned value.
%res2 = add <64 x i8> %res, %res1
ret <64 x i8> %res2
}

56 changes: 42 additions & 14 deletions test/CodeGen/X86/avx512bw-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1747,29 +1747,57 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
ret <32 x i16> %res2
}

declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)

define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; Calls the unmasked intrinsic llvm.x86.avx512.pshuf.b.512 on %x0 and %x1 and
; returns its result directly. Both check prefixes expect a single vpshufb on
; zmm registers followed by the return.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
%res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
%mask.cast = bitcast i64 %mask to <64 x i1>
%res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
ret <64 x i8> %res2
}

; Zero-masking pattern built from generic IR: the unmasked intrinsic
; llvm.x86.avx512.pshuf.b.512 is called on %x0 and %x1, the i64 %mask is
; bitcast to <64 x i1>, and a select picks the shuffle result or
; zeroinitializer per byte. The check lines expect this to become a single
; vpshufb with {%k1} {z} after the mask is moved into %k1.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
; Reinterpret the 64-bit scalar mask as one i1 per byte lane.
%mask.cast = bitcast i64 %mask to <64 x i1>
; Lanes whose mask bit is clear take zero instead of the shuffle result.
%res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
ret <64 x i8> %res2
}

Expand Down
14 changes: 4 additions & 10 deletions test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,7 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; X32: # BB#0:
; X32-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X32-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X32-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X32-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
Expand All @@ -604,9 +602,9 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
;
; X64-LABEL: combine_pshufb_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X64-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X64-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X64-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
Expand Down Expand Up @@ -759,9 +757,7 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_pslldq_mask:
; X32: # BB#0:
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X32-NEXT: retl
;
Expand Down Expand Up @@ -790,9 +786,7 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_psrldq_mask:
; X32: # BB#0:
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
Expand Down

0 comments on commit df9e980

Please sign in to comment.