Skip to content

Commit

Permalink
[x86] autoupgrade and remove AVX2 integer min/max intrinsics
Browse files Browse the repository at this point in the history
This will (hopefully very temporarily) break clang.
The clang side of this should be the next commit.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272932 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
rotateright committed Jun 16, 2016
1 parent 6b7e0cc commit a240c3e
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 91 deletions.
36 changes: 0 additions & 36 deletions include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1934,42 +1934,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Vector min, max
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx512_mask_pmaxs_b_128 : GCCBuiltin<"__builtin_ia32_pmaxsb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
Expand Down
14 changes: 10 additions & 4 deletions lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.sse2.pminu.b" ||
Name == "x86.sse41.pminuw" ||
Name == "x86.sse41.pminud" ||
Name.startswith("x86.avx2.pmax") ||
Name.startswith("x86.avx2.pmin") ||
Name.startswith("x86.avx2.vbroadcast") ||
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
Expand Down Expand Up @@ -566,19 +568,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (Name == "llvm.x86.sse41.pmaxsb" ||
Name == "llvm.x86.sse2.pmaxs.w" ||
Name == "llvm.x86.sse41.pmaxsd") {
Name == "llvm.x86.sse41.pmaxsd" ||
Name.startswith("llvm.x86.avx2.pmaxs")) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
} else if (Name == "llvm.x86.sse2.pmaxu.b" ||
Name == "llvm.x86.sse41.pmaxuw" ||
Name == "llvm.x86.sse41.pmaxud") {
Name == "llvm.x86.sse41.pmaxud" ||
Name.startswith("llvm.x86.avx2.pmaxu")) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
} else if (Name == "llvm.x86.sse41.pminsb" ||
Name == "llvm.x86.sse2.pmins.w" ||
Name == "llvm.x86.sse41.pminsd") {
Name == "llvm.x86.sse41.pminsd" ||
Name.startswith("llvm.x86.avx2.pmins")) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
} else if (Name == "llvm.x86.sse2.pminu.b" ||
Name == "llvm.x86.sse41.pminuw" ||
Name == "llvm.x86.sse41.pminud") {
Name == "llvm.x86.sse41.pminud" ||
Name.startswith("llvm.x86.avx2.pminu")) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
} else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
Name == "llvm.x86.sse2.cvtps2pd" ||
Expand Down
12 changes: 0 additions & 12 deletions lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,18 +291,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
Expand Down
72 changes: 36 additions & 36 deletions test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1931,11 +1931,11 @@ define <4 x i64> @test_mm256_max_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
%cmp = icmp sgt <32 x i8> %arg0, %arg1
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
%bc = bitcast <32 x i8> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_max_epi16:
Expand All @@ -1949,11 +1949,11 @@ define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
%cmp = icmp sgt <16 x i16> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
%bc = bitcast <16 x i16> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_max_epi32:
Expand All @@ -1967,11 +1967,11 @@ define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %arg0, <8 x i32> %arg1)
%bc = bitcast <8 x i32> %res to <4 x i64>
%cmp = icmp sgt <8 x i32> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
%bc = bitcast <8 x i32> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_max_epu8:
Expand All @@ -1985,11 +1985,11 @@ define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
%cmp = icmp ugt <32 x i8> %arg0, %arg1
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
%bc = bitcast <32 x i8> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_max_epu16:
Expand All @@ -2003,11 +2003,11 @@ define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
%cmp = icmp ugt <16 x i16> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
%bc = bitcast <16 x i16> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_max_epu32:
Expand All @@ -2021,11 +2021,11 @@ define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %arg0, <8 x i32> %arg1)
%bc = bitcast <8 x i32> %res to <4 x i64>
%cmp = icmp ugt <8 x i32> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
%bc = bitcast <8 x i32> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epi8:
Expand All @@ -2039,11 +2039,11 @@ define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
%cmp = icmp slt <32 x i8> %arg0, %arg1
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
%bc = bitcast <32 x i8> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epi16:
Expand All @@ -2057,11 +2057,11 @@ define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
%cmp = icmp slt <16 x i16> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
%bc = bitcast <16 x i16> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epi32:
Expand All @@ -2075,11 +2075,11 @@ define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %arg0, <8 x i32> %arg1)
%bc = bitcast <8 x i32> %res to <4 x i64>
%cmp = icmp slt <8 x i32> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
%bc = bitcast <8 x i32> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epu8:
Expand All @@ -2093,11 +2093,11 @@ define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
%cmp = icmp ult <32 x i8> %arg0, %arg1
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
%bc = bitcast <32 x i8> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epu16:
Expand All @@ -2111,11 +2111,11 @@ define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
%cmp = icmp ult <16 x i16> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
%bc = bitcast <16 x i16> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_min_epu32:
Expand All @@ -2129,11 +2129,11 @@ define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %arg0, <8 x i32> %arg1)
%bc = bitcast <8 x i32> %res to <4 x i64>
%cmp = icmp ult <8 x i32> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
%bc = bitcast <8 x i32> %sel to <4 x i64>
ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone

define i32 @test_mm256_movemask_epi8(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_movemask_epi8:
Expand Down
Loading

0 comments on commit a240c3e

Please sign in to comment.