
Revert "[X86][SSE] Generalize X86ISD::BLENDI support to more value ty…
Browse files Browse the repository at this point in the history
…pes"

This reverts commit r353610.
It causes a miscompile visible in macro expansion in a bootstrapped clang.

http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20190211/626590.html

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353699 91177308-0d34-0410-b5e6-96231b3b80d8
sam-mccall committed Feb 11, 2019
1 parent e970ce1 commit 2704d6f
Showing 19 changed files with 385 additions and 290 deletions.
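The core of the revert is in lowerShuffleAsBlend below: rather than emitting X86ISD::BLENDI directly for every legal integer type, the restored code bitcasts integer inputs to a type the ISA can actually blend (v4i32/v8i32 for VPBLENDD, v8i16 for PBLENDW) and widens the blend immediate with scaleVectorShuffleBlendMask. As a rough sketch of that mask widening — assuming the helper simply replicates each element's select bit Scale times; the standalone name and driver here are illustrative, not LLVM code:

#include <cassert>
#include <cstdint>

// Sketch of the in-tree scaleVectorShuffleBlendMask helper (illustrative name
// here): every element-select bit of BlendMask is expanded into Scale
// consecutive bits, turning a blend mask over NumElts wide elements into one
// over NumElts * Scale narrower elements.
static uint64_t scaleBlendMask(uint64_t BlendMask, int NumElts, int Scale) {
  uint64_t ScaledMask = 0;
  for (int i = 0; i != NumElts; ++i)
    if (BlendMask & (1ull << i))
      ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
  return ScaledMask;
}

int main() {
  // A v2i64 blend taking element 0 from V1 and element 1 from V2 has
  // BlendMask = 0b10; widened for a v4i32 VPBLENDD it must select dwords
  // {2,3} from V2, i.e. immediate 0b1100.
  assert(scaleBlendMask(0b10, /*NumElts=*/2, /*Scale=*/2) == 0b1100);
  // The same mask expanded for the v8i16 PBLENDW fallback (Scale = 4).
  assert(scaleBlendMask(0b10, /*NumElts=*/2, /*Scale=*/4) == 0b11110000);
  return 0;
}

The restored switch applies exactly this widened immediate after bitcasting the operands to the narrower blend type.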
100 changes: 60 additions & 40 deletions lib/Target/X86/X86ISelLowering.cpp
@@ -10409,24 +10409,45 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
V2 = getZeroVector(VT, Subtarget, DAG, DL);

switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v8f32:
assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
LLVM_FALLTHROUGH;
case MVT::v2f64:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
// that instruction.
if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
LLVM_FALLTHROUGH;
case MVT::v8i16: {
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -10454,11 +10475,10 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
}
LLVM_FALLTHROUGH;
}
case MVT::v32i8:
assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
LLVM_FALLTHROUGH;
case MVT::v16i8: {
assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
case MVT::v16i8:
case MVT::v32i8: {
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");

// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
@@ -30954,11 +30974,34 @@ static bool matchBinaryPermuteShuffle(
return true;
}
} else {
// Determine a type compatible with X86ISD::BLENDI.
ShuffleVT = MaskVT;
if (Subtarget.hasAVX2()) {
if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v8i32;
else if (ShuffleVT == MVT::v2i64)
ShuffleVT = MVT::v4i32;
} else {
if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
ShuffleVT = MVT::v8i16;
else if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v4f64;
else if (ShuffleVT == MVT::v8i32)
ShuffleVT = MVT::v8f32;
}

if (!ShuffleVT.isFloatingPoint()) {
int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
BlendMask =
scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
}

V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
ShuffleVT = MaskVT;
return true;
}
}
@@ -32123,29 +32166,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,

return SDValue();
}
case X86ISD::BLENDI: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);

// blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
// TODO: Handle MVT::v16i16 repeated blend mask.
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
MVT SrcVT = N0.getOperand(0).getSimpleValueType();
if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
SrcVT.getScalarSizeInBits() >= 32) {
unsigned Mask = N.getConstantOperandVal(2);
unsigned Size = VT.getVectorNumElements();
unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0),
DAG.getConstant(ScaleMask, DL, MVT::i8)));
}
}
return SDValue();
}
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
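The second restored hunk in this file, matchBinaryPermuteShuffle, performs the matching remap on the shuffle-combining side: before forming an X86ISD::BLENDI it picks a type for which a blend instruction actually exists. A condensed sketch of that remapping, assuming it mirrors the restored switch shown above (the function and its string interface are purely illustrative, not LLVM API):

#include <cstdio>
#include <string>

// Illustrative only: the blend type the restored matchBinaryPermuteShuffle
// picks for an integer shuffle type. With AVX2, 64-bit elements are widened
// to 32-bit lanes for VPBLENDD; without it, 128-bit integer blends fall back
// to v8i16 PBLENDW and 256-bit ones to the float domain (VBLENDPS/VBLENDPD).
static std::string blendTypeFor(const std::string &MaskVT, bool HasAVX2) {
  if (HasAVX2) {
    if (MaskVT == "v4i64") return "v8i32";
    if (MaskVT == "v2i64") return "v4i32";
    return MaskVT;
  }
  if (MaskVT == "v2i64" || MaskVT == "v4i32") return "v8i16";
  if (MaskVT == "v4i64") return "v4f64";
  if (MaskVT == "v8i32") return "v8f32";
  return MaskVT;
}

int main() {
  std::printf("v2i64, SSE4.1 only -> %s\n", blendTypeFor("v2i64", false).c_str()); // v8i16
  std::printf("v4i64, AVX2        -> %s\n", blendTypeFor("v4i64", true).c_str());  // v8i32
  return 0;
}

In practice a v4i64 blend on an AVX1-only target is therefore expressed as VBLENDPD in the float domain, while with AVX2 it becomes a VPBLENDD over v8i32 with the immediate widened as in the earlier sketch.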
23 changes: 0 additions & 23 deletions lib/Target/X86/X86InstrSSE.td
@@ -6507,22 +6507,6 @@ let Predicates = [HasAVX2] in {
VEX_4V, VEX_L, VEX_WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), (iPTR imm:$src3)),
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
(VBLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
}

let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), (iPTR imm:$src3)),
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
(VBLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
}

defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
VR128, memop, f128mem, 1, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>;
@@ -6533,13 +6517,6 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
VR128, memop, i128mem, 1, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
(BLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
(BLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
25 changes: 13 additions & 12 deletions test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1912,8 +1912,8 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp, <2
define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_4xi64_to_2xi64_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %xmm2
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
; CHECK-NEXT: vmovdqa 16(%rdi), %xmm2
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1}
; CHECK-NEXT: retq
@@ -1927,8 +1927,8 @@ define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x
define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_4xi64_to_2xi64_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %xmm1
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; CHECK-NEXT: vmovdqa 16(%rdi), %xmm1
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],mem[2,3]
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2553,8 +2553,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask7(<8 x i64>* %vp, <4
define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovaps 32(%rdi), %xmm1
; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: retq
%vec = load <8 x i64>, <8 x i64>* %vp
%res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 4, i32 1>
@@ -2563,10 +2564,10 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_8xi64_to_2xi64_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %xmm2
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vmovdqa 32(%rdi), %xmm3
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} = xmm3[0],xmm2[0]
; CHECK-NEXT: retq
%vec = load <8 x i64>, <8 x i64>* %vp
%shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 4, i32 1>
@@ -2578,10 +2579,10 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x
define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %xmm1
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vmovdqa 32(%rdi), %xmm2
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm1[0]
; CHECK-NEXT: retq
%vec = load <8 x i64>, <8 x i64>* %vp
%shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 4, i32 1>
9 changes: 6 additions & 3 deletions test/CodeGen/X86/combine-sdiv.ll
@@ -1701,7 +1701,8 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $62, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpsrlq $2, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $2, %xmm2, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,2305843009213693952]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm3, %xmm2, %xmm2
@@ -1889,7 +1890,8 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
; AVX1-NEXT: vpsrlq $62, %xmm5, %xmm5
; AVX1-NEXT: vpaddq %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpsrlq $2, %xmm5, %xmm5
; AVX1-NEXT: vpsrlq $2, %xmm5, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854775808,2305843009213693952]
; AVX1-NEXT: vpxor %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsubq %xmm6, %xmm5, %xmm5
@@ -1909,7 +1911,8 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $62, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpsrlq $2, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $2, %xmm2, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
10 changes: 8 additions & 2 deletions test/CodeGen/X86/insertelement-ones.ll
@@ -291,15 +291,21 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm1[6],ymm2[7,8,9,10,11,12,13],ymm1[14],ymm2[15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm1[6],ymm2[7,8,9,10,11,12,13],ymm1[14],ymm2[15]
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT: retq
10 changes: 5 additions & 5 deletions test/CodeGen/X86/known-signbits-vector.ll
@@ -89,7 +89,7 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vpsrlq $32, %xmm0, %xmm0
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,0,1,0]
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,32768,0,0,1,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -115,7 +115,7 @@ define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,8,0]
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsllq $20, %xmm0, %xmm0
@@ -231,7 +231,7 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,8,0]
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -272,7 +272,7 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,8,0]
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -322,7 +322,7 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X32-NEXT: vpmovsxdq 8(%ebp), %xmm4
; X32-NEXT: vextractf128 $1, %ymm2, %xmm5
; X32-NEXT: vpsrlq $33, %xmm5, %xmm5
; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [1073741824,0,1,0]
; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [0,16384,0,0,1,0,0,0]
; X32-NEXT: vpxor %xmm6, %xmm5, %xmm5
; X32-NEXT: vpsubq %xmm6, %xmm5, %xmm5
; X32-NEXT: vpsrlq $33, %xmm2, %xmm2
