Skip to content

Commit

Permalink
[X86][SSE] psrl(w/d/q) and psll(w/d/q) bit shifts for SSE2
Browse files Browse the repository at this point in the history
Patch to match cases where shuffle masks can be reduced to bit shifts. Similar to byte shift shuffle matching from D5699.

Differential Revision: http://reviews.llvm.org/D6649

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228047 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
RKSimon committed Feb 3, 2015
1 parent 8c775a4 commit 3d04e48
Show file tree
Hide file tree
Showing 9 changed files with 670 additions and 15 deletions.
108 changes: 108 additions & 0 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7808,6 +7808,79 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
return SDValue();
}

/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSRL(W/D/Q) and PSLL(W/D/Q)
/// SSE2 and AVX2 logical bit-shift instructions. The function matches
/// elements from one of the input vectors shuffled to the left or right
/// with zeroable elements 'shifted in'.
///
/// \param DL   Debug location attached to the nodes that get created.
/// \param VT   Vector type of the shuffle; its element count must equal the
///             mask size.
/// \param V1   First shuffle input (mask indices [0, Size)).
/// \param V2   Second shuffle input (mask indices [Size, 2 * Size)).
/// \param Mask Shuffle mask to match.
/// \param DAG  DAG used to build the replacement nodes.
/// \returns The shift, bitcast back to \p VT, or a null SDValue when the mask
///          cannot be expressed as a single logical bit shift.
static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
                                            SDValue V2, ArrayRef<int> Mask,
                                            SelectionDAG &DAG) {
  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

  int Size = Mask.size();
  assert(Size == static_cast<int>(VT.getVectorNumElements()) &&
         "Unexpected mask size");

  // PSRL : (little-endian) right bit shift.
  // [  1, zz,  3, zz]
  // [ -1, -1,  7, zz]
  // PSLL : (little-endian) left bit shift.
  // [ zz,  0, zz,  2 ]
  // [ -1,  4, zz, -1 ]
  //
  // Try to match a shift by 'Shift' elements inside groups of 'Scale'
  // elements, in the direction selected by 'Left'.
  auto MatchBitShift = [&](int Shift, int Scale, bool Left) -> SDValue {
    MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
    MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
    // Query the lowering info inline so nothing is left unused when asserts
    // are compiled out.
    assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
           "Illegal integer vector type");

    // Every element the shift fills in must be zeroable: the low 'Shift'
    // elements of each group for a left shift, the high 'Shift' elements for
    // a right shift.
    int FirstZero = Left ? 0 : Scale - Shift;
    for (int i = 0; i != Size; i += Scale)
      for (int j = FirstZero; j != FirstZero + Shift; ++j)
        if (!Zeroable[i + j])
          return SDValue();

    // The remaining elements of each group must be a sequential (or undef)
    // run taken from a single input, displaced by 'Shift' elements.
    bool MatchV1 = true, MatchV2 = true;
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
      MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + Size);
    }
    if (!(MatchV1 || MatchV2))
      return SDValue();

    // Cast the inputs to ShiftVT to match VSRLI/VSHLI and back again.
    unsigned OpCode = Left ? X86ISD::VSHLI : X86ISD::VSRLI;
    int ShiftAmt = Shift * VT.getScalarSizeInBits();
    SDValue V = MatchV1 ? V1 : V2;
    V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
    V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, MVT::i8));
    return DAG.getNode(ISD::BITCAST, DL, VT, V);
  };

  // SSE/AVX supports logical shifts up to 64-bit integers - so we can just
  // keep doubling the size of the integer elements up to that. We can
  // then shift the elements of the integer vector by whole multiples of
  // their width within the elements of the larger integer vector. Test each
  // multiple to see if we can find a match with the moved element indices
  // and that the shifted in elements are all zeroable.
  //
  // Both directions are tried independently: undef lanes are zeroable, so the
  // zero-fill requirement can hold for a left and a right shift at once while
  // only one of them agrees with the defined mask elements.
  for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 64; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift) {
      if (SDValue BitShift = MatchBitShift(Shift, Scale, /*Left=*/true))
        return BitShift;
      if (SDValue BitShift = MatchBitShift(Shift, Scale, /*Left=*/false))
        return BitShift;
    }

  // no match
  return SDValue();
}

/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
Expand Down Expand Up @@ -8654,6 +8727,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(Mask, DAG));
}

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v4i32, V1, V2, Mask, DAG))
return Shift;

// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v4i32, V1, V2, Mask, DAG))
Expand Down Expand Up @@ -8739,6 +8817,11 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
Mask, Subtarget, DAG))
return Broadcast;

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v8i16, V, V, Mask, DAG))
return Shift;

// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v8i16, V, V, Mask, DAG))
Expand Down Expand Up @@ -9356,6 +9439,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
"to be V1-input shuffles.");

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v8i16, V1, V2, Mask, DAG))
return Shift;

// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v8i16, V1, V2, Mask, DAG))
Expand Down Expand Up @@ -9512,6 +9600,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
return Shift;

// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
Expand Down Expand Up @@ -10602,6 +10695,11 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
}

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v8i32, V1, V2, Mask, DAG))
return Shift;

// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
Expand Down Expand Up @@ -10685,6 +10783,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
}

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v16i16, V1, V2, Mask, DAG))
return Shift;

// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
Expand Down Expand Up @@ -10763,6 +10866,11 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
}

// Try to use bit shift instructions.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v32i8, V1, V2, Mask, DAG))
return Shift;

// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
Expand Down
18 changes: 8 additions & 10 deletions test/CodeGen/X86/combine-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,14 @@ define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; shuffle instruction when the shuffle indexes are not compatible.

define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; CHECK-NEXT: orps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
; CHECK-LABEL: test17:
; CHECK: # BB#0:
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
Expand Down
22 changes: 20 additions & 2 deletions test/CodeGen/X86/vec_insert-5.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; CHECK-LABEL: t5:
; CHECK: # BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: psrlw $8, %xmm0
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
ret <16 x i8> %s
Expand All @@ -72,7 +72,7 @@ define <16 x i8> @t5(<16 x i8> %x) nounwind {
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; CHECK-LABEL: t6:
; CHECK: # BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: psrlw $8, %xmm0
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s
Expand All @@ -86,3 +86,21 @@ define <16 x i8> @t7(<16 x i8> %x) nounwind {
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
ret <16 x i8> %s
}

; Every defined result byte i takes source byte i+1 and the last byte comes
; from the zero vector. Byte 7 takes byte 8, which lies in the other 64-bit
; half, so the expected lowering is the whole-register byte shift checked
; below rather than a per-element bit shift.
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; CHECK-LABEL: t8:
; CHECK: # BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
ret <16 x i8> %s
}

; Same shape as a one-byte right shift but with an undef second operand: the
; defined result bytes 0, 6, 7 and 13 take source bytes 1, 7, 8 and 14.
; Byte 7 taking byte 8 crosses the 64-bit boundary, so a whole-register byte
; shift is the expected lowering.
define <16 x i8> @t9(<16 x i8> %x) nounwind {
; CHECK-LABEL: t9:
; CHECK: # BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
ret <16 x i8> %s
}
108 changes: 105 additions & 3 deletions test/CodeGen/X86/vector-shuffle-128-v16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -705,21 +705,21 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSSE3-NEXT: pslld $24, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSE41-NEXT: pslld $24, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; AVX-NEXT: vpslld $24, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
Expand Down Expand Up @@ -1185,6 +1185,108 @@ entry:
ret void
}

;
; Shuffle to logical bit shifts
;

; In every 16-bit lane the low byte is zero and the high byte takes the lane's
; original low byte of %a, so the shuffle is expected to become a 16-bit
; logical left shift by 8 bits.
define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; SSE: # BB#0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllw $8, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
ret <16 x i8> %shuffle
}

; In every 32-bit lane the three low bytes are zero and the top byte takes the
; lane's original lowest byte of %a, so the shuffle is expected to become a
; 32-bit logical left shift by 24 bits.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; SSE: # BB#0:
; SSE-NEXT: pslld $24, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; AVX: # BB#0:
; AVX-NEXT: vpslld $24, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
ret <16 x i8> %shuffle
}

; In each 64-bit half the seven low bytes are zero and the top byte takes the
; half's original lowest byte of %a, so the shuffle is expected to become a
; 64-bit logical left shift by 56 bits.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; SSE: # BB#0:
; SSE-NEXT: psllq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $56, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
ret <16 x i8> %shuffle
}

; Left shift with undef lanes mixed in: bytes 0 and 8 are zero and every other
; defined byte i takes byte i-1 of %a within its 64-bit half, so the shuffle is
; expected to become a 64-bit logical left shift by 8 bits.
define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; SSE: # BB#0:
; SSE-NEXT: psllq $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $8, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
}

; Right shift with undef lanes mixed in: the defined even bytes 0, 10, 12 and
; 14 take the odd byte above them in %a, and every defined odd byte is zero,
; so the shuffle is expected to become a 16-bit logical right shift by 8 bits.
define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $8, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
ret <16 x i8> %shuffle
}

; In each defined 32-bit lane the low two bytes take the lane's original high
; two bytes of %a and the high two bytes are zero (lane 2 is entirely undef),
; so the shuffle is expected to become a 32-bit logical right shift by 16 bits.
define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $16, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
ret <16 x i8> %shuffle
}

; The lowest byte of each 64-bit half takes that half's original top byte of
; %a and every other byte is zero or undef, so the shuffle is expected to
; become a 64-bit logical right shift by 56 bits.
define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $56, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
ret <16 x i8> %shuffle
}

define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-LABEL: PR12412:
; SSE2: # BB#0: # %entry
Expand Down
32 changes: 32 additions & 0 deletions test/CodeGen/X86/vector-shuffle-128-v4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1373,3 +1373,35 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
%shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}

;
; Shuffle to logical bit shifts
;

; Elements 0 and 2 are zero, element 1 takes element 0 of %a and element 3 is
; undef, so the shuffle is expected to become a 64-bit logical left shift by
; 32 bits.
define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
; SSE-LABEL: shuffle_v4i32_z0zX:
; SSE: # BB#0:
; SSE-NEXT: psllq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_z0zX:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $32, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
ret <4 x i32> %shuffle
}

; Elements 0 and 2 take elements 1 and 3 of %a while elements 1 and 3 are
; zero, so the shuffle is expected to become a 64-bit logical right shift by
; 32 bits.
define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
; SSE-LABEL: shuffle_v4i32_1z3z:
; SSE: # BB#0:
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_1z3z:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $32, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
ret <4 x i32> %shuffle
}
Loading

0 comments on commit 3d04e48

Please sign in to comment.