[X86][SSE] Improve shuffle combining of PACKSS instructions.
Support unary packing and fix the faux shuffle mask for vectors larger than 128 bits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314629 91177308-0d34-0410-b5e6-96231b3b80d8
RKSimon committed Oct 1, 2017
1 parent 7ea96b3 commit 62a43ff
Showing 3 changed files with 30 additions and 20 deletions.
lib/Target/X86/X86ISelLowering.cpp (24 additions & 6 deletions)
@@ -5932,16 +5932,34 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case X86ISD::PACKSS: {
+    SDValue N0 = N.getOperand(0);
+    SDValue N1 = N.getOperand(1);
+    assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
+           N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
+           "Unexpected input value type");
+
     // If we know input saturation won't happen we can treat this
     // as a truncation shuffle.
-    if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
-        DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+    if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
+        DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
       return false;
 
-    Ops.push_back(N.getOperand(0));
-    Ops.push_back(N.getOperand(1));
-    for (unsigned i = 0; i != NumElts; ++i)
-      Mask.push_back(i * 2);
+    bool IsUnary = (N0 == N1);
+    unsigned Offset = IsUnary ? 0 : NumElts;
+    unsigned NumLanes = VT.getSizeInBits() / 128;
+    unsigned NumEltsPerLane = NumElts / NumLanes;
+    unsigned HalfEltsPerLane = NumEltsPerLane / 2;
+
+    Ops.push_back(N0);
+    if (!IsUnary)
+      Ops.push_back(N1);
+
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+      for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
+        Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane));
+      for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
+        Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane) + Offset);
+    }
     return true;
   }
   case X86ISD::VSHLI:
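In getFauxShuffleMask, Ops holds the pack inputs and each mask entry indexes into the concatenation of those inputs viewed in units of the narrower result element type. PACKSS interleaves its two operands per 128-bit lane, so the old `Mask.push_back(i * 2)` loop was only correct for a single lane. A minimal standalone sketch of the new per-lane construction (the `packssMask` helper is illustrative, not LLVM code; NumElts and SizeInBits stand in for values read off the node):

```cpp
#include <cstdio>
#include <vector>

// Illustrative model of the per-lane mask construction in the hunk above.
std::vector<int> packssMask(unsigned NumElts, unsigned SizeInBits, bool IsUnary) {
  std::vector<int> Mask;
  unsigned Offset = IsUnary ? 0 : NumElts;  // unary packs index one operand
  unsigned NumLanes = SizeInBits / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  unsigned HalfEltsPerLane = NumEltsPerLane / 2;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    // Low half of each lane: the even (truncated) elements of N0's lane.
    for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
      Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane));
    // High half of each lane: the even elements of N1's lane.
    for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
      Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane) + Offset);
  }
  return Mask;
}

int main() {
  // 256-bit PACKSSDW: two v8i32 inputs -> v16i16, i.e. NumElts = 16.
  for (int M : packssMask(16, 256, /*IsUnary=*/false))
    printf("%d ", M);
  printf("\n");
  // Prints: 0 2 4 6 16 18 20 22 8 10 12 14 24 26 28 30
  // The old mask (0 2 4 ... 30) ignored the per-lane interleave and was
  // only right for 128-bit vectors.
  return 0;
}
```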
test/CodeGen/X86/vector-shuffle-combining-avx2.ll (4 additions & 10 deletions)
@@ -808,15 +808,13 @@ define <16 x i16> @shuffle_combine_packssdw_pshufb(<8 x i32> %a0) {
 ; X32-LABEL: shuffle_combine_packssdw_pshufb:
 ; X32:       # BB#0:
 ; X32-NEXT:    vpsrad $31, %ymm0, %ymm0
-; X32-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,8,9,4,5,0,1,12,13,8,9,4,5,0,1,16,17,20,21,24,25,28,29,28,29,24,25,20,21,16,17]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: shuffle_combine_packssdw_pshufb:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
-; X64-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,8,9,4,5,0,1,12,13,8,9,4,5,0,1,16,17,20,21,24,25,28,29,28,29,24,25,20,21,16,17]
 ; X64-NEXT:    retq
   %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
   %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1)
@@ -829,17 +827,13 @@ define <32 x i8> @shuffle_combine_packsswb_pshufb(<16 x i16> %a0, <16 x i16> %a1
 ; X32-LABEL: shuffle_combine_packsswb_pshufb:
 ; X32:       # BB#0:
 ; X32-NEXT:    vpsraw $15, %ymm0, %ymm0
-; X32-NEXT:    vpsraw $15, %ymm1, %ymm1
-; X32-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0,30,28,26,24,22,20,18,16,30,28,26,24,22,20,18,16]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: shuffle_combine_packsswb_pshufb:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpsraw $15, %ymm0, %ymm0
-; X64-NEXT:    vpsraw $15, %ymm1, %ymm1
-; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0,30,28,26,24,22,20,18,16,30,28,26,24,22,20,18,16]
 ; X64-NEXT:    retq
   %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
   %2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
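The updated CHECK lines can be sanity-checked by composing the two shuffles by hand. In shuffle_combine_packssdw_pshufb above, vpsrad $31 forces every dword to 0 or -1, so the unary vpackssdw is an exact truncation and each byte of the old post-pack vpshufb mask can be traced back to a source byte. A hypothetical checker (the `packssdwSrcByte` helper is illustrative, not part of the patch):

```cpp
#include <cstdio>

// Traces byte B of a unary 256-bit PACKSSDW result back to a byte of the
// source ymm register. Valid only because vpsrad $31 leaves every dword as
// 0 or -1, so the signed saturation in the pack is a pure truncation.
unsigned packssdwSrcByte(unsigned B) {
  unsigned Lane = B / 16;        // which 128-bit lane
  unsigned Word = (B % 16) / 2;  // 16-bit element within the lane
  unsigned Dword = Word % 4;     // unary pack: both halves read one operand
  return Lane * 16 + Dword * 4 + (B % 2);  // low word of the source dword
}

int main() {
  // The post-pack vpshufb byte mask from the removed CHECK lines of
  // shuffle_combine_packssdw_pshufb above.
  unsigned Old[32] = {6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,
                      16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17};
  for (unsigned B : Old)
    printf("%u,", packssdwSrcByte(B));
  printf("\n");
  // Prints 12,13,8,9,4,5,0,1,... matching the single combined vpshufb mask.
  return 0;
}
```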
test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (2 additions & 4 deletions)
@@ -644,15 +644,13 @@ define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) {
 ; SSE-LABEL: shuffle_combine_packssdw_pshufb:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrad $31, %xmm0
-; SSE-NEXT:    packssdw %xmm0, %xmm0
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_combine_packssdw_pshufb:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
-; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0]
 ; AVX-NEXT:    retq
   %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
   %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1)
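The SSSE3 test is the 128-bit case: there is only one lane, so the mask fix changes nothing here, and the improvement comes entirely from the unary-pack handling, which pushes a single operand onto Ops and lets the pack fold into the existing pshufb. The same byte-tracing check, minus the lane arithmetic (again an illustrative sketch, not part of the patch):

```cpp
#include <cstdio>

// 128-bit analogue of the check above: byte B of a unary PACKSSDW result
// traced back into the v4i32 source; no lane arithmetic needed on SSE.
unsigned srcByte(unsigned B) { return ((B / 2) % 4) * 4 + (B % 2); }

int main() {
  // Post-pack pshufb byte mask from the removed CHECK lines above.
  unsigned Old[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
  for (unsigned B : Old)
    printf("%u,", srcByte(B));  // prints 13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0
  printf("\n");
  return 0;
}
```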
