[X86][AVX2] Fix SIGN_EXTEND vector handling on AVX2 targets.
On AVX2 targets we currently legalize SIGN_EXTEND ops poorly whenever the input's legalized type doesn't have the same number of elements as the destination, resulting in an ANY_EXTEND followed by a SIGN_EXTEND_INREG.

This patch updates the existing SIGN_EXTEND -> SIGN_EXTEND_VECTOR_INREG combine to extend the input to the size of the result and lower it with SIGN_EXTEND_VECTOR_INREG instead.

Differential Revision: http://reviews.llvm.org/D16994

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260210 91177308-0d34-0410-b5e6-96231b3b80d8
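For context, a minimal IR sketch of the pattern the commit message describes (the function name is hypothetical; it mirrors the sext_16i8_to_8i32 / sitofp_8i8_to_8f32 tests updated below): the <8 x i8> source legalizes to a 128-bit vector with 16 elements, while the <8 x i32> result is a 256-bit vector with 8 elements, so the element counts differ. With this patch, AVX2 targets should lower the sext to a single vpmovsxbd instead of an any-extend plus a shift pair.

; Hypothetical example, mirroring sext_16i8_to_8i32 in test/CodeGen/X86/vector-sext.ll.
define <8 x i32> @sext_8i8_to_8i32_example(<8 x i8> %x) {
  %ext = sext <8 x i8> %x to <8 x i32>
  ret <8 x i32> %ext
}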
RKSimon committed Feb 9, 2016
1 parent a55dfdc commit 6a78b65
Showing 3 changed files with 24 additions and 20 deletions.
26 changes: 20 additions & 6 deletions lib/Target/X86/X86ISelLowering.cpp
@@ -1205,6 +1205,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
       setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
 
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
+
       // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
       // when we have a 256bit-wide blend with immediate.
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -15576,16 +15580,26 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
   MVT InVT = In.getSimpleValueType();
   assert(VT.getSizeInBits() == InVT.getSizeInBits());
 
+  MVT SVT = VT.getVectorElementType();
   MVT InSVT = InVT.getVectorElementType();
-  assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits());
+  assert(SVT.getSizeInBits() > InSVT.getSizeInBits());
 
-  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
     return SDValue();
   if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
     return SDValue();
+  if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
+      !(VT.is256BitVector() && Subtarget.hasInt256()))
+    return SDValue();
 
   SDLoc dl(Op);
 
+  // For 256-bit vectors, we only need the lower (128-bit) half of the input.
+  if (VT.is256BitVector())
+    In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+                     MVT::getVectorVT(InSVT, InVT.getVectorNumElements() / 2),
+                     In, DAG.getIntPtrConstant(0, dl));
+
   // SSE41 targets can use the pmovsx* instructions directly.
   if (Subtarget.hasSSE41())
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
@@ -27915,10 +27929,10 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
                        DAG.getIntPtrConstant(0, DL));
   }
 
-  // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
-  // which ensures lowering to X86ISD::VSEXT (pmovsx*).
-  if (VT.getSizeInBits() == 128) {
-    SDValue ExOp = ExtendVecSize(DL, N0, 128);
+  // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
+  // ISD::SIGN_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::VSEXT.
+  if (VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasInt256())) {
+    SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
     return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
   }
 
4 changes: 1 addition & 3 deletions test/CodeGen/X86/vec_int_to_fp.ll
@@ -1085,9 +1085,7 @@ define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: sitofp_8i8_to_8f32:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpslld $24, %ymm0, %ymm0
-; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT: retq
 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
14 changes: 3 additions & 11 deletions test/CodeGen/X86/vector-sext.ll
@@ -169,9 +169,7 @@ define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp
 ;
 ; AVX2-LABEL: sext_16i8_to_8i32:
 ; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpslld $24, %ymm0, %ymm0
-; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; X32-SSE41-LABEL: sext_16i8_to_8i32:
@@ -283,10 +281,7 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
 ;
 ; AVX2-LABEL: sext_16i8_to_4i64:
 ; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; X32-SSE41-LABEL: sext_16i8_to_4i64:
@@ -477,10 +472,7 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp
 ;
 ; AVX2-LABEL: sext_8i16_to_4i64:
 ; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: vpslld $16, %xmm0, %xmm0
-; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; X32-SSE41-LABEL: sext_8i16_to_4i64:
