Skip to content

Commit

Permalink
[X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. …
Browse files Browse the repository at this point in the history
…NFCI

Don't regenerate a zeroable element mask with computeZeroableShuffleElements when its already available.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286045 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
RKSimon committed Nov 5, 2016
1 parent 30c59a5 commit cc24df9
Showing 1 changed file with 24 additions and 20 deletions.
44 changes: 24 additions & 20 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7558,12 +7558,12 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
SmallVector<int, 8> Mask(Original.begin(), Original.end());
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
bool ForceV1Zero = false, ForceV2Zero = false;

// Attempt to generate the binary blend mask. If an input is zero then
Expand Down Expand Up @@ -9047,6 +9047,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
Expand Down Expand Up @@ -9108,7 +9109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Use dedicated unpack instructions for masks that match their pattern.
Expand Down Expand Up @@ -9201,7 +9202,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Use dedicated unpack instructions for masks that match their pattern.
Expand Down Expand Up @@ -9396,7 +9397,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Use INSERTPS if we can complete the shuffle efficiently.
Expand Down Expand Up @@ -9485,7 +9486,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
Expand Down Expand Up @@ -10125,7 +10126,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
Expand Down Expand Up @@ -10402,8 +10403,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
Mask, Subtarget, DAG))
if (SDValue Blend = lowerVectorShuffleAsBlend(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;

// We can use an unpack to do the blending rather than an or in some
Expand Down Expand Up @@ -10536,7 +10537,7 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
case MVT::v2i64:
return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
Expand Down Expand Up @@ -10841,6 +10842,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// TODO: If minimizing size and one of the inputs is a zero vector and the
Expand All @@ -10849,7 +10851,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,

// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
Expand Down Expand Up @@ -11359,6 +11361,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
Expand All @@ -11369,7 +11372,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return V;

if (V2.isUndef()) {
Expand Down Expand Up @@ -11413,7 +11416,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Check if the blend happens to exactly fit that of SHUFPD.
Expand Down Expand Up @@ -11464,11 +11467,11 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return V;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Check for being able to broadcast a single element.
Expand Down Expand Up @@ -11530,6 +11533,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
Expand All @@ -11538,7 +11542,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Check for being able to broadcast a single element.
Expand Down Expand Up @@ -11632,7 +11636,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return ZExt;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Check for being able to broadcast a single element.
Expand Down Expand Up @@ -11717,7 +11721,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Use dedicated unpack instructions for masks that match their pattern.
Expand Down Expand Up @@ -11803,7 +11807,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Blend;

// Use dedicated unpack instructions for masks that match their pattern.
Expand Down Expand Up @@ -11900,11 +11904,11 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
Expand Down

0 comments on commit cc24df9

Please sign in to comment.