Skip to content

Commit

Permalink
[CostModel][x86] Improved cost model for alternate shuffles.
Browse files Browse the repository at this point in the history
This patch:
 1) Improves the cost model for x86 alternate shuffles (originally
added at revision 211339);
 2) Teaches the Cost Model Analysis pass how to analyze alternate shuffles.

Alternate shuffles are a special kind of blend; on x86, we can often
easily lower alternate shuffles into a single blend
instruction (depending on the subtarget features).

The existing cost model didn't take into account subtarget features.
Also, it had a couple of "dead" entries for vector types that are never
legal (example: on x86 types v2i32 and v2f32 are not legal; those are
always either promoted or widened to 128-bit vector types).

The new x86 cost model takes into account what target features we have
before returning the shuffle cost (i.e. the number of instructions
after the blend is lowered/expanded).

This patch also teaches the Cost Model Analysis how to identify and analyze
alternate shuffles (i.e. 'SK_Alternate' shufflevector instructions):
 - added function 'isAlternateVectorMask';
 - added some logic to check if an instruction is an alternate shuffle and,
   if so, call the target-specific TTI to get the corresponding shuffle cost;
 - added a test to verify the cost model analysis on alternate shuffles.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212296 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
adibiagio committed Jul 3, 2014
1 parent 26272de commit 60e9a53
Show file tree
Hide file tree
Showing 3 changed files with 468 additions and 20 deletions.
37 changes: 34 additions & 3 deletions lib/Analysis/CostModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,31 @@ static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
return true;
}

// Returns true if the given shuffle mask selects lanes alternately from
// its two source vectors, in either order. Negative mask entries (undef
// lanes) are treated as wildcards that are compatible with both patterns.
//
// Pattern A, e.g. <0,5,2,7>: even lanes from the first operand, odd lanes
// from the second. Pattern B, e.g. <4,1,6,3>: the reverse.
static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
  const unsigned NumElts = Mask.size();

  // Track both candidate patterns in a single pass over the mask.
  bool EvenFromFirstOp = true; // <0, N+1, 2, N+3, ...>
  bool OddFromFirstOp = true;  // <N+0, 1, N+2, 3, ...>

  for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
    const int Elt = Mask[Idx];
    if (Elt < 0)
      continue; // Undef lane: matches either pattern.

    const bool IsOddLane = (Idx & 1) != 0;
    if (Elt != (int)(IsOddLane ? NumElts + Idx : Idx))
      EvenFromFirstOp = false;
    if (Elt != (int)(IsOddLane ? Idx : NumElts + Idx))
      OddFromFirstOp = false;

    // Neither pattern can match anymore; bail out early.
    if (!EvenFromFirstOp && !OddFromFirstOp)
      return false;
  }

  return EvenFromFirstOp || OddFromFirstOp;
}

static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
TargetTransformInfo::OK_AnyValue;
Expand Down Expand Up @@ -466,9 +491,15 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned NumVecElems = VecTypOp0->getVectorNumElements();
SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
nullptr);
if (NumVecElems == Mask.size()) {
if (isReverseVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
0, nullptr);
if (isAlternateVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
VecTypOp0, 0, nullptr);
}

return -1;
}
case Instruction::Call:
Expand Down
104 changes: 87 additions & 17 deletions lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,29 +417,99 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
}

if (Kind == SK_Alternate) {
static const CostTblEntry<MVT::SimpleValueType> X86AltShuffleTbl[] = {
// Alt shuffle cost table for X86. Cost is the number of instructions
// required to create the shuffled vector.
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);

// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
if (ST->hasAVX2() && LT.second == MVT::v16i16)
return LT.first;

{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd

{ISD::VECTOR_SHUFFLE, MVT::v2i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps

{ISD::VECTOR_SHUFFLE, MVT::v4i16, 8},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8},
// This shuffle is custom lowered into a sequence of:
// 2x vextractf128 , 2x vpblendw , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},

{ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}};
// This shuffle is custom lowered into a long sequence of:
// 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
};

std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
if (ST->hasAVX()) {
int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * AVXAltShuffleTbl[Idx].Cost;
}

static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
// These are lowered into movsd.
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},

// packed float vectors with four elements are lowered into BLENDI dag
// nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},

// This shuffle generates a single pshufw.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},

// There is no instruction that matches a v16i8 alternate shuffle.
// The backend will expand it into the sequence 'pshufb + pshufb + or'.
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
};

if (ST->hasSSE41()) {
int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSE41AltShuffleTbl[Idx].Cost;
}

static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd

// SSSE3 doesn't have 'blendps'. The following shuffles are expanded into
// the sequence 'shufps + pshufd'
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},

{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
};

if (ST->hasSSSE3()) {
int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
}

int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
return LT.first * X86AltShuffleTbl[Idx].Cost;
static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd

{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd

// This is expanded into a long sequence of four extract + four insert.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.

// 8 x (pinsrw + pextrw + and + movb + movzb + or)
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
};

// Fall-back (SSE3 and SSE2).
int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSEAltShuffleTbl[Idx].Cost;
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}

return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
Expand Down
Loading

0 comments on commit 60e9a53

Please sign in to comment.