Skip to content

Commit

Permalink
[LV] Move interleaved access helper functions to VectorUtils (NFC)
Browse files Browse the repository at this point in the history
This patch moves some helper functions related to interleaved access
vectorization out of LoopVectorize.cpp and into VectorUtils.cpp. We would like
to use these functions in a follow-on patch that improves interleaved load and
store lowering in (ARM/AArch64)ISelLowering.cpp. One of the functions was
already duplicated there and has been removed.

Differential Revision: https://reviews.llvm.org/D29398

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293788 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
mssimpso committed Feb 1, 2017
1 parent 460d4d4 commit aa96bc9
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 127 deletions.
53 changes: 53 additions & 0 deletions include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"

namespace llvm {

Expand Down Expand Up @@ -123,6 +124,58 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// \brief Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
/// <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs);

/// \brief Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
/// <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
unsigned Stride, unsigned VF);

/// \brief Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
/// <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
unsigned NumInts, unsigned NumUndefs);

/// \brief Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

} // llvm namespace

#endif
85 changes: 85 additions & 0 deletions lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,88 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {

return Inst;
}

/// Build the constant shuffle mask that interleaves \p NumVecs vectors of
/// \p VF elements each, i.e. <0, VF, 2 * VF, ..., 1, VF + 1, 2 * VF + 1, ...>.
///
/// Every index is materialized as an i32 constant through \p Builder.
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                     unsigned NumVecs) {
  SmallVector<Constant *, 16> Indices;
  for (unsigned Elt = 0; Elt != VF; ++Elt) {
    // Take lane Elt of each source vector in turn: Elt, VF + Elt, 2 * VF + Elt.
    unsigned Idx = Elt;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      Indices.push_back(Builder.getInt32(Idx));
      Idx += VF;
    }
  }

  return ConstantVector::get(Indices);
}

/// Build the constant shuffle mask
/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>.
///
/// Every index is materialized as an i32 constant through \p Builder.
Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
                                 unsigned Stride, unsigned VF) {
  SmallVector<Constant *, 16> Indices;
  // Keep a running index instead of recomputing Start + Elt * Stride.
  unsigned Idx = Start;
  for (unsigned Elt = 0; Elt != VF; ++Elt) {
    Indices.push_back(Builder.getInt32(Idx));
    Idx += Stride;
  }

  return ConstantVector::get(Indices);
}

/// Build the constant shuffle mask
/// <Start, Start + 1, ..., Start + NumInts - 1, undef, ..., undef>
/// holding \p NumInts consecutive i32 indices followed by \p NumUndefs undef
/// i32 entries.
Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                     unsigned NumInts, unsigned NumUndefs) {
  SmallVector<Constant *, 16> Indices;

  // Leading part: the consecutive indices.
  unsigned Next = Start;
  for (unsigned Remaining = NumInts; Remaining != 0; --Remaining) {
    Indices.push_back(Builder.getInt32(Next));
    ++Next;
  }

  // Trailing part: the same undef constant repeated NumUndefs times.
  Constant *UndefLane = UndefValue::get(Builder.getInt32Ty());
  Indices.append(NumUndefs, UndefLane);

  return ConstantVector::get(Indices);
}

/// Concatenate \p V1 and \p V2, two vectors with the same element type, into a
/// single vector with NumElts(V1) + NumElts(V2) elements.
///
/// \p V2 must not be longer than \p V1. When it is shorter, it is first widened
/// with undef lanes to the length of \p V1 so that both operands of the final
/// shuffle have the same number of elements; the final mask then selects only
/// the original lanes of \p V2.
static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
                                    Value *V2) {
  auto *FirstTy = dyn_cast<VectorType>(V1->getType());
  auto *SecondTy = dyn_cast<VectorType>(V2->getType());
  assert(FirstTy && SecondTy &&
         FirstTy->getScalarType() == SecondTy->getScalarType() &&
         "Expect two vectors with the same element type");

  const unsigned FirstElts = FirstTy->getNumElements();
  const unsigned SecondElts = SecondTy->getNumElements();
  assert(FirstElts >= SecondElts && "Unexpect the first vector has less elements");

  Value *Second = V2;
  if (SecondElts < FirstElts) {
    // Widen the shorter operand: keep its lanes, then pad with undef lanes.
    Constant *WidenMask =
        createSequentialMask(Builder, 0, SecondElts, FirstElts - SecondElts);
    Second =
        Builder.CreateShuffleVector(V2, UndefValue::get(SecondTy), WidenMask);
  }

  // Select every lane of V1 followed by the original lanes of V2.
  Constant *ConcatMask =
      createSequentialMask(Builder, 0, FirstElts + SecondElts, 0);
  return Builder.CreateShuffleVector(V1, Second, ConcatMask);
}

/// Concatenate all vectors in \p Vecs into one vector by repeatedly merging
/// adjacent pairs until a single value is left.
///
/// At least two vectors are expected. Within a round, only the pair that ends
/// with the last vector may mix different types.
Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
  unsigned Remaining = Vecs.size();
  assert(Remaining > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> Worklist(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> Merged;

    // Merge neighbours (0,1), (2,3), ... of the current round.
    unsigned Idx = 0;
    while (Idx < Remaining - 1) {
      Value *Lo = Worklist[Idx];
      Value *Hi = Worklist[Idx + 1];
      assert((Lo->getType() == Hi->getType() || Idx == Remaining - 2) &&
             "Only the last vector may have a different type");

      Merged.push_back(concatenateTwoVectors(Builder, Lo, Hi));
      Idx += 2;
    }

    // With an odd count the last vector has no partner; carry it over as is.
    if (Remaining % 2 != 0)
      Merged.push_back(Worklist[Remaining - 1]);

    Worklist = Merged;
    Remaining = Worklist.size();
  } while (Remaining > 1);

  return Worklist[0];
}
17 changes: 3 additions & 14 deletions lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand Down Expand Up @@ -7315,18 +7316,6 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
return true;
}

/// \brief Get a mask consisting of sequential integers starting from \p Start.
///
/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
///
/// The mask is returned as a constant vector of \p NumElts i32 indices created
/// through \p Builder.
static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
unsigned NumElts) {
SmallVector<Constant *, 16> Mask;
// Emit one i32 index per element, counting up from Start.
for (unsigned i = 0; i < NumElts; i++)
Mask.push_back(Builder.getInt32(Start + i));

return ConstantVector::get(Mask);
}

/// \brief Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
Expand Down Expand Up @@ -7408,7 +7397,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
for (unsigned i = 0; i < Factor; i++) {
if (Mask[i] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
Expand All @@ -7423,7 +7412,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// In the case of all undefs we're defaulting to using elems from 0
// Note: StartMask cannot be negative, it's checked in isReInterleaveMask
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
}

Expand Down
17 changes: 3 additions & 14 deletions lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
Expand Down Expand Up @@ -13342,18 +13343,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
return true;
}

/// \brief Get a mask consisting of sequential integers starting from \p Start.
///
/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
///
/// The mask is returned as a constant vector of \p NumElts i32 indices created
/// through \p Builder.
static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
unsigned NumElts) {
SmallVector<Constant *, 16> Mask;
// Emit one i32 index per element, counting up from Start.
for (unsigned i = 0; i < NumElts; i++)
Mask.push_back(Builder.getInt32(Start + i));

return ConstantVector::get(Mask);
}

/// \brief Lower an interleaved store into a vstN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
Expand Down Expand Up @@ -13439,7 +13428,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
for (unsigned i = 0; i < Factor; i++) {
if (Mask[i] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
Expand All @@ -13454,7 +13443,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// In the case of all undefs we're defaulting to using elems from 0
// Note: StartMask cannot be negative, it's checked in isReInterleaveMask
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
}

Expand Down
102 changes: 3 additions & 99 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2550,102 +2550,6 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}

// Get a mask to interleave \p NumVec vectors into a wide vector.
// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...>
// E.g. For 2 interleaved vectors, if VF is 4, the mask is:
// <0, 4, 1, 5, 2, 6, 3, 7>
// The mask is a constant vector of VF * NumVec i32 indices.
static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVec) {
SmallVector<Constant *, 16> Mask;
// The outer loop picks the element position, the inner loop the source vector.
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < NumVec; j++)
Mask.push_back(Builder.getInt32(j * VF + i));

return ConstantVector::get(Mask);
}

// Get the strided mask starting from index \p Start.
// I.e. <Start, Start + Stride, ..., Start + Stride*(VF-1)>
// E.g. For Start = 0, Stride = 2 and VF = 4, the mask is:
// <0, 2, 4, 6>
// The mask is a constant vector of VF i32 indices.
static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start,
unsigned Stride, unsigned VF) {
SmallVector<Constant *, 16> Mask;
for (unsigned i = 0; i < VF; i++)
Mask.push_back(Builder.getInt32(Start + i * Stride));

return ConstantVector::get(Mask);
}

// Get a mask of two parts: The first part consists of sequential integers
// starting from 0. The second part consists of UNDEFs.
// I.e. <0, 1, 2, ..., NumInt - 1, undef, ..., undef>
// The mask is a constant vector with NumInt + NumUndef i32 entries.
static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt,
unsigned NumUndef) {
SmallVector<Constant *, 16> Mask;
// First part: the indices 0 .. NumInt - 1.
for (unsigned i = 0; i < NumInt; i++)
Mask.push_back(Builder.getInt32(i));

// Second part: NumUndef copies of the same undef i32 constant.
Constant *Undef = UndefValue::get(Builder.getInt32Ty());
for (unsigned i = 0; i < NumUndef; i++)
Mask.push_back(Undef);

return ConstantVector::get(Mask);
}

// Concatenate two vectors with the same element type. The 2nd vector should
// not have more elements than the 1st vector. If the 2nd vector has fewer
// elements, extend it with UNDEFs before the final shuffle.
// The result has NumElts1 + NumElts2 elements: the final mask selects all of
// V1 followed by only the original elements of V2.
static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
Value *V2) {
VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
assert(VecTy1 && VecTy2 &&
VecTy1->getScalarType() == VecTy2->getScalarType() &&
"Expect two vectors with the same element type");

unsigned NumElts1 = VecTy1->getNumElements();
unsigned NumElts2 = VecTy2->getNumElements();
assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

if (NumElts1 > NumElts2) {
// Extend with UNDEFs so that V2 has as many elements as V1.
Constant *ExtMask =
getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2);
V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
}

// Select the NumElts1 elements of V1, then the first NumElts2 elements of V2.
Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0);
return Builder.CreateShuffleVector(V1, V2, Mask);
}

// Concatenate vectors in the given list into a single vector. At least two
// vectors are expected. Vectors are merged pairwise in rounds until one value
// remains; within a round, only the pair ending with the last vector may mix
// different types (see the assertion in the loop).
static Value *ConcatenateVectors(IRBuilder<> &Builder,
ArrayRef<Value *> InputList) {
unsigned NumVec = InputList.size();
assert(NumVec > 1 && "Should be at least two vectors");

SmallVector<Value *, 8> ResList;
ResList.append(InputList.begin(), InputList.end());
do {
// Results of this round; becomes the input list of the next round.
SmallVector<Value *, 8> TmpList;
for (unsigned i = 0; i < NumVec - 1; i += 2) {
Value *V0 = ResList[i], *V1 = ResList[i + 1];
assert((V0->getType() == V1->getType() || i == NumVec - 2) &&
"Only the last vector may have a different type");

TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1));
}

// Push the last vector if the total number of vectors is odd.
if (NumVec % 2 != 0)
TmpList.push_back(ResList[NumVec - 1]);

ResList = TmpList;
NumVec = ResList.size();
} while (NumVec > 1);

return ResList[0];
}

// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
Expand Down Expand Up @@ -2751,7 +2655,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
continue;

VectorParts Entry(UF);
Constant *StrideMask = getStridedMask(Builder, I, InterleaveFactor, VF);
Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
NewLoads[Part], UndefVec, StrideMask, "strided.vec");
Expand Down Expand Up @@ -2795,10 +2699,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
}

// Concatenate all vectors into a wide vector.
Value *WideVec = ConcatenateVectors(Builder, StoredVecs);
Value *WideVec = concatenateVectors(Builder, StoredVecs);

// Interleave the elements in the wide vector.
Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor);
Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor);
Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
"interleaved.vec");

Expand Down

0 comments on commit aa96bc9

Please sign in to comment.