From aa96bc9c68c6f61ff514b6a828cdb53136b4184c Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Wed, 1 Feb 2017 17:45:46 +0000 Subject: [PATCH] [LV] Move interleaved access helper functions to VectorUtils (NFC) This patch moves some helper functions related to interleaved access vectorization out of LoopVectorize.cpp and into VectorUtils.cpp. We would like to use these functions in a follow-on patch that improves interleaved load and store lowering in (ARM/AArch64)ISelLowering.cpp. One of the functions was already duplicated there and has been removed. Differential Revision: https://reviews.llvm.org/D29398 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293788 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/VectorUtils.h | 53 +++++++++++ lib/Analysis/VectorUtils.cpp | 85 +++++++++++++++++ lib/Target/AArch64/AArch64ISelLowering.cpp | 17 +--- lib/Target/ARM/ARMISelLowering.cpp | 17 +--- lib/Transforms/Vectorize/LoopVectorize.cpp | 102 +-------------------- 5 files changed, 147 insertions(+), 127 deletions(-) diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index 7b154530f184..6315e8408f05 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -16,6 +16,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { @@ -123,6 +124,58 @@ computeMinimumValueSizes(ArrayRef Blocks, /// This function always sets a (possibly null) value for each K in Kinds. Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +/// \brief Create an interleave shuffle mask. +/// +/// This function creates a shuffle mask for interleaving \p NumVecs vectors of +/// vectorization factor \p VF into a single wide vector. 
The mask is of the +/// form: +/// +/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> +/// +/// For example, the mask for VF = 4 and NumVecs = 2 is: +/// +/// <0, 4, 1, 5, 2, 6, 3, 7>. +Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs); + +/// \brief Create a stride shuffle mask. +/// +/// This function creates a shuffle mask whose elements begin at \p Start and +/// are incremented by \p Stride. The mask can be used to deinterleave an +/// interleaved vector into separate vectors of vectorization factor \p VF. The +/// mask is of the form: +/// +/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> +/// +/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: +/// +/// <0, 2, 4, 6> +Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF); + +/// \brief Create a sequential shuffle mask. +/// +/// This function creates a shuffle mask whose elements are sequential and begin +/// at \p Start. The mask contains \p NumInts integers and is padded with \p +/// NumUndefs undef values. The mask is of the form: +/// +/// <Start, Start + 1, ..., Start + NumInts - 1, undef_1, ..., undef_NumUndefs> +/// +/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is: +/// +/// <0, 1, 2, 3, undef, undef, undef, undef> +Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs); + +/// \brief Concatenate a list of vectors. +/// +/// This function generates code that concatenates the vectors in \p Vecs into a +/// single large vector. The number of vectors should be greater than one, and +/// their element types should be the same. The number of elements in the +/// vectors should also be the same; however, if the last vector has fewer +/// elements, it will be padded with undefs. 
+Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef Vecs); + } // llvm namespace #endif diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 7e598f435ff5..722f17a8067e 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -488,3 +488,88 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL) { return Inst; } + +Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs) { + SmallVector Mask; + for (unsigned i = 0; i < VF; i++) + for (unsigned j = 0; j < NumVecs; j++) + Mask.push_back(Builder.getInt32(j * VF + i)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF) { + SmallVector Mask; + for (unsigned i = 0; i < VF; i++) + Mask.push_back(Builder.getInt32(Start + i * Stride)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs) { + SmallVector Mask; + for (unsigned i = 0; i < NumInts; i++) + Mask.push_back(Builder.getInt32(Start + i)); + + Constant *Undef = UndefValue::get(Builder.getInt32Ty()); + for (unsigned i = 0; i < NumUndefs; i++) + Mask.push_back(Undef); + + return ConstantVector::get(Mask); +} + +/// A helper function for concatenating vectors. This function concatenates two +/// vectors having the same element type. If the second vector has fewer +/// elements than the first, it is padded with undefs. 
+static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, + Value *V2) { + VectorType *VecTy1 = dyn_cast(V1->getType()); + VectorType *VecTy2 = dyn_cast(V2->getType()); + assert(VecTy1 && VecTy2 && + VecTy1->getScalarType() == VecTy2->getScalarType() && + "Expect two vectors with the same element type"); + + unsigned NumElts1 = VecTy1->getNumElements(); + unsigned NumElts2 = VecTy2->getNumElements(); + assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); + + if (NumElts1 > NumElts2) { + // Extend with UNDEFs. + Constant *ExtMask = + createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2); + V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + } + + Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0); + return Builder.CreateShuffleVector(V1, V2, Mask); +} + +Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef Vecs) { + unsigned NumVecs = Vecs.size(); + assert(NumVecs > 1 && "Should be at least two vectors"); + + SmallVector ResList; + ResList.append(Vecs.begin(), Vecs.end()); + do { + SmallVector TmpList; + for (unsigned i = 0; i < NumVecs - 1; i += 2) { + Value *V0 = ResList[i], *V1 = ResList[i + 1]; + assert((V0->getType() == V1->getType() || i == NumVecs - 2) && + "Only the last vector may have a different type"); + + TmpList.push_back(concatenateTwoVectors(Builder, V0, V1)); + } + + // Push the last vector if the total number of vectors is odd. 
+ if (NumVecs % 2 != 0) + TmpList.push_back(ResList[NumVecs - 1]); + + ResList = TmpList; + NumVecs = ResList.size(); + } while (NumVecs > 1); + + return ResList[0]; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7b581a706fa2..8fb9c5c4a38e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -7315,18 +7316,6 @@ bool AArch64TargetLowering::lowerInterleavedLoad( return true; } -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); - - return ConstantVector::get(Mask); -} - /// \brief Lower an interleaved store into a stN intrinsic. /// /// E.g. 
Lower an interleaved store (Factor = 3): @@ -7408,7 +7397,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); + Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -7423,7 +7412,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2306b76cc57a..247aa98d9f7c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -38,6 +38,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/IntrinsicLowering.h" @@ -13342,18 +13343,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( return true; } -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); - - return ConstantVector::get(Mask); -} - /// \brief Lower an interleaved store into a vstN intrinsic. /// /// E.g. 
Lower an interleaved store (Factor = 3): @@ -13439,7 +13428,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); + Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -13454,7 +13443,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 4645714faeba..e75a47d51d10 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2550,102 +2550,6 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) { "reverse"); } -// Get a mask to interleave \p NumVec vectors into a wide vector. -// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...> -// E.g. For 2 interleaved vectors, if VF is 4, the mask is: -// <0, 4, 1, 5, 2, 6, 3, 7> -static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF, - unsigned NumVec) { - SmallVector Mask; - for (unsigned i = 0; i < VF; i++) - for (unsigned j = 0; j < NumVec; j++) - Mask.push_back(Builder.getInt32(j * VF + i)); - - return ConstantVector::get(Mask); -} - -// Get the strided mask starting from index \p Start. -// I.e. 
-static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start, - unsigned Stride, unsigned VF) { - SmallVector Mask; - for (unsigned i = 0; i < VF; i++) - Mask.push_back(Builder.getInt32(Start + i * Stride)); - - return ConstantVector::get(Mask); -} - -// Get a mask of two parts: The first part consists of sequential integers -// starting from 0, The second part consists of UNDEFs. -// I.e. <0, 1, 2, ..., NumInt - 1, undef, ..., undef> -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt, - unsigned NumUndef) { - SmallVector Mask; - for (unsigned i = 0; i < NumInt; i++) - Mask.push_back(Builder.getInt32(i)); - - Constant *Undef = UndefValue::get(Builder.getInt32Ty()); - for (unsigned i = 0; i < NumUndef; i++) - Mask.push_back(Undef); - - return ConstantVector::get(Mask); -} - -// Concatenate two vectors with the same element type. The 2nd vector should -// not have more elements than the 1st vector. If the 2nd vector has less -// elements, extend it with UNDEFs. -static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1, - Value *V2) { - VectorType *VecTy1 = dyn_cast(V1->getType()); - VectorType *VecTy2 = dyn_cast(V2->getType()); - assert(VecTy1 && VecTy2 && - VecTy1->getScalarType() == VecTy2->getScalarType() && - "Expect two vectors with the same element type"); - - unsigned NumElts1 = VecTy1->getNumElements(); - unsigned NumElts2 = VecTy2->getNumElements(); - assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); - - if (NumElts1 > NumElts2) { - // Extend with UNDEFs. - Constant *ExtMask = - getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2); - V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); - } - - Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0); - return Builder.CreateShuffleVector(V1, V2, Mask); -} - -// Concatenate vectors in the given list. All vectors have the same type. 
-static Value *ConcatenateVectors(IRBuilder<> &Builder, - ArrayRef InputList) { - unsigned NumVec = InputList.size(); - assert(NumVec > 1 && "Should be at least two vectors"); - - SmallVector ResList; - ResList.append(InputList.begin(), InputList.end()); - do { - SmallVector TmpList; - for (unsigned i = 0; i < NumVec - 1; i += 2) { - Value *V0 = ResList[i], *V1 = ResList[i + 1]; - assert((V0->getType() == V1->getType() || i == NumVec - 2) && - "Only the last vector may have a different type"); - - TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1)); - } - - // Push the last vector if the total number of vectors is odd. - if (NumVec % 2 != 0) - TmpList.push_back(ResList[NumVec - 1]); - - ResList = TmpList; - NumVec = ResList.size(); - } while (NumVec > 1); - - return ResList[0]; -} - // Try to vectorize the interleave group that \p Instr belongs to. // // E.g. Translate following interleaved load group (factor = 3): @@ -2751,7 +2655,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { continue; VectorParts Entry(UF); - Constant *StrideMask = getStridedMask(Builder, I, InterleaveFactor, VF); + Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( NewLoads[Part], UndefVec, StrideMask, "strided.vec"); @@ -2795,10 +2699,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { } // Concatenate all vectors into a wide vector. - Value *WideVec = ConcatenateVectors(Builder, StoredVecs); + Value *WideVec = concatenateVectors(Builder, StoredVecs); // Interleave the elements in the wide vector. - Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor); + Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor); Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask, "interleaved.vec");