Skip to content

Commit

Permalink
Allow vectorization of intrinsics such as powi,cttz and ctlz in Loop …
Browse files Browse the repository at this point in the history
…and SLP Vectorizer.

This patch adds support for vectorizing intrinsics such as powi, cttz and ctlz in the Loop and SLP vectorizers. These intrinsics differ from other
intrinsics in that the second argument to these functions must be the same for every call being combined in order to vectorize them, and that argument must remain a scalar in the vectorized call.
Review: http://reviews.llvm.org/D3851#inline-32769 and http://reviews.llvm.org/D3937#inline-32857


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209873 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
kvbhat committed May 30, 2014
1 parent 102187e commit 8ffc96a
Show file tree
Hide file tree
Showing 5 changed files with 426 additions and 2 deletions.
15 changes: 15 additions & 0 deletions include/llvm/Transforms/Utils/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,27 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::powi:
return true;
default:
return false;
}
}

/// Report whether operand number ScalarOpdIdx of intrinsic ID has to stay a
/// scalar when the intrinsic call is vectorized.
///
/// ctlz, cttz and powi are the intrinsics recognised here: for each of them
/// only the operand at index 1 (the second argument) is a scalar operand.
///
/// \param ID            the intrinsic being queried.
/// \param ScalarOpdIdx  zero-based operand index to test.
/// \returns true iff ID is ctlz, cttz or powi and ScalarOpdIdx is 1;
///          false for every other intrinsic or operand index.
///
/// Declared 'static inline' (matching isTriviallyVectorizable in this header)
/// so that translation units which include the header without calling this
/// helper do not each get an unused static function definition.
static inline bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                                unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    // Only the second argument of these intrinsics is a scalar operand.
    return ScalarOpdIdx == 1;
  default:
    return false;
  }
}

static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 1 ||
Expand Down
15 changes: 15 additions & 0 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3123,9 +3123,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
scalarizeInstruction(it);
break;
default:
bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
if (HasScalarOpd && i == 1) {
Args.push_back(CI->getArgOperand(i));
continue;
}
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
Expand Down Expand Up @@ -3474,6 +3479,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}

// Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
// second argument is the same (i.e. loop invariant)
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
}
}

// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
Expand Down
29 changes: 27 additions & 2 deletions lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -961,9 +961,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}

Function *Int = CI->getCalledFunction();

Value *A1I = nullptr;
if (hasVectorInstrinsicScalarOpd(ID, 1))
A1I = CI->getArgOperand(1);
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
Expand All @@ -973,6 +974,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
<< "\n");
return;
}
// ctlz,cttz and powi are special intrinsics whose second argument
// should be same in order for them to be vectorized.
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
<< "\n");
return;
}
}
}

newTreeEntry(VL, true);
Expand Down Expand Up @@ -1652,9 +1665,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);
Function *FI;
Intrinsic::ID IID = Intrinsic::not_intrinsic;
if (CI && (FI = CI->getCalledFunction())) {
IID = (Intrinsic::ID) FI->getIntrinsicID();
}
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
// ctlz,cttz and powi are special intrinsics whose second argument is
// a scalar. This argument should not be vectorized.
if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
CallInst *CEI = cast<CallInst>(E->Scalars[0]);
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
OpVL.push_back(CEI->getArgOperand(j));
Expand Down
102 changes: 102 additions & 0 deletions test/Transforms/LoopVectorize/intrinsic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body
ret void
}

; Scalar powi intrinsic: a double base and an i32 exponent.
declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone

; Positive test: computes x[i] = powi(y[i], P) for i in [0, n).
; The exponent %P is a function argument, so it has the same value on every
; loop iteration. The directives below expect the vectorized output to contain
; a call to llvm.powi.v4f64.
;CHECK-LABEL: @powi_f64(
;CHECK: llvm.powi.v4f64
;CHECK: ret void
define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
entry:
; Skip the loop entirely when n <= 0.
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
%0 = load double* %arrayidx, align 8
; The second operand (%P) does not depend on the induction variable.
%call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
store double %call, double* %arrayidx4, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the (truncated) next index equals %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}

; Negative test: computes x[i] = powi(y[i], (i32) i) for i in [0, n).
; The exponent is derived from the induction variable, so it changes on every
; iteration. The directives below expect that no call to llvm.powi.v4f64
; appears in the output for this function.
;CHECK-LABEL: @powi_f64_neg(
;CHECK-NOT: llvm.powi.v4f64
;CHECK: ret void
define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
; Skip the loop entirely when n <= 0.
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
%0 = load double* %arrayidx, align 8
; %1 is the current loop index truncated to i32; it is used as the exponent,
; which makes the second powi operand vary from iteration to iteration.
%1 = trunc i64 %indvars.iv to i32
%call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
store double %call, double* %arrayidx4, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the (truncated) next index equals %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}

; Scalar cttz intrinsic: an i64 value and an i1 flag as the second operand.
declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone

; Positive test: computes x[i] = cttz(y[i], true) for i in [0, n).
; The second operand is the constant 'i1 true', identical on every iteration.
; The directives below expect the vectorized output to contain a call to
; llvm.cttz.v4i64.
; NOTE(review): despite the _f64 suffix in the name, this test operates on
; i64 values, not doubles.
;CHECK-LABEL: @cttz_f64(
;CHECK: llvm.cttz.v4i64
;CHECK: ret void
define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
entry:
; Skip the loop entirely when n <= 0.
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
%0 = load i64* %arrayidx, align 8
; The second operand is a constant, so it is the same for every iteration.
%call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
store i64 %call, i64* %arrayidx4, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the (truncated) next index equals %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}

; Scalar ctlz intrinsic: an i64 value and an i1 flag as the second operand.
declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone

; Positive test: computes x[i] = ctlz(y[i], true) for i in [0, n).
; The second operand is the constant 'i1 true', identical on every iteration.
; The directives below expect the vectorized output to contain a call to
; llvm.ctlz.v4i64.
; NOTE(review): despite the _f64 suffix in the name, this test operates on
; i64 values, not doubles.
;CHECK-LABEL: @ctlz_f64(
;CHECK: llvm.ctlz.v4i64
;CHECK: ret void
define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
entry:
; Skip the loop entirely when n <= 0.
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
%0 = load i64* %arrayidx, align 8
; The second operand is a constant, so it is the same for every iteration.
%call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
store i64 %call, i64* %arrayidx4, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the (truncated) next index equals %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}
Loading

0 comments on commit 8ffc96a

Please sign in to comment.