Skip to content

Commit

Permalink
SLPVectorizer: Only vectorize intrinsics whose operands are widened e…
Browse files Browse the repository at this point in the history
…qually

The vectorizer only knows how to vectorize intrinsics by widening all operands by
the same factor.

Patch by Tyler Nowicki!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
aschwaighofer committed Apr 9, 2014
1 parent 0d5d656 commit b0ee237
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 26 deletions.
54 changes: 54 additions & 0 deletions include/llvm/Transforms/Utils/VectorUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H

namespace llvm {

/// \brief Tell whether an intrinsic can be vectorized by simple widening.
///
/// An intrinsic qualifies when every one of its arguments is a scalar in the
/// scalar form of the intrinsic and a vector in the vector form, i.e. all
/// operands are widened by the same factor. Only the intrinsics enumerated
/// below are accepted; every other ID yields false.
static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
  bool IsTrivial = false;

  switch (ID) {
  default:
    // Anything not explicitly whitelisted is rejected.
    break;

  // Single-operand intrinsics.
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::ctpop:
  // Multi-operand intrinsics.
  case Intrinsic::copysign:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
    IsTrivial = true;
    break;
  }

  return IsTrivial;
}

} // llvm namespace

#endif
31 changes: 6 additions & 25 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>

Expand Down Expand Up @@ -2266,32 +2267,12 @@ static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
Intrinsic::ID ID = II->getIntrinsicID();
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
ID == Intrinsic::lifetime_end)
return ID;
else
return Intrinsic::not_intrinsic;
}
}

if (!TLI)
Expand Down
5 changes: 4 additions & 1 deletion lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>

Expand Down Expand Up @@ -949,7 +950,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
if (II==NULL) {
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;

if (!isTriviallyVectorizable(ID)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
Expand Down
36 changes: 36 additions & 0 deletions test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000

target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx--nvidiacl"

; CTLZ cannot be vectorized currently because the second argument is a scalar
; for both the scalar and vector forms of the intrinsic. In the future it
; should be possible to vectorize such functions.
; Test causes an assert if LLVM tries to vectorize CTLZ.

; Applies the scalar llvm.ctlz.i8 intrinsic to each of the two lanes of %x and
; rebuilds a <2 x i8> from the results. Here each lane is fully processed
; (extract, call, insert) before the next lane is touched.
define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
entry:
; Lane 0: extract, count leading zeros, insert into the result vector.
%0 = extractelement <2 x i8> %x, i32 0
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
; Lane 1: same sequence, inserting into the partially built vector.
%1 = extractelement <2 x i8> %x, i32 1
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}

; Same computation as @cltz_test, but with the instructions grouped by kind:
; both extracts first, then both ctlz calls, then both inserts. This places
; the two scalar calls next to each other.
define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
entry:
; Extract both lanes of the input vector.
%0 = extractelement <2 x i8> %x, i32 0
%1 = extractelement <2 x i8> %x, i32 1
; Two adjacent scalar ctlz calls, one per lane.
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
; Reassemble the per-lane results into a <2 x i8>.
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}

declare i8 @llvm.ctlz.i8(i8, i1) #3

attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

0 comments on commit b0ee237

Please sign in to comment.