forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SLPVectorizer: Only vectorize intrinsics whose operands are widened e…
…qually The vectorizer only knows how to vectorize intrinsics by widening all operands by the same factor. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
1 parent
0d5d656
commit b0ee237
Showing
4 changed files
with
100 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file defines some vectorizer utilities. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H | ||
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H | ||
|
||
namespace llvm { | ||
|
||
/// \brief Tell whether an intrinsic can be vectorized trivially.
///
/// An intrinsic qualifies when every one of its arguments is a scalar in
/// the scalar form of the intrinsic and a vector in the vector form.
///
/// \param ID the intrinsic to classify.
/// \returns true for the intrinsics listed below, false for any other ID.
///
/// NOTE(review): Intrinsic::ID is used here but no #include is visible in
/// this header; presumably includers must pull in the intrinsics
/// declarations first -- confirm.
static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  default:
    // Anything not explicitly listed is rejected.
    return false;

  // Floating-point operations taking a single operand.
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  // Floating-point operations taking two operands.
  case Intrinsic::copysign:
  case Intrinsic::pow:
  // Floating-point operations taking three operands.
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  // Integer population count.
  case Intrinsic::ctpop:
    return true;
  }
}
|
||
} // llvm namespace | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000 | ||
|
||
target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64" | ||
target triple = "nvptx--nvidiacl" | ||
|
||
; CTLZ cannot be vectorized currently because the second argument is a scalar | ||
; for both the scalar and vector forms of the intrinsic. In the future it | ||
; should be possible to vectorize such functions. | ||
; This test triggers an assertion failure if LLVM tries to vectorize CTLZ. | ||
|
||
; Applies llvm.ctlz.i8 (second operand i1 false) to each lane of %x and | ||
; rebuilds a <2 x i8> from the two results. Here each extract/call/insert | ||
; sequence for lane 0 is emitted before the one for lane 1. | ||
define <2 x i8> @cltz_test(<2 x i8> %x) #0 { | ||
entry: | ||
%0 = extractelement <2 x i8> %x, i32 0 | ||
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false) | ||
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0 | ||
%1 = extractelement <2 x i8> %x, i32 1 | ||
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false) | ||
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1 | ||
ret <2 x i8> %vecinit2 | ||
} | ||
|
||
; Same per-lane llvm.ctlz.i8 computation as @cltz_test, but with the | ||
; instructions grouped by kind: both extractelements first, then both calls, | ||
; then both insertelements. | ||
define <2 x i8> @cltz_test2(<2 x i8> %x) #1 { | ||
entry: | ||
%0 = extractelement <2 x i8> %x, i32 0 | ||
%1 = extractelement <2 x i8> %x, i32 1 | ||
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false) | ||
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false) | ||
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0 | ||
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1 | ||
ret <2 x i8> %vecinit2 | ||
} | ||
|
||
declare i8 @llvm.ctlz.i8(i8, i1) #3 | ||
|
||
attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
attributes #1 = { nounwind readnone } |