Skip to content

Commit

Permalink
[TTI] Let the cost model estimate ctpop costs based on legality
Browse files Browse the repository at this point in the history
PPC has a vector popcount; this lets the vectorizer use the correct cost
for it. Tweak the X86 test to use an intrinsic that's actually scalarized (we
have a somewhat efficient lowering for vector popcount using SSE, and the
cost model finds that now).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265005 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
d0k committed Mar 31, 2016
1 parent 4e6485e commit 638cd03
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
10 changes: 9 additions & 1 deletion include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) {
unsigned ISD = 0;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
default: {
// Assume that we need to scalarize this intrinsic.
Expand Down Expand Up @@ -725,6 +726,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_load:
return static_cast<T *>(this)
->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::ctpop:
ISD = ISD::CTPOP;
// In case of legalization use TCC_Expensive. This is cheaper than a
// library call but still not a cheap instruction.
SingleCallCost = TargetTransformInfo::TCC_Expensive;
break;
// FIXME: ctlz, cttz, ...
}

const TargetLoweringBase *TLI = getTLI();
Expand Down Expand Up @@ -785,7 +793,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}

// This is going to be turned into a library call, make it expensive.
return 10;
return SingleCallCost;
}

/// \brief Compute a cost of the given call instruction.
Expand Down
11 changes: 11 additions & 0 deletions test/Analysis/CostModel/PowerPC/popcnt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Cost-model test for a 4 x i32 vector population count. The RUN line at the
; top of this file targets powerpc64 with -mcpu=pwr8, and the pattern below
; expects the analysis to report a cost of 1 for the ctpop call.
define <4 x i32> @test1(<4 x i32> %arg) {
; CHECK: cost of 1 {{.*}} call <4 x i32> @llvm.ctpop.v4i32
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %arg)
ret <4 x i32> %ctpop
}

declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
16 changes: 8 additions & 8 deletions test/Analysis/CostModel/X86/scalarize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
declare %i4 @llvm.bswap.v4i32(%i4)
declare %i8 @llvm.bswap.v2i64(%i8)

declare %i4 @llvm.ctpop.v4i32(%i4)
declare %i8 @llvm.ctpop.v2i64(%i8)
declare %i4 @llvm.cttz.v4i32(%i4)
declare %i8 @llvm.cttz.v2i64(%i8)

; CHECK32-LABEL: test_scalarized_intrinsics
; CHECK64-LABEL: test_scalarized_intrinsics
Expand All @@ -28,12 +28,12 @@ define void @test_scalarized_intrinsics() {
; CHECK64: cost of 6 {{.*}}bswap.v2i64
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)

; CHECK32: cost of 12 {{.*}}ctpop.v4i32
; CHECK64: cost of 12 {{.*}}ctpop.v4i32
%r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
; CHECK32: cost of 10 {{.*}}ctpop.v2i64
; CHECK64: cost of 6 {{.*}}ctpop.v2i64
%r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
; CHECK32: cost of 12 {{.*}}cttz.v4i32
; CHECK64: cost of 12 {{.*}}cttz.v4i32
%r4 = call %i4 @llvm.cttz.v4i32(%i4 undef)
; CHECK32: cost of 10 {{.*}}cttz.v2i64
; CHECK64: cost of 6 {{.*}}cttz.v2i64
%r5 = call %i8 @llvm.cttz.v2i64(%i8 undef)

; CHECK32: ret
; CHECK64: ret
Expand Down

0 comments on commit 638cd03

Please sign in to comment.