Skip to content

Commit

Permalink
[SLP] Added more missed optimization remarks
Browse files Browse the repository at this point in the history
Summary:
Added more remarks to the SLP vectorizer pass, in particular "missed" optimization remarks.
Also added several tests for the new functionality.

Patch by Vladimir Miloserdov!

For reference you may look at: https://reviews.llvm.org/rL302811

Reviewers: anemet, fhahn

Reviewed By: anemet

Subscribers: javed.absar, lattner, petecoup, yakush, llvm-commits

Differential Revision: https://reviews.llvm.org/D38367

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318307 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
anemet committed Nov 15, 2017
1 parent 73e1b04 commit e074ad4
Show file tree
Hide file tree
Showing 7 changed files with 303 additions and 21 deletions.
88 changes: 74 additions & 14 deletions lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4452,19 +4452,51 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
if (MaxVF < 2)
return false;
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}

for (Value *V : VL) {
Type *Ty = V->getType();
if (!isValidElementType(Ty))
if (!isValidElementType(Ty)) {
// NOTE: the following will give the user the internal LLVM type name, which may not be useful
R.getORE()->emit([&]() {
std::string type_str;
llvm::raw_string_ostream rso(type_str);
Ty->print(rso);
return OptimizationRemarkMissed(
SV_NAME, "UnsupportedType", I0)
<< "Cannot SLP vectorize list: type "
<< rso.str() + " is unsupported by vectorizer";
});
return false;
}
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst || Inst->getOpcode() != Opcode0)

if (!Inst)
return false;
if (Inst->getOpcode() != Opcode0) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "InequableTypes", I0)
<< "Cannot SLP vectorize list: not all of the "
<< "parts of scalar instructions are of the same type: "
<< ore::NV("Instruction1Opcode", I0) << " and "
<< ore::NV("Instruction2Opcode", Inst);
});
return false;
}
}

bool Changed = false;
bool CandidateFound = false;
int MinCost = SLPCostThreshold;

// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
Expand Down Expand Up @@ -4518,14 +4550,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,

R.computeMinimumValueSizes();
int Cost = R.getTreeCost();
CandidateFound = true;
MinCost = std::min(MinCost, Cost);

if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));

Value *VectorizedRoot = R.vectorizeTree();

Expand Down Expand Up @@ -4560,6 +4594,22 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}
}

if (!Changed && CandidateFound) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "NotBeneficial", I0)
<< "List vectorization was possible but not beneficial with cost "
<< ore::NV("Cost", MinCost) << " >= "
<< ore::NV("Treshold", -SLPCostThreshold);
});
} else if (!Changed) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "NotPossible", I0)
<< "Cannot SLP vectorize list: vectorization was impossible"
<< " with available vectorization factors";
});
}
return Changed;
}

Expand Down Expand Up @@ -5268,17 +5318,27 @@ class HorizontalReduction {
// Estimate cost.
int Cost =
V.getTreeCost() + getReductionCost(TTI, ReducedVals[i], ReduxWidth);
if (Cost >= -SLPCostThreshold)
break;
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "HorSLPNotBeneficial", cast<Instruction>(VL[0]))
<< "Vectorizing horizontal reduction is possible"
<< "but not beneficial with cost "
<< ore::NV("Cost", Cost) << " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
});
break;
}

DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
<< ". (HorRdx)\n");
auto *I0 = cast<Instruction>(VL[0]);
V.getORE()->emit(
OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0)
V.getORE()->emit([&]() {
return OptimizationRemark(
SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
<< "Vectorized horizontal reduction with cost "
<< ore::NV("Cost", Cost) << " and with tree size "
<< ore::NV("TreeSize", V.getTreeSize()));
<< ore::NV("TreeSize", V.getTreeSize());
});

// Vectorize a tree.
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Expand Down
12 changes: 8 additions & 4 deletions test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ target triple = "aarch64--linux-gnu"
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
Expand All @@ -37,7 +38,8 @@ target triple = "aarch64--linux-gnu"
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
Expand Down Expand Up @@ -92,7 +94,8 @@ for.body:
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
Expand All @@ -101,7 +104,8 @@ for.body:
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
Expand Down
9 changes: 6 additions & 3 deletions test/Transforms/SLPVectorizer/AArch64/horizontal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ target triple = "aarch64--linux"
; CHECK: load <4 x i32>
; CHECK: select <4 x i1>

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: test_select
; YAML-NEXT: Args:
Expand Down Expand Up @@ -108,7 +109,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK: load <4 x i32>
; CHECK: mul nsw <4 x i32>

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: reduction_with_br
; YAML-NEXT: Args:
Expand Down Expand Up @@ -175,7 +177,8 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK: load <8 x i8>
; CHECK: select <8 x i1>

; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: test_unrolled_select
; YAML-NEXT: Args:
Expand Down
78 changes: 78 additions & 0 deletions test/Transforms/SLPVectorizer/X86/remark_horcost.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

; Remark test for a horizontal reduction that is costed but rejected: the
; expected remark file below contains a Missed remark named HorSLPNotBeneficial
; with cost 1 and threshold 0.
;
; The loop runs 8 times (i = 0..7). Each iteration computes
; diff[8*i + k] + diff[8*i + k + 4] for k = 0..3, stores each sum to m2[i][k],
; and folds the four sums into the running total %a.088, whose final value
; (%add52) is returned.
define i32 @foo(i32* %diff) #0 {
entry:
%m2 = alloca [8 x [8 x i32]], align 16
%0 = bitcast [8 x [8 x i32]]* %m2 to i8*
br label %for.body

for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Running total carried across iterations; starts at 0.
%a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
; %1 = 8*i. Its low three bits are zero, so every `or %1, c` below with c < 8
; yields the same value as 8*i + c.
%1 = shl i64 %indvars.iv, 3
; k = 0: diff[8*i] + diff[8*i + 4] -> m2[i][0]
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
%2 = load i32, i32* %arrayidx, align 4
%3 = or i64 %1, 4
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
%4 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %4, %2
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
store i32 %add3, i32* %arrayidx6, align 16

; Fold the k = 0 sum into the running total, then
; k = 1: diff[8*i + 1] + diff[8*i + 5] -> m2[i][1]
%add10 = add nsw i32 %add3, %a.088
%5 = or i64 %1, 1
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
%6 = load i32, i32* %arrayidx13, align 4
%7 = or i64 %1, 5
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
%8 = load i32, i32* %arrayidx16, align 4
%add17 = add nsw i32 %8, %6
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
store i32 %add17, i32* %arrayidx20, align 4

; Fold the k = 1 sum into the running total, then
; k = 2: diff[8*i + 2] + diff[8*i + 6] -> m2[i][2]
%add24 = add nsw i32 %add10, %add17
%9 = or i64 %1, 2
%arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
%10 = load i32, i32* %arrayidx27, align 4
%11 = or i64 %1, 6
%arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
%12 = load i32, i32* %arrayidx30, align 4
%add31 = add nsw i32 %12, %10
%arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
store i32 %add31, i32* %arrayidx34, align 8

; Fold the k = 2 sum into the running total, then
; k = 3: diff[8*i + 3] + diff[8*i + 7] -> m2[i][3]
%add38 = add nsw i32 %add24, %add31
%13 = or i64 %1, 3
%arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
%14 = load i32, i32* %arrayidx41, align 4
%15 = or i64 %1, 7
%arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
%16 = load i32, i32* %arrayidx44, align 4

%add45 = add nsw i32 %16, %14
%arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
store i32 %add45, i32* %arrayidx48, align 4

; Final fold of the k = 3 sum; this value feeds the %a.088 phi and the return.
%add52 = add nsw i32 %add38, %add45
; The optimized output is expected to contain one vector add and no second one
; after it. NOTE(review): presumably the vector add is the four element-wise
; sums, while the scalar add chain into the running total is the reduction
; reported as not beneficial -- confirm against the pass output.
; CHECK: add nsw <{{[0-9]+}} x i32>
; CHECK-NOT: add nsw <{{[0-9]+}} x i32>

; Expected entry in the remark file. The two message strings are matched
; exactly as written here, including the absence of a space between them.
; YAML: --- !Missed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: HorSLPNotBeneficial
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: Vectorizing horizontal reduction is possible
; YAML-NEXT: - String: 'but not beneficial with cost '
; YAML-NEXT: - Cost: '1'
; YAML-NEXT: - String: ' and threshold '
; YAML-NEXT: - Threshold: '0'

; Loop control: leave the loop once the incremented counter equals 8.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
ret i32 %add52
}
43 changes: 43 additions & 0 deletions test/Transforms/SLPVectorizer/X86/remark_listcost.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

; Remark test for a list that is costed but not vectorized: the expected remark
; below is NotBeneficial with cost 0 against a threshold of 0.
;
; The loop runs 64 times. Each iteration loads one i32 through each of the two
; input pointers, subtracts the second from the first, stores the result
; through the output pointer, and advances all three pointers by one element.
define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
br label %1

; Implicitly numbered block %1 (loop body); predecessors are %0 and %1 itself.
%idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ]
%po.03 = phi i32* [ %pout, %0 ], [ %7, %1 ]
%ptmpi2.02 = phi i32* [ %pin2, %0 ], [ %4, %1 ]
%ptmpi1.01 = phi i32* [ %pin1, %0 ], [ %2, %1 ]
; %2, %4 and %7 are the next-element pointers fed back into the phis above.
%2 = getelementptr inbounds i32, i32* %ptmpi1.01, i64 1
%3 = load i32, i32* %ptmpi1.01, align 4, !tbaa !1
%4 = getelementptr inbounds i32, i32* %ptmpi2.02, i64 1
%5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1
%6 = sub nsw i32 %3, %5
%7 = getelementptr inbounds i32, i32* %po.03, i64 1
; No vector-of-i32 type may appear in the optimized output, and the remark file
; must contain the entry below. The argument key 'Treshold' is matched exactly
; as spelled here.
; CHECK-NOT: <{{[0-9]+}} x i32>
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: NotBeneficial
; YAML-NEXT: Function: vsub2_test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost '
; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' >= '
; YAML-NEXT: - Treshold: '0'
store i32 %6, i32* %po.03, align 4, !tbaa !1
; Loop control: leave the loop once the incremented counter equals 64.
%8 = add nuw nsw i32 %idx.04, 1
%exitcond = icmp eq i32 %8, 64
br i1 %exitcond, label %9, label %1, !llvm.loop !5

; Implicitly numbered block %9 (loop exit).
ret void
}

; Module-level metadata for this test.
!llvm.ident = !{!0}

; !0: producer identification string.
!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
; !1-!4: TBAA nodes; !1 is the tag attached to the loads and the store via
; `!tbaa !1`.
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
; !5-!7: loop metadata attached to the loop branch via `!llvm.loop !5`; it
; carries vectorize width 1 and interleave count 1.
!5 = distinct !{!5, !6, !7}
!6 = !{!"llvm.loop.vectorize.width", i32 1}
!7 = !{!"llvm.loop.interleave.count", i32 1}
60 changes: 60 additions & 0 deletions test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

; Remark test for candidate lists that cannot be vectorized: the expected
; remark file below contains an InequableTypes remark (opcodes add and phi)
; followed by a NotPossible remark.
;
; The loop runs 8 times (i = 0..7). Each iteration computes
; diff[8*i + k] + diff[8*i + k + 4] for k = 0..1, stores each sum to m2[i][k],
; and folds both sums into the running total %a.088, whose final value
; (%add24) is returned.
define i32 @foo(i32* nocapture readonly %diff) #0 {
entry:
%m2 = alloca [8 x [8 x i32]], align 16
%0 = bitcast [8 x [8 x i32]]* %m2 to i8*
br label %for.body

for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Running total carried across iterations; starts at 0.
%a.088 = phi i32 [ 0, %entry ], [ %add24, %for.body ]
; %1 = 8*i. Its low three bits are zero, so every `or %1, c` below with c < 8
; yields the same value as 8*i + c.
%1 = shl i64 %indvars.iv, 3
; k = 0: diff[8*i] + diff[8*i + 4] -> m2[i][0]
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
%2 = load i32, i32* %arrayidx, align 4
%3 = or i64 %1, 4
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
%4 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %4, %2
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
store i32 %add3, i32* %arrayidx6, align 16
; Fold the k = 0 sum into the running total, then
; k = 1: diff[8*i + 1] + diff[8*i + 5] -> m2[i][1]
%add10 = add nsw i32 %add3, %a.088
%5 = or i64 %1, 1
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
%6 = load i32, i32* %arrayidx13, align 4
%7 = or i64 %1, 5
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
%8 = load i32, i32* %arrayidx16, align 4
%add17 = add nsw i32 %8, %6
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
store i32 %add17, i32* %arrayidx20, align 4
; Final fold of the k = 1 sum; this value feeds the %a.088 phi and the return.
%add24 = add nsw i32 %add10, %add17

; No vector add may appear in the optimized output. The remark file must
; contain the two entries below, in this order.
; CHECK-NOT: add nsw <{{[0-9]+}} x i32>
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: InequableTypes
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Cannot SLP vectorize list: not all of the '
; YAML-NEXT: - String: 'parts of scalar instructions are of the same type: '
; YAML-NEXT: - Instruction1Opcode: add
; YAML-NEXT: - String: ' and '
; YAML-NEXT: - Instruction2Opcode: phi

; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: NotPossible
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Cannot SLP vectorize list: vectorization was impossible'
; YAML-NEXT: - String: ' with available vectorization factors'

; Loop control: leave the loop once the incremented counter equals 8.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
%arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0
ret i32 %add24
}

Loading

0 comments on commit e074ad4

Please sign in to comment.