[SLP] Added more missed optimization remarks

Summary: Added more remarks to the SLP pass, in particular "missed" optimization remarks. Also added several tests for the new functionality. Patch by Vladimir Miloserdov! For reference, see: https://reviews.llvm.org/rL302811 Reviewers: anemet, fhahn Reviewed By: anemet Subscribers: javed.absar, lattner, petecoup, yakush, llvm-commits Differential Revision: https://reviews.llvm.org/D38367 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318307 91177308-0d34-0410-b5e6-96231b3b80d8
GiorgioNatili · Nov 15, 2017 · e074ad4 · e074ad4
1 parent 73e1b04
commit e074ad4
Show file tree

Hide file tree

Showing 7 changed files with 303 additions and 21 deletions.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4452,19 +4452,51 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   unsigned Sz = R.getVectorElementSize(I0);
   unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
   unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2)
-    return false;
+  if (MaxVF < 2) {
+     R.getORE()->emit([&]() {
+         return OptimizationRemarkMissed(
+                    SV_NAME, "SmallVF", I0)
+                << "Cannot SLP vectorize list: vectorization factor "
+                << "less than 2 is not supported";
+     });
+     return false;
+  }
 
   for (Value *V : VL) {
     Type *Ty = V->getType();
-    if (!isValidElementType(Ty))
+    if (!isValidElementType(Ty)) {
+      // NOTE: the following gives the user the internal LLVM type name, which may not be useful.
+      R.getORE()->emit([&]() {
+          std::string type_str;
+          llvm::raw_string_ostream rso(type_str);
+          Ty->print(rso);
+          return OptimizationRemarkMissed(
+                     SV_NAME, "UnsupportedType", I0)
+                 << "Cannot SLP vectorize list: type "
+                 << rso.str() + " is unsupported by vectorizer";
+      });
       return false;
+    }
     Instruction *Inst = dyn_cast<Instruction>(V);
-    if (!Inst || Inst->getOpcode() != Opcode0)
+
+    if (!Inst)
+        return false;
+    if (Inst->getOpcode() != Opcode0) {
+      R.getORE()->emit([&]() {
+          return OptimizationRemarkMissed(
+                     SV_NAME, "InequableTypes", I0)
+                 << "Cannot SLP vectorize list: not all of the "
+                 << "parts of scalar instructions are of the same type: "
+                 << ore::NV("Instruction1Opcode", I0) << " and "
+                 << ore::NV("Instruction2Opcode", Inst);
+      });
       return false;
+    }
   }
 
   bool Changed = false;
+  bool CandidateFound = false;
+  int MinCost = SLPCostThreshold;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
@@ -4518,14 +4550,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
       R.computeMinimumValueSizes();
       int Cost = R.getTreeCost();
+      CandidateFound = true;
+      MinCost = std::min(MinCost, Cost);
 
       if (Cost < -SLPCostThreshold) {
         DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
         R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
-                                            cast<Instruction>(Ops[0]))
-                         << "SLP vectorized with cost " << ore::NV("Cost", Cost)
-                         << " and with tree size "
-                         << ore::NV("TreeSize", R.getTreeSize()));
+                                                    cast<Instruction>(Ops[0]))
+                                 << "SLP vectorized with cost " << ore::NV("Cost", Cost)
+                                 << " and with tree size "
+                                 << ore::NV("TreeSize", R.getTreeSize()));
 
         Value *VectorizedRoot = R.vectorizeTree();
 
@@ -4560,6 +4594,22 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     }
   }
 
+  if (!Changed && CandidateFound) {
+    R.getORE()->emit([&]() {
+        return OptimizationRemarkMissed(
+                   SV_NAME, "NotBeneficial",  I0)
+               << "List vectorization was possible but not beneficial with cost "
+               << ore::NV("Cost", MinCost) << " >= "
+               << ore::NV("Treshold", -SLPCostThreshold);
+    });
+  } else if (!Changed) {
+    R.getORE()->emit([&]() {
+        return OptimizationRemarkMissed(
+                   SV_NAME, "NotPossible", I0)
+               << "Cannot SLP vectorize list: vectorization was impossible"
+               << " with available vectorization factors";
+    });
+  }
   return Changed;
 }
 
@@ -5268,17 +5318,27 @@ class HorizontalReduction {
       // Estimate cost.
       int Cost =
           V.getTreeCost() + getReductionCost(TTI, ReducedVals[i], ReduxWidth);
-      if (Cost >= -SLPCostThreshold)
-        break;
+      if (Cost >= -SLPCostThreshold) {
+          V.getORE()->emit([&]() {
+              return OptimizationRemarkMissed(
+                         SV_NAME, "HorSLPNotBeneficial", cast<Instruction>(VL[0]))
+                     << "Vectorizing horizontal reduction is possible"
+                     << "but not beneficial with cost "
+                     << ore::NV("Cost", Cost) << " and threshold "
+                     << ore::NV("Threshold", -SLPCostThreshold);
+          });
+          break;
+      }
 
       DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
                    << ". (HorRdx)\n");
-      auto *I0 = cast<Instruction>(VL[0]);
-      V.getORE()->emit(
-          OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0)
+      V.getORE()->emit([&]() {
+          return OptimizationRemark(
+                     SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
           << "Vectorized horizontal reduction with cost "
           << ore::NV("Cost", Cost) << " and with tree size "
-          << ore::NV("TreeSize", V.getTreeSize()));
+          << ore::NV("TreeSize", V.getTreeSize());
+      });
 
       // Vectorize a tree.
       DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();

diff --git a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -28,7 +28,8 @@ target triple = "aarch64--linux-gnu"
 ; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
 ; CHECK: sext i32 [[X]] to i64
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedList
 ; YAML-NEXT: Function:        getelementptr_4x32
 ; YAML-NEXT: Args:
@@ -37,7 +38,8 @@ target triple = "aarch64--linux-gnu"
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '5'
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedList
 ; YAML-NEXT: Function:        getelementptr_4x32
 ; YAML-NEXT: Args:
@@ -92,7 +94,8 @@ for.body:
 ; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
 ; CHECK: sext i32 [[X]] to i64
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedList
 ; YAML-NEXT: Function:        getelementptr_2x32
 ; YAML-NEXT: Args:
@@ -101,7 +104,8 @@ for.body:
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '5'
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedList
 ; YAML-NEXT: Function:        getelementptr_2x32
 ; YAML-NEXT: Args:

diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -15,7 +15,8 @@ target triple = "aarch64--linux"
 ; CHECK: load <4 x i32>
 ; CHECK: select <4 x i1>
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedHorizontalReduction
 ; YAML-NEXT: Function:        test_select
 ; YAML-NEXT: Args:
@@ -108,7 +109,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
 ; CHECK: load <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedHorizontalReduction
 ; YAML-NEXT: Function:        reduction_with_br
 ; YAML-NEXT: Args:
@@ -175,7 +177,8 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK: load <8 x i8>
 ; CHECK: select <8 x i1>
 
-; YAML:      Pass:            slp-vectorizer
+; YAML:      --- !Passed
+; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedHorizontalReduction
 ; YAML-NEXT: Function:        test_unrolled_select
 ; YAML-NEXT: Args:

diff --git a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -0,0 +1,78 @@
+; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+define i32 @foo(i32* %diff) #0 {
+entry:
+  %m2 = alloca [8 x [8 x i32]], align 16
+  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
+  %1 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = or i64 %1, 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
+  %4 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %4, %2
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, i32* %arrayidx6, align 16
+
+  %add10 = add nsw i32 %add3, %a.088
+  %5 = or i64 %1, 1
+  %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
+  %6 = load i32, i32* %arrayidx13, align 4
+  %7 = or i64 %1, 5
+  %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
+  %8 = load i32, i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %8, %6
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, i32* %arrayidx20, align 4
+
+  %add24 = add nsw i32 %add10, %add17
+  %9 = or i64 %1, 2
+  %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
+  %10 = load i32, i32* %arrayidx27, align 4
+  %11 = or i64 %1, 6
+  %arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
+  %12 = load i32, i32* %arrayidx30, align 4
+  %add31 = add nsw i32 %12, %10
+  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
+  store i32 %add31, i32* %arrayidx34, align 8
+
+  %add38 = add nsw i32 %add24, %add31
+  %13 = or i64 %1, 3
+  %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
+  %14 = load i32, i32* %arrayidx41, align 4
+  %15 = or i64 %1, 7
+  %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
+  %16 = load i32, i32* %arrayidx44, align 4
+
+  %add45 = add nsw i32 %16, %14
+  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
+  store i32 %add45, i32* %arrayidx48, align 4
+
+  %add52 = add nsw i32 %add38, %add45
+ ; CHECK: add nsw <{{[0-9]+}} x i32>
+ ; CHECK-NOT: add nsw <{{[0-9]+}} x i32>
+
+ ; YAML:      --- !Missed
+ ; YAML-NEXT: Pass:            slp-vectorizer
+ ; YAML-NEXT: Name:            HorSLPNotBeneficial
+ ; YAML-NEXT: Function:        foo
+ ; YAML-NEXT: Args:
+ ; YAML-NEXT:   - String:          Vectorizing horizontal reduction is possible
+ ; YAML-NEXT:   - String:          'but not beneficial with cost ' 
+ ; YAML-NEXT:   - Cost:            '1'
+ ; YAML-NEXT:   - String:          ' and threshold '
+ ; YAML-NEXT:   - Threshold:       '0'
+
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add52
+}
diff --git a/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
+  br label %1
+
+  %idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ]
+  %po.03 = phi i32* [ %pout, %0 ], [ %7, %1 ]
+  %ptmpi2.02 = phi i32* [ %pin2, %0 ], [ %4, %1 ]
+  %ptmpi1.01 = phi i32* [ %pin1, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i32, i32* %ptmpi1.01, i64 1
+  %3 = load i32, i32* %ptmpi1.01, align 4, !tbaa !1
+  %4 = getelementptr inbounds i32, i32* %ptmpi2.02, i64 1
+  %5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1
+  %6 = sub nsw i32 %3, %5
+  %7 = getelementptr inbounds i32, i32* %po.03, i64 1
+ ; CHECK-NOT: <{{[0-9]+}} x i32>
+ ; YAML:      Pass:            slp-vectorizer
+ ; YAML-NEXT: Name:            NotBeneficial
+ ; YAML-NEXT: Function:        vsub2_test
+ ; YAML-NEXT: Args:
+ ; YAML-NEXT:   - String:          'List vectorization was possible but not beneficial with cost '
+ ; YAML-NEXT:   - Cost:            '0'
+ ; YAML-NEXT:   - String:          ' >= '
+ ; YAML-NEXT:   - Treshold:        '0'
+  store i32 %6, i32* %po.03, align 4, !tbaa !1
+  %8 = add nuw nsw i32 %idx.04, 1
+  %exitcond = icmp eq i32 %8, 64
+  br i1 %exitcond, label %9, label %1, !llvm.loop !5
+
+  ret void
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = distinct !{!5, !6, !7}
+!6 = !{!"llvm.loop.vectorize.width", i32 1}
+!7 = !{!"llvm.loop.interleave.count", i32 1}
diff --git a/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+define i32 @foo(i32* nocapture readonly %diff) #0 {
+entry:
+  %m2 = alloca [8 x [8 x i32]], align 16
+  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %a.088 = phi i32 [ 0, %entry ], [ %add24, %for.body ]
+  %1 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = or i64 %1, 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
+  %4 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %4, %2
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, i32* %arrayidx6, align 16
+  %add10 = add nsw i32 %add3, %a.088
+  %5 = or i64 %1, 1
+  %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
+  %6 = load i32, i32* %arrayidx13, align 4
+  %7 = or i64 %1, 5
+  %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
+  %8 = load i32, i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %8, %6
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, i32* %arrayidx20, align 4
+  %add24 = add nsw i32 %add10, %add17
+
+ ; CHECK-NOT: add nsw <{{[0-9]+}} x i32> 
+ ; YAML:      Pass:            slp-vectorizer
+ ; YAML-NEXT: Name:            InequableTypes
+ ; YAML-NEXT: Function:        foo
+ ; YAML-NEXT: Args:
+ ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: not all of the '
+ ; YAML-NEXT:   - String:          'parts of scalar instructions are of the same type: '
+ ; YAML-NEXT:   - Instruction1Opcode: add
+ ; YAML-NEXT:   - String:          ' and '
+ ; YAML-NEXT:   - Instruction2Opcode: phi
+
+ ; YAML:      Pass:            slp-vectorizer
+ ; YAML-NEXT: Name:            NotPossible
+ ; YAML-NEXT: Function:        foo
+ ; YAML-NEXT: Args:
+ ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: vectorization was impossible'
+ ; YAML-NEXT:   - String:          ' with available vectorization factors'
+
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0
+  ret i32 %add24
+}
+