forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SLP] Added more missed optimization remarks
Summary: Added more remarks to SLP pass, in particular "missed" optimization remarks. Also proposed several tests for new functionality. Patch by Vladimir Miloserdov! For reference you may look at: https://reviews.llvm.org/rL302811 Reviewers: anemet, fhahn Reviewed By: anemet Subscribers: javed.absar, lattner, petecoup, yakush, llvm-commits Differential Revision: https://reviews.llvm.org/D38367 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318307 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
7 changed files
with
303 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s | ||
|
||
; @foo: walks %diff as 8 rows of 8 i32 values. For each row i it forms the four | ||
; sums diff[8*i+k] + diff[8*i+k+4] (k = 0..3), stores each one to %m2[i][k], and | ||
; folds them into a running scalar total, which is returned. | ||
; The embedded test directives expect one vector "add nsw" in the opt output and | ||
; a Missed remark named HorSLPNotBeneficial with cost 1 and threshold 0. | ||
define i32 @foo(i32* %diff) #0 { | ||
entry: | ||
%m2 = alloca [8 x [8 x i32]], align 16 | ||
%0 = bitcast [8 x [8 x i32]]* %m2 to i8* | ||
; NOTE: %0 is not referenced by any later instruction in this function. | ||
br label %for.body | ||
|
||
for.body: ; preds = %for.body, %entry | ||
; %indvars.iv is the row counter; %a.088 carries the running total between iterations. | ||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||
%a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ] | ||
; %1 = row * 8. Its low three bits are zero, so each "or" with a constant in 1..7 | ||
; below yields %1 plus that constant. | ||
%1 = shl i64 %indvars.iv, 3 | ||
; k = 0: diff[%1] + diff[%1 + 4] -> %m2[row][0] | ||
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1 | ||
%2 = load i32, i32* %arrayidx, align 4 | ||
%3 = or i64 %1, 4 | ||
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3 | ||
%4 = load i32, i32* %arrayidx2, align 4 | ||
%add3 = add nsw i32 %4, %2 | ||
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0 | ||
store i32 %add3, i32* %arrayidx6, align 16 | ||
|
||
; Fold the k = 0 sum into the running total, then k = 1: diff[%1 + 1] + diff[%1 + 5] -> %m2[row][1] | ||
%add10 = add nsw i32 %add3, %a.088 | ||
%5 = or i64 %1, 1 | ||
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5 | ||
%6 = load i32, i32* %arrayidx13, align 4 | ||
%7 = or i64 %1, 5 | ||
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7 | ||
%8 = load i32, i32* %arrayidx16, align 4 | ||
%add17 = add nsw i32 %8, %6 | ||
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1 | ||
store i32 %add17, i32* %arrayidx20, align 4 | ||
|
||
; Fold the k = 1 sum into the running total, then k = 2: diff[%1 + 2] + diff[%1 + 6] -> %m2[row][2] | ||
%add24 = add nsw i32 %add10, %add17 | ||
%9 = or i64 %1, 2 | ||
%arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9 | ||
%10 = load i32, i32* %arrayidx27, align 4 | ||
%11 = or i64 %1, 6 | ||
%arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11 | ||
%12 = load i32, i32* %arrayidx30, align 4 | ||
%add31 = add nsw i32 %12, %10 | ||
%arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2 | ||
store i32 %add31, i32* %arrayidx34, align 8 | ||
|
||
; Fold the k = 2 sum into the running total, then k = 3: diff[%1 + 3] + diff[%1 + 7] -> %m2[row][3] | ||
%add38 = add nsw i32 %add24, %add31 | ||
%13 = or i64 %1, 3 | ||
%arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13 | ||
%14 = load i32, i32* %arrayidx41, align 4 | ||
%15 = or i64 %1, 7 | ||
%arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15 | ||
%16 = load i32, i32* %arrayidx44, align 4 | ||
|
||
%add45 = add nsw i32 %16, %14 | ||
%arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3 | ||
store i32 %add45, i32* %arrayidx48, align 4 | ||
|
||
; %add52 is the total after all four sums of this row; it feeds the %a.088 phi and the return. | ||
%add52 = add nsw i32 %add38, %add45 | ||
; Output expectation: one vector "add nsw" is present and no second one follows it. | ||
; CHECK: add nsw <{{[0-9]+}} x i32> | ||
; CHECK-NOT: add nsw <{{[0-9]+}} x i32> | ||
|
||
; Remark-file expectation: a Missed remark reporting that vectorizing the | ||
; horizontal reduction is possible but not beneficial. | ||
; YAML: --- !Missed | ||
; YAML-NEXT: Pass: slp-vectorizer | ||
; YAML-NEXT: Name: HorSLPNotBeneficial | ||
; YAML-NEXT: Function: foo | ||
; YAML-NEXT: Args: | ||
; YAML-NEXT: - String: Vectorizing horizontal reduction is possible | ||
; YAML-NEXT: - String: 'but not beneficial with cost ' | ||
; YAML-NEXT: - Cost: '1' | ||
; YAML-NEXT: - String: ' and threshold ' | ||
; YAML-NEXT: - Threshold: '0' | ||
|
||
; Advance the row counter; the loop exits once it reaches 8. | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%exitcond = icmp eq i64 %indvars.iv.next, 8 | ||
br i1 %exitcond, label %for.end, label %for.body | ||
|
||
for.end: ; preds = %for.body | ||
ret i32 %add52 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s | ||
|
||
; @vsub2_test: element-wise loop that loads one i32 from each of two input | ||
; pointers, subtracts them, stores the difference through the output pointer, | ||
; and advances all three pointers by one element, for 64 iterations. | ||
; The embedded test directives expect no <N x i32> vector type in the opt output | ||
; and a remark named NotBeneficial with cost 0. | ||
; Basic blocks here are unnamed: the entry block is %0, the loop block is %1, | ||
; and the exit block is %9 (see the branch targets and phi predecessors below). | ||
define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 { | ||
br label %1 | ||
|
||
; Loop block %1 starts here. %idx.04 is the iteration counter; the three pointer | ||
; phis start at the arguments and take the advanced pointers on the back edge. | ||
%idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ] | ||
%po.03 = phi i32* [ %pout, %0 ], [ %7, %1 ] | ||
%ptmpi2.02 = phi i32* [ %pin2, %0 ], [ %4, %1 ] | ||
%ptmpi1.01 = phi i32* [ %pin1, %0 ], [ %2, %1 ] | ||
; %2, %4 and %7 are the next-element pointers; the loads and the store use the | ||
; current (un-advanced) pointers. | ||
%2 = getelementptr inbounds i32, i32* %ptmpi1.01, i64 1 | ||
%3 = load i32, i32* %ptmpi1.01, align 4, !tbaa !1 | ||
%4 = getelementptr inbounds i32, i32* %ptmpi2.02, i64 1 | ||
%5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1 | ||
%6 = sub nsw i32 %3, %5 | ||
%7 = getelementptr inbounds i32, i32* %po.03, i64 1 | ||
; NOTE(review): the last expected key below is spelled 'Treshold'; presumably it | ||
; mirrors the key string emitted by the pass - verify before correcting it. | ||
; CHECK-NOT: <{{[0-9]+}} x i32> | ||
; YAML: Pass: slp-vectorizer | ||
; YAML-NEXT: Name: NotBeneficial | ||
; YAML-NEXT: Function: vsub2_test | ||
; YAML-NEXT: Args: | ||
; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' | ||
; YAML-NEXT: - Cost: '0' | ||
; YAML-NEXT: - String: ' >= ' | ||
; YAML-NEXT: - Treshold: '0' | ||
store i32 %6, i32* %po.03, align 4, !tbaa !1 | ||
; Count the iteration; branch to exit block %9 after 64 iterations, else back to %1. | ||
%8 = add nuw nsw i32 %idx.04, 1 | ||
%exitcond = icmp eq i32 %8, 64 | ||
br i1 %exitcond, label %9, label %1, !llvm.loop !5 | ||
|
||
; Exit block %9. | ||
ret void | ||
} | ||
|
||
; Module-level metadata for the @vsub2_test test. | ||
; !llvm.ident names node !0, which holds the producer identification string. | ||
!llvm.ident = !{!0} | ||
|
||
!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"} | ||
; !1 is the node attached as !tbaa to the loads and the store in @vsub2_test; | ||
; it refers to !2 ("int"), which chains to !3 ("omnipotent char") and !4. | ||
!1 = !{!2, !2, i64 0} | ||
!2 = !{!"int", !3, i64 0} | ||
!3 = !{!"omnipotent char", !4, i64 0} | ||
!4 = !{!"Simple C/C++ TBAA"} | ||
; !5 is the self-referencing node attached as !llvm.loop to the loop's back-edge | ||
; branch; it carries vectorize.width = 1 and interleave.count = 1. | ||
; NOTE(review): presumably these keep the loop vectorizer from touching the loop | ||
; so only SLP behaviour is observed - confirm against the LLVM loop metadata docs. | ||
!5 = distinct !{!5, !6, !7} | ||
!6 = !{!"llvm.loop.vectorize.width", i32 1} | ||
!7 = !{!"llvm.loop.interleave.count", i32 1} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s | ||
|
||
; @foo: walks %diff as 8 rows of 8 i32 values. For each row i it forms the two | ||
; sums diff[8*i+k] + diff[8*i+k+4] (k = 0, 1), stores each one to %m2[i][k], and | ||
; folds them into a running scalar total, which is returned. | ||
; The embedded test directives expect no vector "add nsw" in the opt output and | ||
; two remarks: InequableTypes (add vs. phi) and NotPossible. | ||
define i32 @foo(i32* nocapture readonly %diff) #0 { | ||
entry: | ||
%m2 = alloca [8 x [8 x i32]], align 16 | ||
%0 = bitcast [8 x [8 x i32]]* %m2 to i8* | ||
; NOTE: %0 is not referenced by any later instruction in this function. | ||
br label %for.body | ||
|
||
for.body: ; preds = %for.body, %entry | ||
; %indvars.iv is the row counter; %a.088 carries the running total between iterations. | ||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||
%a.088 = phi i32 [ 0, %entry ], [ %add24, %for.body ] | ||
; %1 = row * 8. Its low three bits are zero, so each "or" with a constant in 1..7 | ||
; below yields %1 plus that constant. | ||
%1 = shl i64 %indvars.iv, 3 | ||
; k = 0: diff[%1] + diff[%1 + 4] -> %m2[row][0] | ||
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1 | ||
%2 = load i32, i32* %arrayidx, align 4 | ||
%3 = or i64 %1, 4 | ||
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3 | ||
%4 = load i32, i32* %arrayidx2, align 4 | ||
%add3 = add nsw i32 %4, %2 | ||
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0 | ||
store i32 %add3, i32* %arrayidx6, align 16 | ||
; Fold the k = 0 sum into the running total, then k = 1: diff[%1 + 1] + diff[%1 + 5] -> %m2[row][1] | ||
%add10 = add nsw i32 %add3, %a.088 | ||
%5 = or i64 %1, 1 | ||
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5 | ||
%6 = load i32, i32* %arrayidx13, align 4 | ||
%7 = or i64 %1, 5 | ||
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7 | ||
%8 = load i32, i32* %arrayidx16, align 4 | ||
%add17 = add nsw i32 %8, %6 | ||
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1 | ||
store i32 %add17, i32* %arrayidx20, align 4 | ||
; %add24 is the total after both sums of this row; it feeds the %a.088 phi and the return. | ||
%add24 = add nsw i32 %add10, %add17 | ||
|
||
; Output expectation: no vector "add nsw" appears. | ||
; First expected remark: the candidate list mixes an add with a phi. | ||
; CHECK-NOT: add nsw <{{[0-9]+}} x i32> | ||
; YAML: Pass: slp-vectorizer | ||
; YAML-NEXT: Name: InequableTypes | ||
; YAML-NEXT: Function: foo | ||
; YAML-NEXT: Args: | ||
; YAML-NEXT: - String: 'Cannot SLP vectorize list: not all of the ' | ||
; YAML-NEXT: - String: 'parts of scalar instructions are of the same type: ' | ||
; YAML-NEXT: - Instruction1Opcode: add | ||
; YAML-NEXT: - String: ' and ' | ||
; YAML-NEXT: - Instruction2Opcode: phi | ||
|
||
; Second expected remark: no available vectorization factor worked. | ||
; YAML: Pass: slp-vectorizer | ||
; YAML-NEXT: Name: NotPossible | ||
; YAML-NEXT: Function: foo | ||
; YAML-NEXT: Args: | ||
; YAML-NEXT: - String: 'Cannot SLP vectorize list: vectorization was impossible' | ||
; YAML-NEXT: - String: ' with available vectorization factors' | ||
|
||
; Advance the row counter; the loop exits once it reaches 8. | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%exitcond = icmp eq i64 %indvars.iv.next, 8 | ||
br i1 %exitcond, label %for.end, label %for.body | ||
|
||
for.end: ; preds = %for.body | ||
; NOTE: %arraydecay is computed but not used by the return below. | ||
%arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0 | ||
ret i32 %add24 | ||
} | ||
|
Oops, something went wrong.