forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LoopUnroll] Respect the convergent attribute.
Summary: Specifically, when we perform runtime loop unrolling of a loop that contains a convergent op, we can only unroll k times, where k divides the loop trip multiple. Without this change, we'll happily unroll e.g. the following loop for (int i = 0; i < N; ++i) { if (i == 0) convergent_op(); foo(); } into int i = 0; if (N % 2 == 1) { convergent_op(); foo(); ++i; } for (; i < N - 1; i += 2) { if (i == 0) convergent_op(); foo(); foo(); }. This is unsafe, because we've just added a control-flow dependency to the convergent op in the prelude. In general, runtime unrolling loops that contain convergent ops is safe only if we don't have to emit a prelude, which occurs when the unroll count divides the trip multiple. Reviewers: resistor Subscribers: llvm-commits, mzolotukhin Differential Revision: http://reviews.llvm.org/D17526 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263509 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Justin Lebar
committed
Mar 14, 2016
1 parent
0440e81
commit 64d996c
Showing
3 changed files
with
142 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s | ||
|
||
; External callee declared with the convergent attribute; the loops below | ||
; call it so that the unroller has to treat them as containing a | ||
; convergent operation. | ||
declare void @f() convergent | ||
|
||
; Although this loop contains a convergent instruction, it should be | ||
; fully unrolled. The loop counts from 0 and exits once the incremented | ||
; counter equals the constant 3, so exactly three copies of the call are | ||
; expected and no further copy may appear. | ||
; | ||
; CHECK-LABEL: @full_unroll( | ||
define i32 @full_unroll() { | ||
entry: | ||
  br label %l3 | ||
|
||
l3: | ||
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK-NOT: call void @f() | ||
  ; The call site is marked convergent explicitly, matching the other | ||
  ; tests in this file (the callee is declared convergent as well). | ||
  call void @f() convergent | ||
  %inc = add nsw i32 %x.0, 1 | ||
  %exitcond = icmp eq i32 %inc, 3 | ||
  br i1 %exitcond, label %exit, label %l3 | ||
|
||
exit: | ||
  ret i32 0 | ||
} | ||
|
||
; The exit bound of this loop is %n * 12, which is only known at run time. | ||
; Even though the body calls a convergent function, the loop should still | ||
; be partially unrolled. The expected unroll count is the largest power | ||
; of 2 that divides the trip multiple of 12, i.e. 4, so four copies of | ||
; the call are expected and no fifth one. | ||
; | ||
; CHECK-LABEL: @runtime_unroll( | ||
define i32 @runtime_unroll(i32 %n) { | ||
entry: | ||
  ; Multiplying by 12 is what gives the loop its trip multiple. | ||
  %trip.count = mul nsw i32 %n, 12 | ||
  br label %loop | ||
|
||
loop: | ||
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK-NOT: call void @f() | ||
  call void @f() convergent | ||
  %iv.next = add nsw i32 %iv, 1 | ||
  %done = icmp eq i32 %iv.next, %trip.count | ||
  br i1 %done, label %exit, label %loop | ||
|
||
exit: | ||
  ret i32 0 | ||
} | ||
|
||
; This loop contains a convergent instruction, so its partial unroll | ||
; count must divide its trip multiple. This overrides its unroll | ||
; pragma -- we unroll exactly 8 times, even though 16 is requested. | ||
; The exit bound is %n * 24, so the trip multiple is 24; 8 divides it, | ||
; 16 does not. | ||
; | ||
; CHECK-LABEL: @pragma_unroll( | ||
define i32 @pragma_unroll(i32 %n) { | ||
entry: | ||
  %loop_ctl = mul nsw i32 %n, 24 | ||
  ; No loop metadata here: this branch is outside the loop, and the | ||
  ; unroll hint is carried by the latch branch at the end of %l3. | ||
  br label %l3 | ||
|
||
l3: | ||
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK: call void @f() | ||
; CHECK-NOT: call void @f() | ||
  call void @f() convergent | ||
  %inc = add nsw i32 %x.0, 1 | ||
  %exitcond = icmp eq i32 %inc, %loop_ctl | ||
  ; Latch branch: carries the loop ID that requests an unroll count of 16. | ||
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0 | ||
|
||
exit: | ||
  ret i32 0 | ||
} | ||
|
||
; Loop ID referenced through !llvm.loop in @pragma_unroll. Its first | ||
; operand refers back to the node itself, and the nested node requests | ||
; an unroll count of 16 via llvm.loop.unroll.count. | ||
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}} |