Skip to content

Commit

Permalink
[LICM] Allow promotion of some stores that are not guaranteed to exec…
Browse files Browse the repository at this point in the history
…ute.

Promotion is always legal when a store within the loop is guaranteed to execute.

However, this is not a necessary condition - for promotion to be memory model
semantics-preserving, it is enough to have a store that dominates every exit
block. This is because if the store dominates every exit block, the fact the
exit block was executed implies the original store was executed as well.

Differential Revision: https://reviews.llvm.org/D28147


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291171 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
mkuperst committed Jan 5, 2017
1 parent 4700463 commit 82d65fb
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 4 deletions.
28 changes: 24 additions & 4 deletions lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,14 +912,23 @@ bool llvm::promoteLoopAccessesToScalars(
//
// If at least one store is guaranteed to execute, both properties are
// satisfied, and promotion is legal.
//
// This, however, is not a necessary condition. Even if no store/load is
// guaranteed to execute, we can still establish these properties:
// (p1) by proving that hoisting the load into the preheader is
// safe (i.e. proving dereferenceability on all paths through the loop). We
// guaranteed to execute, we can still establish these properties.
// We can establish (p1) by proving that hoisting the load into the preheader
// is safe (i.e. proving dereferenceability on all paths through the loop). We
// can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
// (p2) by proving the memory is thread-local, so the memory model
//
// There are two ways establish (p2):
// a) Prove the location is thread-local. In this case the memory model
// requirement does not apply, and stores are safe to insert.
// b) Prove a store dominates every exit block. In this case, if an exit
// blocks is reached, the original dynamic path would have taken us through
// the store, so inserting a store into the exit block is safe. Note that this
// is different from the store being guaranteed to execute. For instance,
// if an exception is thrown on the first iteration of the loop, the original
// store is never executed, but the exit blocks are not executed either.

bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
Expand Down Expand Up @@ -1001,6 +1010,17 @@ bool llvm::promoteLoopAccessesToScalars(
}
}

// If a store dominates all exit blocks, it is safe to sink.
// As explained above, if an exit block was executed, a dominating
// store must have been been executed at least once, so we are not
// introducing stores on paths that did not have them.
// Note that this only looks at explicit exit blocks. If we ever
// start sinking stores into unwind edges (see above), this will break.
if (!SafeToInsertStore)
SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
return DT->dominates(Store->getParent(), Exit);
});

// If the store is not guaranteed to execute, we may still get
// deref info through it.
if (!DereferenceableInPH) {
Expand Down
192 changes: 192 additions & 0 deletions test/Transforms/LICM/scalar_promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,198 @@ for.end: ; preds = %for.cond.for.end_cr
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
}

declare i32 @opaque(i32) argmemonly
declare void @capture(i32*)

; We can promote even if opaque may throw.
define i32 @test7() {
; CHECK-LABEL: @test7(
; CHECK: entry:
; CHECK-NEXT: %local = alloca
; CHECK-NEXT: call void @capture(i32* %local)
; CHECK-NEXT: load i32, i32* %local
; CHECK-NEXT: br label %loop
; CHECK: exit:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
; CHECK-NEXT: %ret = load i32, i32* %local
; CHECK-NEXT: ret i32 %ret
entry:
%local = alloca i32
call void @capture(i32* %local)
br label %loop

loop:
%j = phi i32 [ 0, %entry ], [ %next, %loop ]
%x = load i32, i32* %local
%x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
store i32 %x2, i32* %local
%next = add i32 %j, 1
%cond = icmp eq i32 %next, 0
br i1 %cond, label %exit, label %loop

exit:
%ret = load i32, i32* %local
ret i32 %ret
}

; Make sure we don't promote if the store is really control-flow dependent.
define i32 @test7bad() {
; CHECK-LABEL: @test7bad(
; CHECK: entry:
; CHECK-NEXT: %local = alloca
; CHECK-NEXT: call void @capture(i32* %local)
; CHECK-NEXT: br label %loop
; CHECK: if:
; CHECK-NEXT: store i32 %x2, i32* %local
; CHECK-NEXT: br label %else
; CHECK: exit:
; CHECK-NEXT: %ret = load i32, i32* %local
; CHECK-NEXT: ret i32 %ret
entry:
%local = alloca i32
call void @capture(i32* %local)
br label %loop
loop:
%j = phi i32 [ 0, %entry ], [ %next, %else ]
%x = load i32, i32* %local
%x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
%cmp = icmp eq i32 %x2, 0
br i1 %cmp, label %if, label %else

if:
store i32 %x2, i32* %local
br label %else

else:
%next = add i32 %j, 1
%cond = icmp eq i32 %next, 0
br i1 %cond, label %exit, label %loop

exit:
%ret = load i32, i32* %local
ret i32 %ret
}

; Even if neither the load nor the store or guaranteed to execute because
; opaque() may throw, we can still promote - the load not being guaranteed
; doesn't block us, because %local is always dereferenceable.
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK: entry:
; CHECK-NEXT: %local = alloca
; CHECK-NEXT: call void @capture(i32* %local)
; CHECK-NEXT: load i32, i32* %local
; CHECK-NEXT: br label %loop
; CHECK: exit:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
; CHECK-NEXT: %ret = load i32, i32* %local
; CHECK-NEXT: ret i32 %ret
entry:
%local = alloca i32
call void @capture(i32* %local)
br label %loop

loop:
%j = phi i32 [ 0, %entry ], [ %next, %loop ]
%throwaway = call i32 @opaque(i32 %j)
%x = load i32, i32* %local
%x2 = call i32 @opaque(i32 %x)
store i32 %x2, i32* %local
%next = add i32 %j, 1
%cond = icmp eq i32 %next, 0
br i1 %cond, label %exit, label %loop

exit:
%ret = load i32, i32* %local
ret i32 %ret
}


; If the store is "guaranteed modulo exceptions", and the load depends on
; control flow, we can only promote if the pointer is otherwise known to be
; dereferenceable
define i32 @test9() {
; CHECK-LABEL: @test9(
; CHECK: entry:
; CHECK-NEXT: %local = alloca
; CHECK-NEXT: call void @capture(i32* %local)
; CHECK-NEXT: load i32, i32* %local
; CHECK-NEXT: br label %loop
; CHECK: exit:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %else ]
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
; CHECK-NEXT: %ret = load i32, i32* %local
; CHECK-NEXT: ret i32 %ret
entry:
%local = alloca i32
call void @capture(i32* %local)
br label %loop

loop:
%j = phi i32 [ 0, %entry ], [ %next, %else ]
%j2 = call i32 @opaque(i32 %j)
%cmp = icmp eq i32 %j2, 0
br i1 %cmp, label %if, label %else

if:
%x = load i32, i32* %local
br label %else

else:
%x2 = phi i32 [ 0, %loop ], [ %x, %if]
store i32 %x2, i32* %local
%next = add i32 %j, 1
%cond = icmp eq i32 %next, 0
br i1 %cond, label %exit, label %loop

exit:
%ret = load i32, i32* %local
ret i32 %ret
}

define i32 @test9bad(i32 %i) {
; CHECK-LABEL: @test9bad(
; CHECK: entry:
; CHECK-NEXT: %local = alloca
; CHECK-NEXT: call void @capture(i32* %local)
; CHECK-NEXT: %notderef = getelementptr
; CHECK-NEXT: br label %loop
; CHECK: if:
; CHECK-NEXT: load i32, i32* %notderef
; CHECK-NEXT: br label %else
; CHECK: exit:
; CHECK-NEXT: %ret = load i32, i32* %notderef
; CHECK-NEXT: ret i32 %ret
entry:
%local = alloca i32
call void @capture(i32* %local)
%notderef = getelementptr i32, i32* %local, i32 %i
br label %loop

loop:
%j = phi i32 [ 0, %entry ], [ %next, %else ]
%j2 = call i32 @opaque(i32 %j)
%cmp = icmp eq i32 %j2, 0
br i1 %cmp, label %if, label %else

if:
%x = load i32, i32* %notderef
br label %else

else:
%x2 = phi i32 [ 0, %loop ], [ %x, %if]
store i32 %x2, i32* %notderef
%next = add i32 %j, 1
%cond = icmp eq i32 %next, 0
br i1 %cond, label %exit, label %loop

exit:
%ret = load i32, i32* %notderef
ret i32 %ret
}

!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
Expand Down

0 comments on commit 82d65fb

Please sign in to comment.