Skip to content

Commit

Permalink
[LV] Allow reductions that have several uses outside the loop
Browse files Browse the repository at this point in the history
We currently check whether a reduction has a single outside user. We don't
really need to require that - we just need to make sure a single value is
used externally. The number of external users of that value shouldn't actually
matter.

Differential Revision: https://reviews.llvm.org/D28830


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292424 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
mkuperst committed Jan 18, 2017
1 parent 92e82c5 commit c3a226d
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 10 deletions.
17 changes: 12 additions & 5 deletions lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
// - By one instruction outside of the loop (safe).
// - By instructions outside of the loop (safe).
// * One value may have several outside users, but all outside
// uses must be of the same value.
// - By further instructions outside of the loop (not safe).
// - By an instruction that is not part of the reduction (not safe).
// This is either:
Expand Down Expand Up @@ -297,10 +299,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users or if the header phi node is
// being used. In this case the user uses the value of the previous
// iteration, in which case we would lose "VF-1" iterations of the
// reduction operation if we vectorize.
// If we already know this instruction is used externally, move on to
// the next user.
if (ExitInstruction == Cur)
continue;

// Exit if you find multiple values used outside or if the header phi
// node is being used. In this case the user uses the value of the
// previous iteration, in which case we would lose "VF-1" iterations of
// the reduction operation if we vectorize.
if (ExitInstruction != nullptr || Cur == Phi)
return false;

Expand Down
7 changes: 2 additions & 5 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4091,13 +4091,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
// we already fixed them.
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");

// We found our reduction value exit-PHI. Update it with the
// We found a reduction value exit-PHI. Update it with the
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
// Add an edge coming from the bypass.
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
} // end of the LCSSA phi scan.

// Fix the scalar loop reduction variable with the incoming reduction sum
Expand Down
46 changes: 46 additions & 0 deletions test/Transforms/LoopVectorize/reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -493,3 +493,49 @@ exit:
%inc.2 = add nsw i32 %inc511.1.inc4.1, 2
ret i32 %inc.2
}

;CHECK-LABEL: @reduction_sum_multiuse(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ]
;CHECK: ret i32
; Integer sum reduction whose final value (%9) is consumed by two separate
; phi nodes in the loop exit block. Each iteration adds the truncated
; induction variable, A[i] and B[i] to the running sum. The function returns
; the sum of both copies of the reduction result, or 0 when %n <= 0 (both
; %f1 and %f2 then take their 0 incoming value from the entry block).
define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
; Guard: skip the loop entirely when the trip count is not positive.
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph.preheader, label %end
.lr.ph.preheader: ; preds = %0
br label %.lr.ph

.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
; Loop-carried values: the i64 induction variable and the running sum,
; both starting at 0 on entry from the preheader.
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]
; Load A[i] and B[i].
%2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%3 = load i32, i32* %2, align 4
%4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%5 = load i32, i32* %4, align 4
; Reduction chain: sum + (i32)i + A[i] + B[i]. %9 is the value fed back into
; %sum.02 and the only chain value used outside the loop.
%6 = trunc i64 %indvars.iv to i32
%7 = add i32 %sum.02, %6
%8 = add i32 %7, %3
%9 = add i32 %8, %5
; Advance the induction variable and exit once its truncated value equals %n.
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge: ; preds = %.lr.ph
; Two distinct exit-block phis of the same reduction value %9: this gives the
; reduction several users outside the loop while only one value escapes.
%sum.lcssa = phi i32 [ %9, %.lr.ph ]
%sum.copy = phi i32 [ %9, %.lr.ph ]
br label %end

end:
; Merge the loop-skipped path (0) with each copy of the reduction result,
; then add the two merged values together.
%f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
%f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
%final = add i32 %f1, %f2
ret i32 %final
}

0 comments on commit c3a226d

Please sign in to comment.