Skip to content

Commit

Permalink
[LV] Fix-up external IV users after updating dominator tree
Browse files Browse the repository at this point in the history
This patch delays the fix-up step for external induction variable users until
after the dominator tree has been properly updated. This should fix PR30742.
The SCEVExpander in InductionDescriptor::transform can generate code in the
wrong location if the dominator tree is not up-to-date. We should work towards
keeping the dominator tree up-to-date throughout the transformation.

Reference: https://llvm.org/bugs/show_bug.cgi?id=30742
Differential Revision: https://reviews.llvm.org/D28168

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291462 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
mssimpso committed Jan 9, 2017
1 parent f7c0d40 commit 0a3fcf0
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 7 deletions.
27 changes: 20 additions & 7 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,10 @@ class InnerLoopVectorizer {
// Similarly, we create a new latch condition when setting up the structure
// of the new loop, so the old one can become dead.
SmallPtrSet<Instruction *, 4> DeadInstructions;

// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
};

class InnerLoopUnroller : public InnerLoopVectorizer {
Expand Down Expand Up @@ -3417,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal = PHINode::Create(
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
Value *&EndValue = IVEndValues[OrigPhi];
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
Expand All @@ -3436,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);

// Fix up external users of the induction variable.
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);

// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);

Expand Down Expand Up @@ -4109,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
} // end of for each Phi in PHIsToFix.

fixLCSSAPHIs();

// Make sure DomTree is updated.
// Update the dominator tree.
//
// FIXME: After creating the structure of the new loop, the dominator tree is
// no longer up-to-date, and it remains that way until we update it
// here. An out-of-date dominator tree is problematic for SCEV,
// because SCEVExpander uses it to guide code generation. The
// vectorizer uses SCEVExpanders in several places. Instead, we should
// keep the dominator tree up-to-date as we go.
updateAnalysis();

// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
IVEndValues[Entry.first], LoopMiddleBlock);

fixLCSSAPHIs();
predicateInstructions();

// Remove redundant induction instructions.
Expand Down
45 changes: 45 additions & 0 deletions test/Transforms/LoopVectorize/iv_outside_user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,48 @@ for.end:
store i32 %phi2, i32* %p
ret i32 %phi
}

; CHECK-LABEL: @PR30742
; CHECK: min.iters.checked
; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
; CHECK: %[[T15:.+]] = add i32 %tmp03, -7
; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3
; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]]
; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3
; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]]
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
; Regression input for PR30742. The loop in BB2 has an induction variable
; (%tmp05) whose value is read after the loop by the phi %tmp08 in BB3.
; The function takes no arguments and contains no ret: BB4 either repeats or
; branches back to BB1. Value names are matched by the FileCheck directives
; above this function (e.g. %tmp03), so they must not be renamed.
define void @PR30742() {
BB0:
br label %BB1

BB1:
; Start value for the first loop: signed max(%tmp00 - undef, 1), then minus 7.
%tmp00 = load i32, i32* undef, align 16
%tmp01 = sub i32 %tmp00, undef
%tmp02 = icmp slt i32 %tmp01, 1
%tmp03 = select i1 %tmp02, i32 1, i32 %tmp01
%tmp04 = add nsw i32 %tmp03, -7
br label %BB2

BB2:
; First loop: %tmp05 starts at %tmp04 and steps by -8 each iteration; the loop
; continues while the decremented value %tmp06 is (signed) greater than 0.
%tmp05 = phi i32 [ %tmp04, %BB1 ], [ %tmp06, %BB2 ]
%tmp06 = add i32 %tmp05, -8
%tmp07 = icmp sgt i32 %tmp06, 0
br i1 %tmp07, label %BB2, label %BB3

BB3:
; %tmp08 is the use of the induction phi %tmp05 outside of its loop (the
; pre-decrement value, not %tmp06). It has no further users in this function.
%tmp08 = phi i32 [ %tmp05, %BB2 ]
; Recompute the same start-value expression as in BB1 for the second loop.
%tmp09 = sub i32 %tmp00, undef
%tmp10 = icmp slt i32 %tmp09, 1
%tmp11 = select i1 %tmp10, i32 1, i32 %tmp09
%tmp12 = add nsw i32 %tmp11, -7
br label %BB4

BB4:
; Second loop with the same shape as BB2 (step -8, continue while > 0); on
; exit it branches back to BB1 rather than returning.
%tmp13 = phi i32 [ %tmp12, %BB3 ], [ %tmp14, %BB4 ]
%tmp14 = add i32 %tmp13, -8
%tmp15 = icmp sgt i32 %tmp14, 0
br i1 %tmp15, label %BB4, label %BB1
}

0 comments on commit 0a3fcf0

Please sign in to comment.