Skip to content

Commit

Permalink
[LV] Extend trunc optimization to all IVs with constant integer steps
Browse files Browse the repository at this point in the history
This patch extends the optimization of truncations whose operand is an
induction variable with a constant integer step. Previously we were only
applying this optimization to the primary induction variable. However, the cost
model assumes the optimization is applied to the truncation of all integer
induction variables (regardless of step type). The transformation is now
applied to the other induction variables, and I've updated the cost model to
ensure it is better in sync with the transformation we actually perform.

Differential Revision: https://reviews.llvm.org/D29847

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294967 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
mssimpso committed Feb 13, 2017
1 parent d73a792 commit f32a3fd
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 14 deletions.
32 changes: 20 additions & 12 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4879,12 +4879,15 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// induction variable. Notice that we can only optimize the 'trunc' case
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
// (c) other casts depend on pointer size.
auto ID = Legal->getInductionVars()->lookup(OldInduction);
if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
ID.getConstIntStepValue()) {
widenIntInduction(OldInduction, cast<TruncInst>(CI));
break;
}
if (auto *Trunc = dyn_cast<TruncInst>(CI))
if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) {
auto II = Legal->getInductionVars()->find(Phi);
if (II != Legal->getInductionVars()->end())
if (II->second.getConstIntStepValue()) {
widenIntInduction(Phi, Trunc);
break;
}
}

/// Vectorize casts.
Type *DestTy =
Expand Down Expand Up @@ -7224,12 +7227,17 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
// We optimize the truncation of induction variable.
// The cost of these is the same as the scalar operation.
if (I->getOpcode() == Instruction::Trunc &&
Legal->isInductionVariable(I->getOperand(0)))
return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
I->getOperand(0)->getType());
// We optimize the truncation of induction variables having constant
// integer steps. The cost of these truncations is the same as the scalar
// operation.
if (auto *Trunc = dyn_cast<TruncInst>(I))
if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) {
auto II = Legal->getInductionVars()->find(Phi);
if (II != Legal->getInductionVars()->end())
if (II->second.getConstIntStepValue())
return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
Trunc->getSrcTy());
}

Type *SrcScalarTy = I->getOperand(0)->getType();
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
Expand Down
31 changes: 31 additions & 0 deletions test/Transforms/LoopVectorize/induction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -773,3 +773,34 @@ for.body:
exit:
ret void
}

; CHECK-LABEL: @non_primary_iv_trunc(
; CHECK: vector.body:
; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
; CHECK: [[TMP3:%.*]] = add i64 %index, 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %a, i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT: %index.next = add i64 %index, 2
; CHECK: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
; Test input: a loop with two i64 induction variables. %i indexes %a and
; controls the loop exit; %j is a second induction variable whose only use
; besides its own increment is a truncation to i32 that is stored to %a[%i].
; The FileCheck expectations preceding this function require the stored value
; to come from a <2 x i32> vector induction phi.
define void @non_primary_iv_trunc(i32* %a, i64 %n) {
entry:
br label %for.body

for.body:
; %i starts at 0 and is incremented by 1 per iteration.
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; %j starts at 0 and is incremented by the constant step 2 per iteration.
%j = phi i64 [ %j.next, %for.body ], [ 0, %entry ]
%tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
; Truncation of the induction variable %j from i64 to i32; its result is the
; value stored below.
%tmp1 = trunc i64 %j to i32
store i32 %tmp1, i32* %tmp0, align 4
%i.next = add nuw nsw i64 %i, 1
%j.next = add nuw nsw i64 %j, 2
; Continue looping while the incremented %i is signed-less-than %n.
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end

for.end:
ret void
}
5 changes: 3 additions & 2 deletions test/Transforms/LoopVectorize/reverse_iter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Make sure that the reverse iterators are calculated using 64bit arithmetic, not 32.
; PR15882: This test ensures that we do not produce wrapping arithmetic when
; creating constant reverse step vectors.
;
; int foo(int n, int *A) {
; int sum;
Expand All @@ -13,7 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;

;CHECK-LABEL: @foo(
;CHECK: <i64 0, i64 -1, i64 -2, i64 -3>
;CHECK: <i32 0, i32 -1, i32 -2, i32 -3>
;CHECK: ret
define i32 @foo(i32 %n, i32* nocapture %A) {
%1 = icmp sgt i32 %n, 0
Expand Down

0 comments on commit f32a3fd

Please sign in to comment.