LoopStrengthReduce: only distribute a signed division over the operands of an
add, addrec or mul when doing so cannot change the value.

This patch adds three file-local helpers (isAddRecSExtable, isAddSExtable,
isMulSExtable). Each one sign-extends the expression to an integer type one
bit wider than its own and reports whether ScalarEvolution returned the same
kind of node. getSDiv now distributes over addrec and add operands only when
IgnoreSignificantBits is set or the matching helper returns true, and the
multiply case uses isMulSExtable in place of Mul->hasNoSignedWrap().
test/CodeGen/X86/lsr-wrap.ll is added as a regression test for the
wraparound case.

NOTE(review): this patch text was reconstructed from a copy that had lost its
line breaks and every angle-bracketed span. The isa/dyn_cast template
arguments follow from the declared pointer types; the SmallVector inline
sizes (8 and 4) and the exact indentation are assumed -- confirm against the
tree before applying.

diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0c2f1d63805c..0eac92e44167 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -337,6 +337,33 @@ void Formula::dump() const {
   print(errs()); errs() << '\n';
 }
 
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(AR->getType()) + 1);
+  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(A->getType()) + 1);
+  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(A->getType()) + 1);
+  return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
 /// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
 /// or null otherwise. If IgnoreSignificantBits is true, expressions like
 /// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may
@@ -365,33 +392,37 @@ static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
                .sdiv(RC->getValue()->getValue()));
   }
 
-  // Distribute the sdiv over addrec operands.
+  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
-    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
-                                IgnoreSignificantBits);
-    if (!Start) return 0;
-    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
-                               IgnoreSignificantBits);
-    if (!Step) return 0;
-    return SE.getAddRecExpr(Start, Step, AR->getLoop());
+    if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+      const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
+                                  IgnoreSignificantBits);
+      if (!Start) return 0;
+      const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
+                                 IgnoreSignificantBits);
+      if (!Step) return 0;
+      return SE.getAddRecExpr(Start, Step, AR->getLoop());
+    }
   }
 
-  // Distribute the sdiv over add operands.
+  // Distribute the sdiv over add operands, if the add doesn't overflow.
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
-    SmallVector<const SCEV *, 8> Ops;
-    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-         I != E; ++I) {
-      const SCEV *Op = getSDiv(*I, RHS, SE,
-                               IgnoreSignificantBits);
-      if (!Op) return 0;
-      Ops.push_back(Op);
+    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+      SmallVector<const SCEV *, 8> Ops;
+      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+           I != E; ++I) {
+        const SCEV *Op = getSDiv(*I, RHS, SE,
+                                 IgnoreSignificantBits);
+        if (!Op) return 0;
+        Ops.push_back(Op);
+      }
+      return SE.getAddExpr(Ops);
     }
-    return SE.getAddExpr(Ops);
   }
 
   // Check for a multiply operand that we can pull RHS out of.
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
-    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
+    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
       SmallVector<const SCEV *, 4> Ops;
       bool Found = false;
       for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll
new file mode 100644
index 000000000000..ec8db501ef34
--- /dev/null
+++ b/test/CodeGen/X86/lsr-wrap.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; LSR would like to use a single IV for both of these, however it's
+; not safe due to wraparound.
+
+; CHECK: addb $-4, %r
+; CHECK: decw %
+
+@g_19 = common global i32 0 ; <i32*> [#uses=2]
+
+declare i32 @func_8(i8 zeroext) nounwind
+
+declare i32 @func_3(i8 signext) nounwind
+
+define void @func_1() nounwind {
+entry:
+  br label %bb
+
+bb: ; preds = %bb, %entry
+  %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2]
+  %tmp = sub i16 0, %indvar ; <i16> [#uses=1]
+  %tmp27 = trunc i16 %tmp to i8 ; <i8> [#uses=1]
+  %tmp1 = load i32* @g_19, align 4 ; <i32> [#uses=2]
+  %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1]
+  store i32 %tmp2, i32* @g_19, align 4
+  %tmp3 = trunc i32 %tmp1 to i8 ; <i8> [#uses=1]
+  %tmp4 = tail call i32 @func_8(i8 zeroext %tmp3) nounwind ; <i32> [#uses=0]
+  %tmp5 = shl i8 %tmp27, 2 ; <i8> [#uses=1]
+  %tmp6 = add i8 %tmp5, -112 ; <i8> [#uses=1]
+  %tmp7 = tail call i32 @func_3(i8 signext %tmp6) nounwind ; <i32> [#uses=0]
+  %indvar.next = add i16 %indvar, 1 ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %indvar.next, -28 ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+  ret void
+}