diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 8d0db1611609..823c5fba745e 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -114,7 +114,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); // // LoopVectorize - Create a loop vectorization pass. // -Pass *createLoopVectorizePass(); +Pass *createLoopVectorizePass(bool NoUnrolling = false); //===----------------------------------------------------------------------===// // diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 743dc4234343..d4c0c2caeaa7 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -196,7 +196,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopDeletionPass()); // Delete dead loops if (!LateVectorize && LoopVectorize) - MPM.add(createLoopVectorizePass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops)); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops @@ -250,7 +250,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Add the various vectorization passes and relevant cleanup passes for // them since we are no longer in the middle of the main scalar pipeline. if (LoopVectorize) { - MPM.add(createLoopVectorizePass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops)); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2ee1441e4b5a..0afc73e5098f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -761,9 +761,9 @@ struct LoopVectorizeHints { /// Vectorization unroll factor. 
unsigned Unroll; - LoopVectorizeHints(const Loop *L) + LoopVectorizeHints(const Loop *L, bool DisableUnrolling) : Width(VectorizationFactor) - , Unroll(VectorizationUnroll) + , Unroll(DisableUnrolling ? 1 : VectorizationUnroll) , LoopID(L->getLoopID()) { getHints(L); // The command line options override any loop metadata except for when @@ -772,6 +772,9 @@ struct LoopVectorizeHints { Width = VectorizationFactor; if (VectorizationUnroll.getNumOccurrences() > 0) Unroll = VectorizationUnroll; + + DEBUG(if (DisableUnrolling && Unroll == 1) + dbgs() << "LV: Unrolling disabled by the pass manager\n"); } /// Return the loop vectorizer metadata prefix. @@ -878,7 +881,8 @@ struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid static char ID; - explicit LoopVectorize() : LoopPass(ID) { + explicit LoopVectorize(bool NoUnrolling = false) + : LoopPass(ID), DisableUnrolling(NoUnrolling) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -888,6 +892,7 @@ struct LoopVectorize : public LoopPass { TargetTransformInfo *TTI; DominatorTree *DT; TargetLibraryInfo *TLI; + bool DisableUnrolling; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. 
@@ -909,7 +914,7 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LoopVectorizeHints Hints(L); + LoopVectorizeHints Hints(L, DisableUnrolling); if (Hints.Width == 1 && Hints.Unroll == 1) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); @@ -4786,8 +4791,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass() { - return new LoopVectorize(); + Pass *createLoopVectorizePass(bool NoUnrolling) { + return new LoopVectorize(NoUnrolling); } } diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll new file mode 100644 index 000000000000..5064fec286ce --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -disable-loop-unrolling -S | FileCheck %s -check-prefix=CHECK-NOUNRL + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" +;CHECK-LABEL: @bar( +;CHECK: store <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret +;CHECK-NOUNRL-LABEL: @bar( +;CHECK-NOUNRL: store <4 x i32> +;CHECK-NOUNRL-NOT: store <4 x i32> +;CHECK-NOUNRL: ret +define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = add nsw i32 %3, 6 + store i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll index ae72d3c60821..4fd4c989de43 100644 --- a/test/Transforms/LoopVectorize/global_alias.ll +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index ca820617861d..691080aba373 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -135,6 +135,11 @@ UnitAtATime("funit-at-a-time", cl::desc("Enable IPO. This is same as llvm-gcc's -funit-at-a-time"), cl::init(true)); +static cl::opt<bool> +DisableLoopUnrolling("disable-loop-unrolling", + cl::desc("Disable loop unrolling in all relevant passes"), + cl::init(false)); + static cl::opt<bool> DisableSimplifyLibCalls("disable-simplify-libcalls", cl::desc("Disable simplify-libcalls")); @@ -447,12 +452,13 @@ static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM, Builder.Inliner = createAlwaysInlinerPass(); } Builder.DisableUnitAtATime = !UnitAtATime; - Builder.DisableUnrollLoops = OptLevel == 0; + Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ? + DisableLoopUnrolling : OptLevel == 0; + Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2; + Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); - - Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2; } static void AddStandardCompilePasses(PassManagerBase &PM) {