diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 13bd02215be5..3185db4bfbe8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -606,14 +606,16 @@ namespace {
     SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
     AliasSetTracker &AST;
     DebugLoc DL;
+    int Alignment;
   public:
     LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts,
                  SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA,
                  SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
-                 DebugLoc dl)
+                 DebugLoc dl, int alignment)
       : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
-        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
+        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+        Alignment(alignment) {}
 
     virtual bool isInstInList(Instruction *I,
                               const SmallVectorImpl<Instruction*> &) const {
@@ -635,6 +637,7 @@ namespace {
         Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
         Instruction *InsertPos = ExitBlock->getFirstNonPHI();
         StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+        NewSI->setAlignment(Alignment);
         NewSI->setDebugLoc(DL);
       }
     }
@@ -680,10 +683,14 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   // It is safe to promote P if all uses are direct load/stores and if at
   // least one is guaranteed to be executed.
   bool GuaranteedToExecute = false;
-  
+
   SmallVector<Instruction*, 64> LoopUses;
   SmallPtrSet<Value*, 4> PointerMustAliases;
 
+  // We start with an alignment of one and try to find instructions that allow
+  // us to prove better alignment.
+  unsigned Alignment = 1;
+
   // Check that all of the pointers in the alias set have the same type.  We
   // cannot (yet) promote a memory location that is loaded and stored in
   // different sizes.
@@ -706,24 +713,38 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
       // If there is an non-load/store instruction in the loop, we can't promote
      // it.
-      if (isa<LoadInst>(Use))
+      unsigned InstAlignment;
+      if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
         assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
-      else if (isa<StoreInst>(Use)) {
+        InstAlignment = load->getAlignment();
+      } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
         if (Use->getOperand(1) != ASIV)
           continue;
+        InstAlignment = store->getAlignment();
         assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
       } else
         return; // Not a load or store.
-      
+
+      // If the alignment of this instruction allows us to specify a more
+      // restrictive (and performant) alignment and if we are sure this
+      // instruction will be executed, update the alignment.
+      // Larger is better, with the exception of 0 being the best alignment.
+      if ((InstAlignment > Alignment || InstAlignment == 0)
+          && (Alignment != 0))
+        if (isSafeToExecuteUnconditionally(*Use)) {
+          GuaranteedToExecute = true;
+          Alignment = InstAlignment;
+        }
+
       if (!GuaranteedToExecute)
         GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
 
       LoopUses.push_back(Use);
     }
   }
-  
+
   // If there isn't a guaranteed-to-execute instruction, we can't promote.
   if (!GuaranteedToExecute)
     return;
@@ -746,13 +767,14 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   SmallVector<PHINode*, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        *CurAST, DL);
+                        *CurAST, DL, Alignment);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad =
     new LoadInst(SomePtr, SomePtr->getName()+".promoted",
                  Preheader->getTerminator());
+  PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
diff --git a/test/Transforms/LICM/2011-07-06-Alignment.ll b/test/Transforms/LICM/2011-07-06-Alignment.ll
new file mode 100644
index 000000000000..f97b7010bc02
--- /dev/null
+++ b/test/Transforms/LICM/2011-07-06-Alignment.ll
@@ -0,0 +1,26 @@
+; RUN: opt -licm -S %s | FileCheck %s
+
+@A = common global [1024 x float] zeroinitializer, align 4
+
+define i32 @main() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:
+  %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 3
+  %vecidx = bitcast float* %arrayidx to <4 x float>*
+  store <4 x float> zeroinitializer, <4 x float>* %vecidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp ne i64 %indvar, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:
+  br label %for.cond
+
+for.end:
+  ret i32 0
+}
+
+;CHECK: store <4 x float> {{.*}} align 4
+