Commit 4b49239

Implement load to store => memcpy in MemCpyOpt for aggregates
Summary: Most of the toolchain is able to optimize scalar and memcpy-like operations efficiently, while it isn't that good with aggregates. In order to improve support for aggregates, we try to turn aggregate manipulations into either scalar or memcpy-like operations whenever possible without losing information. This is one such opportunity.

Reviewers: craig.topper, spatel, dexonsmith, Prazek, chandlerc

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D15894

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256868 91177308-0d34-0410-b5e6-96231b3b80d8
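To illustrate the kind of IR this targets (an illustrative sketch, not part of the commit; the %S type and @copy function mirror the new test added below), consider a first-class aggregate copied through a load/store pair:

    %S = type { i8*, i32 }

    define void @copy(%S* %src, %S* %dst) {
      ; The aggregate is loaded and stored as a whole value.
      %val = load %S, %S* %src
      store %S %val, %S* %dst
      ret void
    }

After -memcpyopt, the pair becomes a single call to @llvm.memmove.p0i8.p0i8.i64, or to @llvm.memcpy.p0i8.p0i8.i64 when alias analysis proves %src and %dst cannot alias, as checked by the new test file.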

2 files changed: +120 -11 lines changed

lib/Transforms/Scalar/MemCpyOptimizer.cpp

+73 -11
@@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   return AMemSet;
 }
 
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+                                    const LoadInst *LI) {
+  unsigned StoreAlign = SI->getAlignment();
+  if (!StoreAlign)
+    StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+  unsigned LoadAlign = LI->getAlignment();
+  if (!LoadAlign)
+    LoadAlign = DL.getABITypeAlignment(LI->getType());
+
+  return std::min(StoreAlign, LoadAlign);
+}
 
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!SI->isSimple()) return false;
@@ -496,12 +507,70 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 
   const DataLayout &DL = SI->getModule()->getDataLayout();
 
-  // Detect cases where we're performing call slot forwarding, but
-  // happen to be using a load-store pair to implement it, rather than
-  // a memcpy.
+  // Load to store forwarding can be interpreted as memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
     if (LI->isSimple() && LI->hasOneUse() &&
         LI->getParent() == SI->getParent()) {
+
+      auto *T = LI->getType();
+      if (T->isAggregateType()) {
+        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+        MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+        // We use alias analysis to check if an instruction may store to
+        // the memory we load from in between the load and the store. If
+        // such an instruction is found, we store it in AI.
+        Instruction *AI = nullptr;
+        for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
+             I != E; ++I) {
+          if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) {
+            AI = &*I;
+            break;
+          }
+        }
+
+        // If no aliasing instruction is found, then we can promote the
+        // load/store pair to a memcpy at the store location.
+        if (!AI) {
+          // If we load from memory that may alias the memory we store to,
+          // memmove must be used to preserve semantics. If not, memcpy can
+          // be used.
+          bool UseMemMove = false;
+          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+            UseMemMove = true;
+
+          unsigned Align = findCommonAlignment(DL, SI, LI);
+          uint64_t Size = DL.getTypeStoreSize(T);
+
+          IRBuilder<> Builder(SI);
+          Instruction *M;
+          if (UseMemMove)
+            M = Builder.CreateMemMove(SI->getPointerOperand(),
+                                      LI->getPointerOperand(), Size,
+                                      Align, SI->isVolatile());
+          else
+            M = Builder.CreateMemCpy(SI->getPointerOperand(),
+                                     LI->getPointerOperand(), Size,
+                                     Align, SI->isVolatile());
+
+          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
+                       << " => " << *M << "\n");
+
+          MD->removeInstruction(SI);
+          SI->eraseFromParent();
+          MD->removeInstruction(LI);
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+
+          // Make sure we do not invalidate the iterator.
+          BBI = M->getIterator();
+          return true;
+        }
+      }
+
+      // Detect cases where we're performing call slot forwarding, but
+      // happen to be using a load-store pair to implement it, rather than
+      // a memcpy.
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -522,18 +591,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
      }
 
      if (C) {
-        unsigned storeAlign = SI->getAlignment();
-        if (!storeAlign)
-          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
-        unsigned loadAlign = LI->getAlignment();
-        if (!loadAlign)
-          loadAlign = DL.getABITypeAlignment(LI->getType());
-
        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            std::min(storeAlign, loadAlign), C);
+            findCommonAlignment(DL, SI, LI), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
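For concreteness, here is a rough sketch (not taken from the commit) of the IR one would expect after the rewrite for the @copy function in the new test below, assuming that test's x86-64 datalayout: the bitcasts are introduced by IRBuilder, the i64 16 size and i32 8 alignment follow from the layout of %S = { i8*, i32 }, and the five-argument intrinsic form with an explicit alignment operand is the in-tree signature of this era.

    %S = type { i8*, i32 }

    declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)

    define void @copy(%S* %src, %S* %dst) {
      ; The aggregate load/store pair has been folded into one intrinsic call;
      ; memmove is used because %src and %dst may alias.
      %1 = bitcast %S* %dst to i8*
      %2 = bitcast %S* %src to i8*
      call void @llvm.memmove.p0i8.p0i8.i64(i8* %1, i8* %2, i64 16, i32 8, i1 false)
      ret void
    }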
New test file (+47)
@@ -0,0 +1,47 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%S = type { i8*, i32 }
+
+define void @copy(%S* %src, %S* %dst) {
+; CHECK-LABEL: copy
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+define void @noaliassrc(%S* noalias %src, %S* %dst) {
+; CHECK-LABEL: noaliassrc
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+define void @noaliasdst(%S* %src, %S* noalias %dst) {
+; CHECK-LABEL: noaliasdst
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+define void @copyalias(%S* %src, %S* %dst) {
+; CHECK-LABEL: copyalias
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
+; CHECK-NOT: load
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: store %S [[LOAD]], %S* %dst
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  %2 = load %S, %S* %src
+  store %S %1, %S* %dst
+  store %S %2, %S* %dst
+  ret void
+}
