Skip to content

Commit

Permalink
Address one of the original FIXMEs for the new SROA pass by implementing
Browse files Browse the repository at this point in the history
integer promotion analogous to vector promotion. When there is an
integer alloca being accessed both as its integer type and as a narrower
integer type, promote the narrower access to "insert" and "extract" the
smaller integer from the larger one, and make the integer alloca
a candidate for promotion.

In the new formulation, we don't care about target legal integer or use
thresholds to control things. Instead, we only perform this promotion to
an integer type which the frontend has already emitted a load or store
for. This bounds the scope and prevents optimization passes from
coalescing larger and larger entities into a single integer.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164479 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
chandlerc committed Sep 24, 2012
1 parent 371e17c commit bc4021f
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 21 deletions.
119 changes: 118 additions & 1 deletion lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1723,6 +1723,54 @@ static bool isVectorPromotionViable(const TargetData &TD,
return true;
}

/// \brief Test whether the given alloca partition can be promoted to an int.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into an integer alloca suitable for
/// promotion to an SSA value. We only can ensure this for a limited set of
/// operations, and we don't want to do the rewrites unless we are confident
/// that the result will be promotable, so we have an early test here.
static bool isIntegerPromotionViable(const TargetData &TD,
Type *AllocaTy,
AllocaPartitioning &P,
AllocaPartitioning::const_use_iterator I,
AllocaPartitioning::const_use_iterator E) {
IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
if (!Ty)
return false;

// Check the uses to ensure the uses are (likely) promoteable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
// promote because of some other unsplittable entry (which we may make
// splittable later) and lose the ability to promote each element access.
bool WholeAllocaOp = false;
for (; I != E; ++I) {
if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
if (LI->isVolatile() || !LI->getType()->isIntegerTy())
return false;
if (LI->getType() == Ty)
WholeAllocaOp = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
return false;
if (SI->getValueOperand()->getType() == Ty)
WholeAllocaOp = true;
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
if (MI->isVolatile())
return false;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
} else {
return false;
}
}
return WholeAllocaOp;
}

namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
Expand Down Expand Up @@ -1754,6 +1802,12 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
Type *ElementTy;
uint64_t ElementSize;

// This is a convenience and flag variable that will be null unless the new
// alloca has a promotion-targeted integer type due to passing
// isIntegerPromotionViable above. If it is non-null does, the desired
// integer type will be stored here for easy access during rewriting.
IntegerType *IntPromotionTy;

// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
Instruction *OldPtr;
Expand All @@ -1770,7 +1824,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewBeginOffset),
NewAllocaEndOffset(NewEndOffset),
VecTy(), ElementTy(), ElementSize(),
VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
BeginOffset(), EndOffset() {
}

Expand All @@ -1786,6 +1840,9 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
ElementSize = VecTy->getScalarSizeInBits() / 8;
} else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
P, I, E)) {
IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
}
bool CanSROA = true;
for (; I != E; ++I) {
Expand Down Expand Up @@ -1830,6 +1887,43 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
return IRB.getInt32(Index);
}

Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
assert(IntPromotionTy && "Alloca is not an integer we can extract from");
Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
if (RelOffset)
V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));
if (TargetTy != IntPromotionTy) {
assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
"Cannot extract to a larger integer!");
V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
}
return V;
}

StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
if (Ty == IntPromotionTy)
return IRB.CreateStore(V, &NewAI);

assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
"Cannot insert a larger integer!");
V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
if (RelOffset)
V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));

APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
.shl(RelOffset*8);
Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
Mask, getName(".mask"));
return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
&NewAI);
}

void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
Expand Down Expand Up @@ -1865,6 +1959,16 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
return true;
}

bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
assert(!LI.isVolatile());
Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
BeginOffset);
LI.replaceAllUsesWith(Result);
Pass.DeadInsts.push_back(&LI);
DEBUG(dbgs() << " to: " << *Result << "\n");
return true;
}

bool visitLoadInst(LoadInst &LI) {
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
Expand All @@ -1873,6 +1977,8 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,

if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
if (IntPromotionTy)
return rewriteIntegerLoad(IRB, LI);

Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
Expand Down Expand Up @@ -1904,6 +2010,15 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
return true;
}

bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
assert(!SI.isVolatile());
StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
Pass.DeadInsts.push_back(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}

bool visitStoreInst(StoreInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
Expand All @@ -1912,6 +2027,8 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,

if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
if (IntPromotionTy)
return rewriteIntegerStore(IRB, SI);

Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
Expand Down
63 changes: 43 additions & 20 deletions test/Transforms/SROA/basictest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -553,30 +553,53 @@ bad:
ret i32 %Z2
}

define i32 @test12() {
; CHECK: @test12
; CHECK: alloca i24
;
; FIXME: SROA should promote accesses to this into whole i24 operations instead
; of i8 operations.
; CHECK: store i8 0
; CHECK: store i8 0
; CHECK: store i8 0
define i8 @test12() {
; We fully promote these to the i24 load or store size, resulting in just masks
; and other operations that instcombine will fold, but no alloca.
;
; CHECK: load i24*
; CHECK: @test12

entry:
%a = alloca [3 x i8]
%b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
store i8 0, i8* %b0ptr
%b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
store i8 0, i8* %b1ptr
%b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
store i8 0, i8* %b2ptr
%iptr = bitcast [3 x i8]* %a to i24*
%i = load i24* %iptr
%ret = zext i24 %i to i32
ret i32 %ret
%b = alloca [3 x i8]
; CHECK-NOT: alloca

%a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
store i8 0, i8* %a0ptr
%a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
store i8 0, i8* %a1ptr
%a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
store i8 0, i8* %a2ptr
%aiptr = bitcast [3 x i8]* %a to i24*
%ai = load i24* %aiptr
; CHCEK-NOT: store
; CHCEK-NOT: load
; CHECK: %[[mask0:.*]] = and i24 undef, -256
; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535

%biptr = bitcast [3 x i8]* %b to i24*
store i24 %ai, i24* %biptr
%b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
%b0 = load i8* %b0ptr
%b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
%b1 = load i8* %b1ptr
%b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
%b2 = load i8* %b2ptr
; CHCEK-NOT: store
; CHCEK-NOT: load
; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8

%bsum0 = add i8 %b0, %b1
%bsum1 = add i8 %bsum0, %b2
ret i8 %bsum1
; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
; CHECK-NEXT: ret i8 %[[sum1]]
}

define i32 @test13() {
Expand Down

0 comments on commit bc4021f

Please sign in to comment.