Skip to content

Commit

Permalink
rewrite the memset_pattern pattern generation stuff to accept any 2/4…
Browse files Browse the repository at this point in the history
…/8/16-byte

constant, including globals.  This also makes us generate much "prettier" pattern
globals, because the constant is no longer always broken down into an array of
bytes.

This enables us to handle stores of relocatable globals.  This kicks in about
48 times in 254.gap, giving us stuff like this:

@.memset_pattern40 = internal constant [2 x %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)*] [%struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)* @isFalse, %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)* @isFalse], align 16

...
  call void @memset_pattern16(i8* %scevgep5859, i8* bitcast ([2 x %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)*]* @.memset_pattern40 to i8*), i64 %tmp75) nounwind



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126044 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
lattner committed Feb 19, 2011
1 parent 41bfbb0 commit 80e8b50
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 34 deletions.
44 changes: 12 additions & 32 deletions lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,43 +388,24 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
return 0;

// Convert the constant to an integer type of the appropriate size so we can
// start hacking on it.
if (isa<PointerType>(V->getType()))
C = ConstantExpr::getPtrToInt(C, IntegerType::get(C->getContext(), Size));
else if (isa<VectorType>(V->getType()) || V->getType()->isFloatingPointTy())
C = ConstantExpr::getBitCast(C, IntegerType::get(C->getContext(), Size));
else if (!isa<IntegerType>(V->getType()))
return 0; // Unhandled type.
// Don't care enough about darwin/ppc to implement this.
if (TD.isBigEndian())
return 0;

// Convert to size in bytes.
Size /= 8;

// If we couldn't fold this to an integer, we fail. We don't bother to handle
// relocatable expressions like the address of a global yet.
// FIXME!
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (CI == 0) return 0;

APInt CVal = CI->getValue();

// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same.
// if the top and bottom are the same (e.g. for vectors and large integers).
if (Size > 16) return 0;

// If this is a big endian target (PPC) then we need to bswap.
if (TD.isBigEndian())
CVal = CVal.byteSwap();

// Determine what each byte of the pattern value should be.
char Value[16];
for (unsigned i = 0; i != 16; ++i) {
// Get the byte value we're indexing into.
unsigned CByte = i % Size;
Value[i] = (unsigned char)(CVal.getZExtValue() >> CByte);
}

return ConstantArray::get(V->getContext(), StringRef(Value, 16), false);
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;

// Otherwise, we'll use an array of the constants.
unsigned ArraySize = 16/Size;
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}


Expand Down Expand Up @@ -518,8 +499,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
Value *PatternPtr = Builder.CreateConstInBoundsGEP2_32(GV, 0, 0, "pattern");

Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}

Expand Down
29 changes: 27 additions & 2 deletions test/Transforms/LoopIdiom/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ for.end13: ; preds = %for.inc10
; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call.
; rdar://9009151
define void @test11(i32* nocapture %P) nounwind ssp {
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
br label %for.body

Expand All @@ -291,7 +291,7 @@ for.body: ; preds = %entry, %for.body

for.end: ; preds = %for.body
ret void
; CHECK: @test11
; CHECK: @test11_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
Expand Down Expand Up @@ -322,3 +322,28 @@ for.end: ; preds = %for.body
; CHECK: ret void
}

@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call.
; The loop writes the address of the global @G into each of 10000 consecutive
; i32* slots starting at %P, so the stored value is a relocatable constant
; (a global's address) rather than a plain integer.
; rdar://9009151
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
br label %for.body

for.body: ; preds = %entry, %for.body
; %indvar starts at 0 and is incremented by 1 on every iteration; it indexes
; the destination array.
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
%arrayidx = getelementptr i32** %P, i64 %indvar
; The value stored is the loop-invariant constant address of @G.
store i32* @G, i32** %arrayidx, align 4
%indvar.next = add i64 %indvar, 1
; Leave the loop once the incremented counter reaches 10000.
%exitcond = icmp eq i64 %indvar.next, 10000
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
ret void
; Expected output: the entry block holds a bitcast immediately followed by the
; memset_pattern call, and no store remains before the return.
; CHECK: @test13_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

0 comments on commit 80e8b50

Please sign in to comment.