Skip to content

Commit

Permalink
Change ConstantArray::get to form a ConstantDataArray when possible,
Browse files Browse the repository at this point in the history
kicking in the big win of ConstantDataArray.  As part of this, change
the implementation of GetConstantStringInfo in ValueTracking to work
with ConstantDataArray (and not ConstantArray) making it dramatically,
amazingly, more efficient in the process and renaming it to 
getConstantStringInfo.

This keeps around a GetConstantStringInfo entrypoint that (grossly)
forwards to getConstantStringInfo and constructs the std::string 
required, but existing clients should move over to 
getConstantStringInfo instead.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149351 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
lattner committed Jan 31, 2012
1 parent 4a6facb commit 3ef9cfe
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 187 deletions.
20 changes: 10 additions & 10 deletions include/llvm/Analysis/ValueTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
#include <string>

namespace llvm {
template <typename T> class SmallVectorImpl;
class Value;
class Instruction;
class APInt;
class TargetData;
class StringRef;

/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
Expand Down Expand Up @@ -125,16 +124,17 @@ namespace llvm {
return GetPointerBaseWithConstantOffset(const_cast<Value*>(Ptr), Offset,TD);
}

/// GetConstantStringInfo - This function computes the length of a
/// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false. If
/// StopAtNul is set to true (the default), the returned string is truncated
/// by a nul character in the global. If StopAtNul is false, the nul
/// character is included in the result string.
/// and returns the string in Str. If unsuccessful, it returns false. The
/// returned string does not include the trailing nul character.
bool getConstantStringInfo(const Value *V, StringRef &Str,
uint64_t Offset = 0);

// FIXME: Remove this.
bool GetConstantStringInfo(const Value *V, std::string &Str,
uint64_t Offset = 0,
bool StopAtNul = true);

uint64_t Offset = 0);

/// GetStringLength - If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t GetStringLength(Value *V);
Expand Down
6 changes: 3 additions & 3 deletions lib/Analysis/ConstantFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,9 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,

// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
std::string Str;
if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.length();
StringRef Str;
if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
// Replace load with immediate integer if the result is an integer or fp
Expand Down
187 changes: 51 additions & 136 deletions lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1369,25 +1369,21 @@ Value *llvm::isBytewiseValue(Value *V) {
}
}

// A ConstantArray is splatable if all its members are equal and also
// splatable.
if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
if (CA->getNumOperands() == 0)
return 0;

Value *Val = isBytewiseValue(CA->getOperand(0));
// A ConstantDataArray/Vector is splatable if all its members are equal and
// also splatable.
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
Value *Elt = CA->getElementAsConstant(0);
Value *Val = isBytewiseValue(Elt);
if (!Val)
return 0;

for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
if (CA->getOperand(I-1) != CA->getOperand(I))
for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
if (CA->getElementAsConstant(I) != Elt)
return 0;

return Val;
}

// FIXME: Vector types (e.g., <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>).

// Conceptually, we could handle things like:
// %a = zext i8 %X to i16
// %b = shl i16 %a, 8
Expand Down Expand Up @@ -1607,33 +1603,29 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}


/// GetConstantStringInfo - This function computes the length of a
// FIXME: Remove this.
bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
uint64_t Offset) {
StringRef Tmp;
if (!getConstantStringInfo(V, Tmp, Offset))
return false;
Str = Tmp.str();
return true;
}

/// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false.
bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
uint64_t Offset, bool StopAtNul) {
// If V is NULL then return false;
if (V == NULL) return false;
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
uint64_t Offset) {
assert(V);

// Look through bitcast instructions.
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);

// If the value is not a GEP instruction nor a constant expression with a
// GEP instruction, then return false because ConstantArray can't occur
// any other way.
const User *GEP = 0;
if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
GEP = GEPI;
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::BitCast)
return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
if (CE->getOpcode() != Instruction::GetElementPtr)
return false;
GEP = CE;
}
// Look through bitcast instructions and geps.
V = V->stripPointerCasts();

if (GEP) {
// If the value is a GEP instruction or constant expression, treat it as an
// offset.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
Expand All @@ -1658,51 +1650,45 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
StartIdx = CI->getZExtValue();
else
return false;
return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
StopAtNul);
return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
}

// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
const Constant *GlobalInit = GV->getInitializer();


// Handle the all-zeros case
if (GlobalInit->isNullValue()) {
if (GV->getInitializer()->isNullValue()) {
// This is a degenerate case. The initializer is constant zero so the
// length of the string must be zero.
Str.clear();
Str = "";
return true;
}

// Must be a Constant Array
const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
const ConstantDataArray *Array =
dyn_cast<ConstantDataArray>(GV->getInitializer());
if (Array == 0 || !Array->isString())
return false;

// Get the number of elements in the array
uint64_t NumElts = Array->getType()->getNumElements();

uint64_t NumElts = Array->getType()->getArrayNumElements();

// Start out with the entire array in the StringRef.
Str = Array->getAsString();

if (Offset > NumElts)
return false;

// Traverse the constant array from 'Offset' which is the place the GEP refers
// to in the array.
Str.reserve(NumElts-Offset);
for (unsigned i = Offset; i != NumElts; ++i) {
const Constant *Elt = Array->getOperand(i);
const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
if (!CI) // This array isn't suitable, non-int initializer.
return false;
if (StopAtNul && CI->isZero())
return true; // we found end of string, success!
Str += (char)CI->getZExtValue();
}

// The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
// Skip over 'offset' bytes.
Str = Str.substr(Offset);
// Trim off the \0 and anything after it. If the array is not nul terminated,
// we just return the whole end of string. The client may know some other way
// that the string is length-bound.
Str = Str.substr(0, Str.find('\0'));
return true;
}

Expand All @@ -1714,8 +1700,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
// Look through noop bitcast instructions.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
return GetStringLengthH(BCI->getOperand(0), PHIs);
V = V->stripPointerCasts();

// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
Expand Down Expand Up @@ -1751,83 +1736,13 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (Len1 != Len2) return 0;
return Len1;
}

// As a special-case, "@string = constant i8 0" is also a string with zero
// length, not wrapped in a bitcast or GEP.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (GV->getInitializer()->isNullValue()) return 1;
return 0;
}

// If the value is not a GEP instruction nor a constant expression with a
// GEP instruction, then return unknown.
User *GEP = 0;
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
GEP = GEPI;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() != Instruction::GetElementPtr)
return 0;
GEP = CE;
} else {
return 0;
}

// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return 0;

// Check to make sure that the first operand of the GEP is an integer and
// has value 0 so that we are sure we're indexing into the initializer.
if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
if (!Idx->isZero())
return 0;
} else
return 0;

// If the second index isn't a ConstantInt, then this is a variable index
// into the array. If this occurs, we can't say anything meaningful about
// the string.
uint64_t StartIdx = 0;
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
StartIdx = CI->getZExtValue();
else
return 0;

// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
GV->mayBeOverridden())

// Otherwise, see if we can read the string.
StringRef StrData;
if (!getConstantStringInfo(V, StrData))
return 0;
Constant *GlobalInit = GV->getInitializer();

// Handle the ConstantAggregateZero case, which is a degenerate case. The
// initializer is constant zero so the length of the string must be zero.
if (isa<ConstantAggregateZero>(GlobalInit))
return 1; // Len = 0 offset by 1.

// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
return false;

// Get the number of elements in the array
uint64_t NumElts = Array->getType()->getNumElements();

// Traverse the constant array from StartIdx (derived above) which is
// the place the GEP refers to in the array.
for (unsigned i = StartIdx; i != NumElts; ++i) {
Constant *Elt = Array->getOperand(i);
ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
if (!CI) // This array isn't suitable, non-int initializer.
return 0;
if (CI->isZero())
return i-StartIdx+1; // We found end of string, success!
}

return 0; // The array isn't null terminated, conservatively return 'unknown'.
return StrData.size()+1;
}

/// GetStringLength - If we can compute the length of the string pointed to by
Expand Down
Loading

0 comments on commit 3ef9cfe

Please sign in to comment.