Skip to content

Commit

Permalink
ProfileData: Add support for the indexed instrprof format
Browse files Browse the repository at this point in the history
This adds support for an indexed instrumentation-based profiling
format, which is just a small header and an on-disk hash table.  This
format will be used by clang's -fprofile-instr-use= for PGO.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206656 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
bogner committed Apr 18, 2014
1 parent 4c464de commit e153fb3
Show file tree
Hide file tree
Showing 10 changed files with 377 additions and 35 deletions.
1 change: 1 addition & 0 deletions include/llvm/ProfileData/InstrProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct instrprof_error {
bad_magic,
bad_header,
unsupported_version,
unsupported_hash_type,
too_large,
truncated,
malformed,
Expand Down
107 changes: 106 additions & 1 deletion include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
#define LLVM_PROFILEDATA_INSTRPROF_READER_H_

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/OnDiskHashTable.h"

#include <iterator>

Expand All @@ -29,6 +31,9 @@ class InstrProfReader;

/// Profiling information for a single function.
struct InstrProfRecord {
InstrProfRecord() {}
InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
: Name(Name), Hash(Hash), Counts(Counts) {}
StringRef Name;
uint64_t Hash;
ArrayRef<uint64_t> Counts;
Expand Down Expand Up @@ -191,6 +196,106 @@ class RawInstrProfReader : public InstrProfReader {
typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;

// Forward declarations for the indexed profile format, so that the lookup
// trait below can name the hash type and call the hash function without this
// header providing their definitions.
namespace IndexedInstrProf {
// Selects which hash function is applied to hash table keys.
enum class HashT : uint32_t;
// Hashes the key K with the hash function selected by Type.
uint64_t ComputeHash(HashT Type, StringRef K);
}

/// Trait for lookups into the on-disk hash table for the binary instrprof
/// format.
///
/// Keys are read back as StringRefs pointing into the table. Each value is
/// decoded as a little-endian uint64_t hash followed by one or more
/// little-endian uint64_t counters.
///
/// The Counts of a record returned by ReadData are backed by this trait's
/// CountBuffer, which is cleared and refilled on every ReadData call, so they
/// are only valid until the next ReadData call on the same trait.
class InstrProfLookupTrait {
  /// Storage backing the Counts of the most recently decoded record.
  std::vector<uint64_t> CountBuffer;
  /// Hash function used when hashing lookup keys.
  IndexedInstrProf::HashT HashType;

public:
  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}

  typedef InstrProfRecord data_type;
  typedef StringRef internal_key_type;
  typedef StringRef external_key_type;
  typedef uint64_t hash_value_type;
  typedef uint64_t offset_type;

  /// Keys match when their contents are equal.
  static bool EqualKey(StringRef A, StringRef B) { return A == B; }
  /// External and internal keys are the same type, so this is the identity.
  static StringRef GetInternalKey(StringRef K) { return K; }

  /// Hash the key K with the hash function this trait was created with.
  hash_value_type ComputeHash(StringRef K) {
    return IndexedInstrProf::ComputeHash(HashType, K);
  }

  /// Read two little-endian offset_type values from D, advancing D past them.
  /// \returns the pair (first value read, second value read).
  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;
    // Read into named locals: the evaluation order of function arguments is
    // unspecified, and both reads advance the same pointer.
    offset_type First = endian::readNext<offset_type, little, unaligned>(D);
    offset_type Second = endian::readNext<offset_type, little, unaligned>(D);
    return std::make_pair(First, Second);
  }

  /// View the N bytes at D as a key. No copy is made.
  StringRef ReadKey(const unsigned char *D, unsigned N) {
    return StringRef(reinterpret_cast<const char *>(D), N);
  }

  /// Decode the N bytes at D as the record stored under key K.
  ///
  /// \returns a record named K on success. If N is not a multiple of
  /// sizeof(uint64_t), or is too small to hold a hash and at least one
  /// counter, the data is treated as corrupt and a record with an empty name,
  /// zero hash and no counts is returned instead.
  InstrProfRecord ReadData(StringRef K, const unsigned char *D, unsigned N) {
    // Whatever happens, the previous record's counters are no longer wanted.
    CountBuffer.clear();

    if (N < 2 * sizeof(uint64_t) || N % sizeof(uint64_t)) {
      // The data is corrupt, don't try to read it.
      return InstrProfRecord("", 0, CountBuffer);
    }

    using namespace support;

    // The first stored value is the hash.
    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
    // Each counter follows. The size check above guarantees at least one.
    unsigned NumCounters = N / sizeof(uint64_t) - 1;
    // Reserve room for every counter so the loop never reallocates.
    CountBuffer.reserve(NumCounters);
    for (unsigned I = 0; I < NumCounters; ++I)
      CountBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));

    return InstrProfRecord(K, Hash, CountBuffer);
  }
};
typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
InstrProfReaderIndex;

/// Reader for the indexed binary instrprof format.
///
/// The reader takes ownership of the buffer it is constructed with. The index
/// starts out null and is created by readHeader(), so readHeader() must
/// succeed before readNextRecord() or getFunctionCounts() are used.
class IndexedInstrProfReader : public InstrProfReader {
private:
  /// The profile data file contents.
  std::unique_ptr<MemoryBuffer> DataBuffer;
  /// The index into the profile data.
  std::unique_ptr<InstrProfReaderIndex> Index;
  /// Iterator over the profile data.
  InstrProfReaderIndex::data_iterator RecordIterator;
  /// The maximal execution count among all functions.
  uint64_t MaxFunctionCount;

  IndexedInstrProfReader(const IndexedInstrProfReader &) LLVM_DELETED_FUNCTION;
  IndexedInstrProfReader &operator=(const IndexedInstrProfReader &)
      LLVM_DELETED_FUNCTION;

public:
  /// Create a reader over DataBuffer. MaxFunctionCount starts at zero so that
  /// getMaximumFunctionCount() is well defined before the header is read.
  IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
        RecordIterator(InstrProfReaderIndex::data_iterator()),
        MaxFunctionCount(0) {}

  /// Return true if the given buffer is in an indexed instrprof format.
  static bool hasFormat(const MemoryBuffer &DataBuffer);

  /// Read the file header.
  error_code readHeader() override;
  /// Read a single record.
  error_code readNextRecord(InstrProfRecord &Record) override;

  /// Fill Counts with the profile data for the given function name.
  error_code getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
                               std::vector<uint64_t> &Counts);
  /// Return the maximum of all known function counts.
  uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }

  /// Factory method to create an indexed reader.
  static error_code create(std::string Path,
                           std::unique_ptr<IndexedInstrProfReader> &Result);
};

} // end namespace llvm

#endif // LLVM_PROFILEDATA_INSTRPROF_READER_H_
2 changes: 1 addition & 1 deletion include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class InstrProfWriter {
error_code addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash,
ArrayRef<uint64_t> Counters);
/// Ensure that all data is written to disk.
void write(raw_ostream &OS);
void write(raw_fd_ostream &OS);
};

} // end namespace llvm
Expand Down
2 changes: 2 additions & 0 deletions lib/ProfileData/InstrProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
return "Invalid header";
case instrprof_error::unsupported_version:
return "Unsupported format version";
case instrprof_error::unsupported_hash_type:
return "Unsupported hash function";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
Expand Down
54 changes: 54 additions & 0 deletions lib/ProfileData/InstrProfIndexed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared header for the instrumented profile data reader and writer.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_

#include "llvm/Support/MD5.h"

namespace llvm {

namespace IndexedInstrProf {
/// Identifies the hash function applied to keys of the indexed profile format.
enum class HashT : uint32_t {
// Keys are hashed with MD5.
MD5,

// Alias for the highest defined enumerator.
Last = MD5
};

/// Compute a 64-bit hash of Str from its MD5 digest.
///
/// \returns the first eight bytes of the digest, interpreted as a
/// little-endian 64-bit integer.
static inline uint64_t MD5Hash(StringRef Str) {
  MD5 Hasher;
  Hasher.update(Str);

  llvm::MD5::MD5Result Digest;
  Hasher.final(Digest);

  // Return the least significant 8 bytes. Our MD5 implementation returns the
  // result in little endian, so the read below swaps bytes when the host
  // byte order requires it.
  return llvm::support::endian::read<uint64_t, llvm::support::little,
                                     llvm::support::unaligned>(Digest);
}

/// Hash the key K with the hash function selected by Type.
///
/// This definition lives in a header, so it is marked inline: without it,
/// every translation unit that includes this header would emit its own
/// external definition of the function and the program would fail to link
/// (or silently violate the one-definition rule).
inline uint64_t ComputeHash(HashT Type, StringRef K) {
  switch (Type) {
  case HashT::MD5:
    return IndexedInstrProf::MD5Hash(K);
  }
  // Every enumerator is handled above; reaching here means Type holds a value
  // outside the enum.
  llvm_unreachable("Unhandled hash type");
}

const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
const uint64_t Version = 1;
const HashT HashType = HashT::MD5;
}

} // end namespace llvm

#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
118 changes: 112 additions & 6 deletions lib/ProfileData/InstrProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,62 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProf.h"

#include "InstrProfIndexed.h"

#include <cassert>

using namespace llvm;

error_code InstrProfReader::create(std::string Path,
std::unique_ptr<InstrProfReader> &Result) {
std::unique_ptr<MemoryBuffer> Buffer;
/// Load the contents of Path (as interpreted by MemoryBuffer::getFileOrSTDIN)
/// into Buffer.
///
/// \returns the loader's error if loading fails, instrprof_error::too_large
/// if the buffer size does not fit in an unsigned, and
/// instrprof_error::success otherwise.
static error_code setupMemoryBuffer(std::string Path,
                                    std::unique_ptr<MemoryBuffer> &Buffer) {
  error_code LoadStatus = MemoryBuffer::getFileOrSTDIN(Path, Buffer);
  if (LoadStatus)
    return LoadStatus;

  // Sanity check the file.
  const bool FitsInUnsigned =
      Buffer->getBufferSize() <= std::numeric_limits<unsigned>::max();
  if (!FitsInUnsigned)
    return instrprof_error::too_large;

  return instrprof_error::success;
}

/// Prepare a freshly constructed reader for use by reading its header.
/// \returns whatever Reader.readHeader() returns.
static error_code initializeReader(InstrProfReader &Reader) {
  error_code HeaderStatus = Reader.readHeader();
  return HeaderStatus;
}

/// Factory method: open Path, detect which profile format it holds, and
/// create the matching reader in Result.
///
/// Formats are probed in order: indexed, 64-bit raw, 32-bit raw. Anything
/// else is handed to the text reader.
///
/// \returns the error from loading the file, or otherwise the result of
/// initializing the newly created reader.
error_code InstrProfReader::create(std::string Path,
                                   std::unique_ptr<InstrProfReader> &Result) {
  // Set up the buffer to read.
  std::unique_ptr<MemoryBuffer> Buffer;
  if (error_code EC = setupMemoryBuffer(Path, Buffer))
    return EC;

  // Create the reader. Exactly one branch runs, so Result is always set
  // before it is dereferenced below.
  if (IndexedInstrProfReader::hasFormat(*Buffer))
    Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
  else if (RawInstrProfReader64::hasFormat(*Buffer))
    Result.reset(new RawInstrProfReader64(std::move(Buffer)));
  else if (RawInstrProfReader32::hasFormat(*Buffer))
    Result.reset(new RawInstrProfReader32(std::move(Buffer)));
  else
    Result.reset(new TextInstrProfReader(std::move(Buffer)));

  // Initialize the reader and return the result.
  return initializeReader(*Result);
}

/// Factory method: open Path and create an indexed reader for it in Result.
///
/// Unlike InstrProfReader::create, no other format is accepted: a file that
/// does not have the indexed format yields instrprof_error::bad_magic and
/// leaves Result untouched.
error_code IndexedInstrProfReader::create(
    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
  // Load the file contents.
  std::unique_ptr<MemoryBuffer> Contents;
  error_code LoadStatus = setupMemoryBuffer(Path, Contents);
  if (LoadStatus)
    return LoadStatus;

  // Refuse anything that is not in the indexed format.
  const bool IsIndexed = IndexedInstrProfReader::hasFormat(*Contents);
  if (!IsIndexed)
    return instrprof_error::bad_magic;

  // Build the reader, then initialize it and report how that went.
  Result.reset(new IndexedInstrProfReader(std::move(Contents)));
  return initializeReader(*Result);
}

void InstrProfIterator::Increment() {
Expand Down Expand Up @@ -210,3 +242,77 @@ namespace llvm {
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
}

/// Return true if DataBuffer begins with the indexed format's magic number.
///
/// Buffers too short to hold the magic are reported as not matching.
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
  if (DataBuffer.getBufferSize() < sizeof(uint64_t))
    return false;
  using namespace support;
  // Read unaligned, as readHeader() does for the same field: nothing here
  // establishes that the buffer start is suitably aligned for a uint64_t.
  uint64_t Magic =
      endian::read<uint64_t, little, unaligned>(DataBuffer.getBufferStart());
  return Magic == IndexedInstrProf::Magic;
}

/// Read and validate the file header, then set up the index and the record
/// iterator over the hash table that follows it.
///
/// The header is five little-endian uint64_t fields: magic, version, maximal
/// function count, hash type, and the offset of the hash table from the start
/// of the file.
///
/// \returns truncated if the buffer cannot hold the header or the hash table
/// offset lies beyond it, bad_magic / unsupported_version /
/// unsupported_hash_type if the corresponding field is not recognised,
/// malformed if the hash table offset points into the header, and success
/// otherwise.
error_code IndexedInstrProfReader::readHeader() {
  const unsigned char *Start =
      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
  const unsigned char *Cur = Start;

  // All five header fields are read below, so all of them must be present.
  const uint64_t BufferSize = DataBuffer->getBufferSize();
  const uint64_t HeaderSize = 5 * sizeof(uint64_t);
  if (BufferSize < HeaderSize)
    return error(instrprof_error::truncated);

  using namespace support;

  // Check the magic number.
  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (Magic != IndexedInstrProf::Magic)
    return error(instrprof_error::bad_magic);

  // Read the version.
  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (Version != IndexedInstrProf::Version)
    return error(instrprof_error::unsupported_version);

  // Read the maximal function count.
  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);

  // Read the hash type. Range check the full 64-bit value before narrowing it
  // to the 32-bit enum, so that stray high bits are not silently dropped.
  uint64_t RawHashType = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (RawHashType > static_cast<uint64_t>(IndexedInstrProf::HashT::Last))
    return error(instrprof_error::unsupported_hash_type);
  IndexedInstrProf::HashT HashType =
      static_cast<IndexedInstrProf::HashT>(RawHashType);

  // Read the hash table offset and make sure it lands inside the buffer and
  // past the header before it is turned into a pointer.
  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (HashOffset < HeaderSize)
    return error(instrprof_error::malformed);
  if (HashOffset >= BufferSize)
    return error(instrprof_error::truncated);

  // The rest of the file is an on disk hash table.
  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
                                           InstrProfLookupTrait(HashType)));
  // Set up our iterator for readNextRecord.
  RecordIterator = Index->data_begin();

  return success();
}

/// Look up FuncName in the index and copy out its hash and counters.
///
/// \returns unknown_function if the index has no entry for FuncName,
/// malformed if the entry decodes to a record with an empty name, and success
/// otherwise. FuncHash and Counts are only written on success.
error_code IndexedInstrProfReader::getFunctionCounts(
    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
  auto Position = Index->find(FuncName);
  if (Position == Index->end())
    return error(instrprof_error::unknown_function);

  // There is an entry. Decode it and make sure it's valid before giving back
  // a result.
  const InstrProfRecord Found = *Position;
  if (Found.Name.empty())
    return error(instrprof_error::malformed);

  FuncHash = Found.Hash;
  Counts.assign(Found.Counts.begin(), Found.Counts.end());
  return success();
}

/// Copy the record under the iterator into Record and advance the iterator.
///
/// \returns eof once the iterator has reached the end of the data, malformed
/// if the record just read has an empty name, and success otherwise. The
/// iterator is advanced even when the record turns out to be malformed.
error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
  // Nothing left to read?
  const bool Exhausted = RecordIterator == Index->data_end();
  if (Exhausted)
    return error(instrprof_error::eof);

  // Hand back the current record and step past it.
  Record = *RecordIterator;
  ++RecordIterator;

  if (!Record.Name.empty())
    return success();
  return error(instrprof_error::malformed);
}
Loading

0 comments on commit e153fb3

Please sign in to comment.