Re-apply "InstrProf: When reading, copy the data instead of taking a …

…reference. NFC" This version fixes a missing include that MSVC noticed and clarifies the ownership of the counter buffer that's passed to InstrProfRecord. This restores r240206, which was reverted in r240208. Patch by Betul Buyukkurt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240360 91177308-0d34-0410-b5e6-96231b3b80d8
CIB · Jun 22, 2015 · b06f3b4 · b06f3b4
1 parent fe3176f
commit b06f3b4
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 87 deletions.
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
@@ -16,7 +16,10 @@
 #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
 #define LLVM_PROFILEDATA_INSTRPROF_H_
 
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
 #include <system_error>
+#include <vector>
 
 namespace llvm {
 const std::error_category &instrprof_category();
@@ -41,6 +44,16 @@ inline std::error_code make_error_code(instrprof_error E) {
   return std::error_code(static_cast<int>(E), instrprof_category());
 }
 
+/// Profiling information for a single function.
+struct InstrProfRecord {
+  InstrProfRecord() {}
+  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
+      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
+  StringRef Name;
+  uint64_t Hash;
+  std::vector<uint64_t> Counts;
+};
+
 } // end namespace llvm
 
 namespace std {

diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
@@ -29,16 +29,6 @@ namespace llvm {
 
 class InstrProfReader;
 
-/// Profiling information for a single function.
-struct InstrProfRecord {
-  InstrProfRecord() {}
-  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
-      : Name(Name), Hash(Hash), Counts(Counts) {}
-  StringRef Name;
-  uint64_t Hash;
-  ArrayRef<uint64_t> Counts;
-};
-
 /// A file format agnostic iterator over profiling data.
 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
                                                InstrProfRecord> {
@@ -114,8 +104,6 @@ class TextInstrProfReader : public InstrProfReader {
   std::unique_ptr<MemoryBuffer> DataBuffer;
   /// Iterator over the profile data.
   line_iterator Line;
-  /// The current set of counter values.
-  std::vector<uint64_t> Counts;
 
   TextInstrProfReader(const TextInstrProfReader &) = delete;
   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
@@ -141,8 +129,6 @@ class RawInstrProfReader : public InstrProfReader {
 private:
   /// The profile data file contents.
   std::unique_ptr<MemoryBuffer> DataBuffer;
-  /// The current set of counter values.
-  std::vector<uint64_t> Counts;
   struct ProfileData {
     const uint32_t NameSize;
     const uint32_t NumCounters;
@@ -206,17 +192,16 @@ enum class HashT : uint32_t;
 /// Trait for lookups into the on-disk hash table for the binary instrprof
 /// format.
 class InstrProfLookupTrait {
-  std::vector<uint64_t> DataBuffer;
+  std::vector<InstrProfRecord> DataBuffer;
   IndexedInstrProf::HashT HashType;
+  unsigned FormatVersion;
+
 public:
-  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
+  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
+      : HashType(HashType), FormatVersion(FormatVersion) {}
+
+  typedef ArrayRef<InstrProfRecord> data_type;
 
-  struct data_type {
-    data_type(StringRef Name, ArrayRef<uint64_t> Data)
-        : Name(Name), Data(Data) {}
-    StringRef Name;
-    ArrayRef<uint64_t> Data;
-  };
   typedef StringRef internal_key_type;
   typedef StringRef external_key_type;
   typedef uint64_t hash_value_type;
@@ -239,22 +224,9 @@ class InstrProfLookupTrait {
     return StringRef((const char *)D, N);
   }
 
-  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
-    DataBuffer.clear();
-    if (N % sizeof(uint64_t))
-      // The data is corrupt, don't try to read it.
-      return data_type("", DataBuffer);
-
-    using namespace support;
-    // We just treat the data as opaque here. It's simpler to handle in
-    // IndexedInstrProfReader.
-    unsigned NumEntries = N / sizeof(uint64_t);
-    DataBuffer.reserve(NumEntries);
-    for (unsigned I = 0; I < NumEntries; ++I)
-      DataBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
-    return data_type(K, DataBuffer);
-  }
+  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
 };
+
 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
     InstrProfReaderIndex;
 
@@ -267,8 +239,6 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<InstrProfReaderIndex> Index;
   /// Iterator over the profile data.
   InstrProfReaderIndex::data_iterator RecordIterator;
-  /// Offset into our current data set.
-  size_t CurrentOffset;
   /// The file format version of the profile data.
   uint64_t FormatVersion;
   /// The maximal execution count among all functions.
@@ -278,7 +248,7 @@ class IndexedInstrProfReader : public InstrProfReader {
   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
 public:
   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {}
+      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
 
   /// Return true if the given buffer is in an indexed instrprof format.
   static bool hasFormat(const MemoryBuffer &DataBuffer);

diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "InstrProfIndexed.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ProfileData/InstrProf.h"
 #include <cassert>
 
 using namespace llvm;
@@ -126,18 +125,16 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
     return error(instrprof_error::malformed);
 
   // Read each counter and fill our internal storage with the values.
-  Counts.clear();
-  Counts.reserve(NumCounters);
+  Record.Counts.clear();
+  Record.Counts.reserve(NumCounters);
   for (uint64_t I = 0; I < NumCounters; ++I) {
     if (Line.is_at_end())
       return error(instrprof_error::truncated);
     uint64_t Count;
     if ((Line++)->getAsInteger(10, Count))
       return error(instrprof_error::malformed);
-    Counts.push_back(Count);
+    Record.Counts.push_back(Count);
   }
-  // Give the record a reference to our internal counter storage.
-  Record.Counts = Counts;
 
   return success();
 }
@@ -280,11 +277,10 @@ RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
   Record.Hash = swap(Data->FuncHash);
   Record.Name = RawName;
   if (ShouldSwapBytes) {
-    Counts.clear();
-    Counts.reserve(RawCounts.size());
+    Record.Counts.clear();
+    Record.Counts.reserve(RawCounts.size());
     for (uint64_t Count : RawCounts)
-      Counts.push_back(swap(Count));
-    Record.Counts = Counts;
+      Record.Counts.push_back(swap(Count));
   } else
     Record.Counts = RawCounts;
 
@@ -303,6 +299,49 @@ InstrProfLookupTrait::ComputeHash(StringRef K) {
   return IndexedInstrProf::ComputeHash(HashType, K);
 }
 
+typedef InstrProfLookupTrait::data_type data_type;
+typedef InstrProfLookupTrait::offset_type offset_type;
+
+data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
+                                         offset_type N) {
+
+  // Check if the data is corrupt. If so, don't try to read it.
+  if (N % sizeof(uint64_t))
+    return data_type();
+
+  DataBuffer.clear();
+  uint64_t NumCounts;
+  uint64_t NumEntries = N / sizeof(uint64_t);
+  std::vector<uint64_t> CounterBuffer;
+  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
+    using namespace support;
+    // The function hash comes first.
+    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
+
+    if (++I >= NumEntries)
+      return data_type();
+
+    // In v1, we have at least one count.
+    // Later, we have the number of counts.
+    NumCounts = (1 == FormatVersion)
+                    ? NumEntries - I
+                    : endian::readNext<uint64_t, little, unaligned>(D);
+    if (1 != FormatVersion)
+      ++I;
+
+    // If we have more counts than data, this is bogus.
+    if (I + NumCounts > NumEntries)
+      return data_type();
+
+    CounterBuffer.clear();
+    for (unsigned J = 0; J < NumCounts; ++J)
+      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+
+    DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer)));
+  }
+  return DataBuffer;
+}
+
 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
   if (DataBuffer.getBufferSize() < 8)
     return false;
@@ -342,8 +381,9 @@ std::error_code IndexedInstrProfReader::readHeader() {
   uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
 
   // The rest of the file is an on disk hash table.
-  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
-                                           InstrProfLookupTrait(HashType)));
+  Index.reset(InstrProfReaderIndex::Create(
+      Start + HashOffset, Cur, Start,
+      InstrProfLookupTrait(HashType, FormatVersion)));
   // Set up our iterator for readNextRecord.
   RecordIterator = Index->data_begin();
 
@@ -357,21 +397,14 @@ std::error_code IndexedInstrProfReader::getFunctionCounts(
     return error(instrprof_error::unknown_function);
 
   // Found it. Look for counters with the right hash.
-  ArrayRef<uint64_t> Data = (*Iter).Data;
-  uint64_t NumCounts;
-  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
-    // The function hash comes first.
-    uint64_t FoundHash = Data[I++];
-    // In v1, we have at least one count. Later, we have the number of counts.
-    if (I == E)
-      return error(instrprof_error::malformed);
-    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
-    // If we have more counts than data, this is bogus.
-    if (I + NumCounts > E)
-      return error(instrprof_error::malformed);
+  ArrayRef<InstrProfRecord> Data = (*Iter);
+  if (Data.empty())
+    return error(instrprof_error::malformed);
+
+  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
     // Check for a match and fill the vector if there is one.
-    if (FoundHash == FuncHash) {
-      Counts = Data.slice(I, NumCounts);
+    if (Data[I].Hash == FuncHash) {
+      Counts = Data[I].Counts;
       return success();
     }
   }
@@ -384,30 +417,15 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
   if (RecordIterator == Index->data_end())
     return error(instrprof_error::eof);
 
-  // Record the current function name.
-  Record.Name = (*RecordIterator).Name;
-
-  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
-  // Valid data starts with a hash and either a count or the number of counts.
-  if (CurrentOffset + 1 > Data.size())
-    return error(instrprof_error::malformed);
-  // First we have a function hash.
-  Record.Hash = Data[CurrentOffset++];
-  // In version 1 we knew the number of counters implicitly, but in newer
-  // versions we store the number of counters next.
-  uint64_t NumCounts =
-      FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
-  if (CurrentOffset + NumCounts > Data.size())
+  if ((*RecordIterator).empty())
     return error(instrprof_error::malformed);
-  // And finally the counts themselves.
-  Record.Counts = Data.slice(CurrentOffset, NumCounts);
 
-  // If we've exhausted this function's data, increment the record.
-  CurrentOffset += NumCounts;
-  if (CurrentOffset == Data.size()) {
+  static unsigned RecordIndex = 0;
+  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
+  Record = Data[RecordIndex++];
+  if (RecordIndex >= Data.size()) {
     ++RecordIterator;
-    CurrentOffset = 0;
+    RecordIndex = 0;
   }
-
   return success();
 }