From ff9459966486be8b7aeef514387a01acd9c87dda Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Wed, 3 May 2017 17:11:40 +0000 Subject: [PATCH] [CodeView] Use actual strings for dealing with checksums and lines. The raw CodeView format references strings by "offsets", but it's confusing which table the offset refers to. In the case of line number information, it's an offset into a buffer of records, and an indirection is required to get another offset into a different table to find the final string. And in the case of checksum information, there is no indirection, and the offset refers directly to the location of the string in another buffer. This would be less confusing if we always just referred to the strings by their value, and had the library be smart enough to correctly resolve the offsets on its own from the right location. This patch makes that possible. When either reading or writing, all the user deals with are strings, and the library does the appropriate translations behind the scenes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302053 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ModuleDebugFileChecksumFragment.h | 10 ++-- .../ModuleDebugInlineeLinesFragment.h | 13 +++-- .../CodeView/ModuleDebugLineFragment.h | 11 +++- include/llvm/DebugInfo/CodeView/StringTable.h | 3 ++ .../PDB/Native/PDBStringTableBuilder.h | 3 ++ include/llvm/Support/BinaryStreamArray.h | 2 + .../ModuleDebugFileChecksumFragment.cpp | 21 +++++--- .../ModuleDebugInlineeLinesFragment.cpp | 17 ++++-- .../CodeView/ModuleDebugLineFragment.cpp | 14 +++-- lib/DebugInfo/CodeView/StringTable.cpp | 6 +++ tools/llvm-pdbdump/llvm-pdbdump.cpp | 53 +++++-------------- 11 files changed, 87 insertions(+), 66 deletions(-) diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h index 10cea27cc92a..6c08c9aa2137 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h @@ -21,6 +21,8 @@ namespace llvm { namespace codeview { +class StringTable; + struct FileChecksumEntry { uint32_t FileNameOffset; // Byte offset of filename in global stringtable. FileChecksumKind Kind; // The type of checksum. 
@@ -66,20 +68,22 @@ class ModuleDebugFileChecksumFragmentRef final : public ModuleDebugFragmentRef { class ModuleDebugFileChecksumFragment final : public ModuleDebugFragment { public: - ModuleDebugFileChecksumFragment(); + explicit ModuleDebugFileChecksumFragment(StringTable &Strings); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::FileChecksums; } - void addChecksum(uint32_t StringTableOffset, FileChecksumKind Kind, + void addChecksum(StringRef FileName, FileChecksumKind Kind, ArrayRef Bytes); uint32_t calculateSerializedLength() override; Error commit(BinaryStreamWriter &Writer) override; - uint32_t mapChecksumOffset(uint32_t StringTableOffset) const; + uint32_t mapChecksumOffset(StringRef FileName) const; private: + StringTable &Strings; + DenseMap OffsetMap; uint32_t SerializedSize = 0; llvm::BumpPtrAllocator Storage; diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h index 36675f498257..da06de98ca79 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h @@ -20,6 +20,8 @@ namespace llvm { namespace codeview { class ModuleDebugInlineeLineFragmentRef; +class ModuleDebugFileChecksumFragment; +class StringTable; enum class InlineeLinesSignature : uint32_t { Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE @@ -73,7 +75,8 @@ class ModuleDebugInlineeLineFragmentRef final : public ModuleDebugFragmentRef { class ModuleDebugInlineeLineFragment final : public ModuleDebugFragment { public: - explicit ModuleDebugInlineeLineFragment(bool HasExtraFiles); + ModuleDebugInlineeLineFragment(ModuleDebugFileChecksumFragment &Checksums, + StringTable &Strings, bool HasExtraFiles); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::InlineeLines; @@ -82,11 +85,13 @@ class ModuleDebugInlineeLineFragment 
final : public ModuleDebugFragment { Error commit(BinaryStreamWriter &Writer) override; uint32_t calculateSerializedLength() override; - void addInlineSite(TypeIndex FuncId, uint32_t FileOffset, - uint32_t SourceLine); - void addExtraFile(uint32_t FileOffset); + void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine); + void addExtraFile(StringRef FileName); private: + ModuleDebugFileChecksumFragment &Checksums; + StringTable &Strings; + bool HasExtraFiles = false; uint32_t ExtraFileCount = 0; diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h index dfd8b7c2921a..b35c88e26623 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h @@ -19,6 +19,9 @@ namespace llvm { namespace codeview { +class ModuleDebugFileChecksumFragment; +class StringTable; + // Corresponds to the `CV_DebugSLinesHeader_t` structure. struct LineFragmentHeader { support::ulittle32_t RelocOffset; // Code offset of line contribution. 
@@ -104,13 +107,14 @@ class ModuleDebugLineFragment final : public ModuleDebugFragment { }; public: - ModuleDebugLineFragment(); + ModuleDebugLineFragment(ModuleDebugFileChecksumFragment &Checksums, + StringTable &Strings); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::Lines; } - void createBlock(uint32_t ChecksumBufferOffset); + void createBlock(StringRef FileName); void addLineInfo(uint32_t Offset, const LineInfo &Line); void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line, uint32_t ColStart, uint32_t ColEnd); @@ -125,6 +129,9 @@ class ModuleDebugLineFragment final : public ModuleDebugFragment { bool hasColumnInfo() const; private: + ModuleDebugFileChecksumFragment &Checksums; + StringTable &Strings; + uint16_t RelocOffset = 0; uint16_t RelocSegment = 0; uint32_t CodeSize = 0; diff --git a/include/llvm/DebugInfo/CodeView/StringTable.h b/include/llvm/DebugInfo/CodeView/StringTable.h index a81d0095f899..05dc02ee849f 100644 --- a/include/llvm/DebugInfo/CodeView/StringTable.h +++ b/include/llvm/DebugInfo/CodeView/StringTable.h @@ -53,6 +53,9 @@ class StringTable { // Returns the ID for S. uint32_t insert(StringRef S); + // Return the ID for string S. Assumes S exists in the table. 
+ uint32_t getStringId(StringRef S) const; + uint32_t calculateSerializedSize() const; Error commit(BinaryStreamWriter &Writer) const; diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h index 198c35c1aa42..6f85e7a4a074 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h @@ -41,6 +41,9 @@ class PDBStringTableBuilder { uint32_t calculateSerializedSize() const; Error commit(BinaryStreamWriter &Writer) const; + codeview::StringTable &getStrings() { return Strings; } + const codeview::StringTable &getStrings() const { return Strings; } + private: uint32_t calculateHashTableSize() const; Error writeHeader(BinaryStreamWriter &Writer) const; diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 93de13597660..f141c30f16c7 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -223,6 +223,8 @@ class VarStreamArrayBase { return Iterator(*this, Ctx, Stream, HadError); } + bool valid() const { return Stream.valid(); } + Iterator end() const { return Iterator(Ctx); } bool empty() const { return Stream.getLength() == 0; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp index 87763248391b..42f0afc3e2d7 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/BinaryStreamReader.h" using namespace llvm; @@ -49,10 +50,12 @@ Error ModuleDebugFileChecksumFragmentRef::initialize( return Error::success(); } 
-ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums) {} +ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment( + StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums), + Strings(Strings) {} -void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, +void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName, FileChecksumKind Kind, ArrayRef Bytes) { FileChecksumEntry Entry; @@ -61,13 +64,14 @@ void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, ::memcpy(Copy, Bytes.data(), Bytes.size()); Entry.Checksum = makeArrayRef(Copy, Bytes.size()); } - Entry.FileNameOffset = StringTableOffset; + + Entry.FileNameOffset = Strings.insert(FileName); Entry.Kind = Kind; Checksums.push_back(Entry); // This maps the offset of this string in the string table to the offset // of this checksum entry in the checksum buffer. - OffsetMap[StringTableOffset] = SerializedSize; + OffsetMap[Entry.FileNameOffset] = SerializedSize; assert(SerializedSize % 4 == 0); uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4); @@ -94,9 +98,10 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -uint32_t ModuleDebugFileChecksumFragment::mapChecksumOffset( - uint32_t StringTableOffset) const { - auto Iter = OffsetMap.find(StringTableOffset); +uint32_t +ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const { + uint32_t Offset = Strings.getStringId(FileName); + auto Iter = OffsetMap.find(Offset); assert(Iter != OffsetMap.end()); return Iter->second; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp index c54fb2d784a3..a908e7df28b3 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp +++ 
b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp @@ -10,7 +10,9 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; @@ -55,9 +57,10 @@ bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const { } ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment( + ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings, bool HasExtraFiles) : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines), - HasExtraFiles(HasExtraFiles) {} + Checksums(Checksums), Strings(Strings), HasExtraFiles(HasExtraFiles) {} uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { // 4 bytes for the signature @@ -100,18 +103,22 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -void ModuleDebugInlineeLineFragment::addExtraFile(uint32_t FileOffset) { +void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + auto &Entry = Entries.back(); - Entry.ExtraFiles.push_back(ulittle32_t(FileOffset)); + Entry.ExtraFiles.push_back(ulittle32_t(Offset)); ++ExtraFileCount; } void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId, - uint32_t FileOffset, + StringRef FileName, uint32_t SourceLine) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + Entries.emplace_back(); auto &Entry = Entries.back(); - Entry.Header.FileID = FileOffset; + Entry.Header.FileID = Offset; Entry.Header.SourceLineNum = SourceLine; Entry.Header.Inlinee = FuncId; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp index 103010ca2833..fdba1147f0b3 100644 --- 
a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp @@ -10,7 +10,9 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; @@ -65,11 +67,15 @@ bool ModuleDebugLineFragmentRef::hasColumnInfo() const { return !!(Header->Flags & LF_HaveColumns); } -ModuleDebugLineFragment::ModuleDebugLineFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::Lines) {} +ModuleDebugLineFragment::ModuleDebugLineFragment( + ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::Lines), Checksums(Checksums), + Strings(Strings) {} -void ModuleDebugLineFragment::createBlock(uint32_t ChecksumBufferOffset) { - Blocks.emplace_back(ChecksumBufferOffset); +void ModuleDebugLineFragment::createBlock(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Blocks.emplace_back(Offset); } void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, diff --git a/lib/DebugInfo/CodeView/StringTable.cpp b/lib/DebugInfo/CodeView/StringTable.cpp index f496854ffaf3..21f11204686b 100644 --- a/lib/DebugInfo/CodeView/StringTable.cpp +++ b/lib/DebugInfo/CodeView/StringTable.cpp @@ -63,3 +63,9 @@ Error StringTable::commit(BinaryStreamWriter &Writer) const { } uint32_t StringTable::size() const { return Strings.size(); } + +uint32_t StringTable::getStringId(StringRef S) const { + auto P = Strings.find(S); + assert(P != Strings.end()); + return P->second; +} diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp index adc739b034ac..65387afa583e 100644 --- a/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -424,21 
+424,6 @@ cl::list InputFilename(cl::Positional, static ExitOnError ExitOnErr; -static uint32_t -getFileChecksumOffset(StringRef FileName, - ModuleDebugFileChecksumFragment &Checksums, - PDBStringTableBuilder &Strings) { - // The offset in the line info record is the offset of the checksum - // entry for the corresponding file. That entry then contains an - // offset into the global string table of the file name. So to - // compute the proper offset to write into the line info record, we - // must first get its offset in the global string table, then ask the - // checksum builder to find the offset in its serialized buffer that - // it mapped that filename string table offset to. - uint32_t StringOffset = Strings.insert(FileName); - return Checksums.mapChecksumOffset(StringOffset); -} - static void yamlToPdb(StringRef Path) { BumpPtrAllocator Allocator; ErrorOr> ErrorOrBuffer = @@ -490,6 +475,8 @@ static void yamlToPdb(StringRef Path) { for (auto F : Info.Features) InfoBuilder.addFeature(F); + auto &Strings = Builder.getStringTableBuilder().getStrings(); + const auto &Dbi = YamlObj.DbiStream.getValueOr(DefaultDbiStream); auto &DbiBuilder = Builder.getDbiBuilder(); DbiBuilder.setAge(Dbi.Age); @@ -516,35 +503,24 @@ static void yamlToPdb(StringRef Path) { // File Checksums must be emitted before line information, because line // info records use offsets into the checksum buffer to reference a file's // source file name. 
- auto Checksums = llvm::make_unique(); + auto Checksums = + llvm::make_unique(Strings); auto &ChecksumRef = *Checksums; if (!FLI.FileChecksums.empty()) { - auto &Strings = Builder.getStringTableBuilder(); - for (auto &FC : FLI.FileChecksums) { - uint32_t STOffset = Strings.insert(FC.FileName); - Checksums->addChecksum(STOffset, FC.Kind, FC.ChecksumBytes.Bytes); - } + for (auto &FC : FLI.FileChecksums) + Checksums->addChecksum(FC.FileName, FC.Kind, FC.ChecksumBytes.Bytes); } ModiBuilder.setC13FileChecksums(std::move(Checksums)); - // FIXME: StringTable / StringTableBuilder should really be in - // DebugInfoCodeView. This would allow us to construct the - // ModuleDebugLineFragment with a reference to the string table, - // and we could just pass strings around rather than having to - // remember how to calculate the right offset. - auto &Strings = Builder.getStringTableBuilder(); - for (const auto &Fragment : FLI.LineFragments) { - auto Lines = llvm::make_unique(); + auto Lines = + llvm::make_unique(ChecksumRef, Strings); Lines->setCodeSize(Fragment.CodeSize); Lines->setRelocationAddress(Fragment.RelocSegment, Fragment.RelocOffset); Lines->setFlags(Fragment.Flags); for (const auto &LC : Fragment.Blocks) { - uint32_t ChecksumOffset = - getFileChecksumOffset(LC.FileName, ChecksumRef, Strings); - - Lines->createBlock(ChecksumOffset); + Lines->createBlock(LC.FileName); if (Lines->hasColumnInfo()) { for (const auto &Item : zip(LC.Lines, LC.Columns)) { auto &L = std::get<0>(Item); @@ -567,18 +543,15 @@ static void yamlToPdb(StringRef Path) { for (const auto &Inlinee : FLI.Inlinees) { auto Inlinees = llvm::make_unique( - Inlinee.HasExtraFiles); + ChecksumRef, Strings, Inlinee.HasExtraFiles); for (const auto &Site : Inlinee.Sites) { - uint32_t FileOff = - getFileChecksumOffset(Site.FileName, ChecksumRef, Strings); - - Inlinees->addInlineSite(Site.Inlinee, FileOff, Site.SourceLineNum); + Inlinees->addInlineSite(Site.Inlinee, Site.FileName, + Site.SourceLineNum); if 
(!Inlinee.HasExtraFiles) continue; for (auto EF : Site.ExtraFiles) { - FileOff = getFileChecksumOffset(EF, ChecksumRef, Strings); - Inlinees->addExtraFile(FileOff); + Inlinees->addExtraFile(EF); } } ModiBuilder.addC13Fragment(std::move(Inlinees));