Skip to content

Commit

Permalink
[PDB] Don't build the entire source file list up front.
Browse files Browse the repository at this point in the history
I tried to run llvm-pdbdump on a very large (~1.5GB) PDB to
try and identify show-stopping performance problems.  This
patch addresses the first such problem.

When loading the DBI stream, before anyone has even tried to
access a single record, we build an in memory map of every
source file for every module.  In the particular PDB I was
using, this was over 85 million files.  Specifically, the
complexity is O(m*n) where m is the number of modules and
n is the average number of source files (including headers)
per module.

The whole reason for doing this was so that we could have
constant time access to any module and any of its source
file lists.  However, we can still get O(1) access to the
source file list for a given module with a simple O(m)
precomputation, and access to the list of modules is
already O(1) anyway.

So this patch reduces the O(m*n) up-front precomputation
to an O(m) one, where n is ~6,500 and n*m is about 85 million
in my pathological test case.

Differential Revision: https://reviews.llvm.org/D32870

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302205 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Zachary Turner committed May 4, 2017
1 parent b18693c commit 505c76a
Show file tree
Hide file tree
Showing 16 changed files with 473 additions and 181 deletions.
8 changes: 0 additions & 8 deletions include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,6 @@ class DbiModuleDescriptor {
const ModuleInfoHeader *Layout = nullptr;
};

/// Pairs a module's DbiModuleDescriptor with a list of source file names for
/// that module.
struct ModuleInfoEx {
/// Copies \p Info into this object; SourceFiles is left default-constructed
/// (empty).
ModuleInfoEx(const DbiModuleDescriptor &Info) : Info(Info) {}
ModuleInfoEx(const ModuleInfoEx &Ex) = default;

/// The module descriptor, stored by value.
DbiModuleDescriptor Info;
/// Source file names associated with this module.
/// NOTE(review): StringRef is a non-owning view, so the backing name storage
/// presumably has to outlive this object -- confirm against the code that
/// fills this vector.
std::vector<StringRef> SourceFiles;
};

} // end namespace pdb

template <> struct VarStreamArrayExtractor<pdb::DbiModuleDescriptor> {
Expand Down
114 changes: 114 additions & 0 deletions include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>

namespace llvm {
namespace pdb {

class DbiModuleList;
struct FileInfoSubstreamHeader;

// Iterator over the source file names of a single module in a DbiModuleList.
// It is declared as a random access iterator (via iterator_facade_base) whose
// value type is StringRef. The current position is identified by a module
// index (Modi) and a file index within that module (Filei).
class DbiModuleSourceFilesIterator
: public iterator_facade_base<DbiModuleSourceFilesIterator,
std::random_access_iterator_tag, StringRef> {
typedef iterator_facade_base<DbiModuleSourceFilesIterator,
std::random_access_iterator_tag, StringRef>
BaseType;

public:
// Creates an iterator referring to file number Filei of module number Modi
// in Modules.
DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
uint16_t Filei);
// A default-constructed iterator has no module list (Modules == nullptr) and
// zero indices.
// NOTE(review): presumably this is the "universal end" state tested by
// isUniversalEnd() -- confirm in the implementation file.
DbiModuleSourceFilesIterator() = default;
DbiModuleSourceFilesIterator &
operator=(const DbiModuleSourceFilesIterator &R) = default;

bool operator==(const DbiModuleSourceFilesIterator &R) const;

// Dereferencing returns the cached value held in ThisValue rather than
// reading from the module list on every access.
const StringRef &operator*() const { return ThisValue; }
StringRef &operator*() { return ThisValue; }

// Ordering, distance and advance/retreat primitives declared for the random
// access iterator category; definitions live outside this header.
bool operator<(const DbiModuleSourceFilesIterator &RHS) const;
std::ptrdiff_t operator-(const DbiModuleSourceFilesIterator &R) const;
DbiModuleSourceFilesIterator &operator+=(std::ptrdiff_t N);
DbiModuleSourceFilesIterator &operator-=(std::ptrdiff_t N);

private:
// NOTE(review): presumably refreshes ThisValue for the current (Modi, Filei)
// position -- confirm in the implementation file.
void setValue();

// Helpers used by the comparison/arithmetic operators. Their exact semantics
// are defined in the implementation file, which is not visible here.
bool isEnd() const;
bool isCompatible(const DbiModuleSourceFilesIterator &R) const;
bool isUniversalEnd() const;

// Value returned by operator*.
StringRef ThisValue;
// Module list being iterated; null for a default-constructed iterator.
const DbiModuleList *Modules{nullptr};
// Index of the module whose source files are being iterated.
uint32_t Modi{0};
// Index of the current file within that module. This is 16 bits wide, matching
// the return type of DbiModuleList::getSourceFileCount(uint32_t Modi).
uint16_t Filei{0};
};

// Provides access to the modules described by the DBI stream's module info and
// file info substreams, and to each module's list of source file names.
// Rather than materializing every file name of every module up front, it keeps
// per-module index arrays (see ModuleInitialFileIndex and
// ModuleDescriptorOffsets below) and exposes source files through
// DbiModuleSourceFilesIterator.
class DbiModuleList {
// The iterator reads this class's private members.
friend DbiModuleSourceFilesIterator;

public:
// Sets up the list from the module info and file info substreams. Returns an
// Error on failure.
// NOTE(review): presumably delegates to initializeModInfo() and
// initializeFileInfo() -- confirm in the implementation file.
Error initialize(BinaryStreamRef ModInfo, BinaryStreamRef FileInfo);

// Returns the file name for the given file index, or an error.
// NOTE(review): Index is presumably an index into FileNameOffsets -- confirm.
Expected<StringRef> getFileName(uint32_t Index) const;
uint32_t getModuleCount() const;
// NOTE(review): the no-argument overload presumably reports the count across
// all modules, while the Modi overload reports the count for one module --
// confirm in the implementation file.
uint32_t getSourceFileCount() const;
uint16_t getSourceFileCount(uint32_t Modi) const;

// Returns an iterator range over the source file names of module Modi.
iterator_range<DbiModuleSourceFilesIterator>
source_files(uint32_t Modi) const;

// Returns the descriptor of module Modi by value.
DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const;

private:
Error initializeModInfo(BinaryStreamRef ModInfo);
Error initializeFileInfo(BinaryStreamRef FileInfo);

// Variable-length array of module descriptors.
VarStreamArray<DbiModuleDescriptor> Descriptors;

// Array of file name offsets (see the comment on ModuleInitialFileIndex), and
// the array holding the number of files of each module.
FixedStreamArray<support::little32_t> FileNameOffsets;
FixedStreamArray<support::ulittle16_t> ModFileCountArray;

// For each module, there are multiple filenames, which can be obtained by
// knowing the index of the file. Given the index of the file, one can use
// that as an offset into the FileNameOffsets array, which contains the
// absolute offset of the file name in NamesBuffer. Thus, for each module
// we store the first index in the FileNameOffsets array for this module.
// The number of files for the corresponding module is stored in
// ModFileCountArray.
std::vector<uint32_t> ModuleInitialFileIndex;

// In order to provide random access into the Descriptors array, we iterate it
// once up front to find the offsets of the individual items and store them in
// this array.
std::vector<uint32_t> ModuleDescriptorOffsets;

// Header of the file info substream; null until set.
// NOTE(review): presumably points into FileInfoSubstream's data, so that
// stream must stay alive -- confirm in the implementation file.
const FileInfoSubstreamHeader *FileInfoHeader = nullptr;

// Stream references backing the arrays above.
BinaryStreamRef ModInfoSubstream;
BinaryStreamRef FileInfoSubstream;
// Buffer holding the file names that FileNameOffsets points into.
BinaryStreamRef NamesBuffer;
};
}
}

#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
13 changes: 3 additions & 10 deletions include/llvm/DebugInfo/PDB/Native/DbiStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
Expand Down Expand Up @@ -68,9 +69,7 @@ class DbiStream {
/// not present, returns InvalidStreamIndex.
uint32_t getDebugStreamIndex(DbgHeaderType Type) const;

ArrayRef<ModuleInfoEx> modules() const;

Expected<StringRef> getFileNameForIndex(uint32_t Index) const;
const DbiModuleList &modules() const;

FixedStreamArray<object::coff_section> getSectionHeaders();

Expand All @@ -80,35 +79,29 @@ class DbiStream {
void visitSectionContributions(ISectionContribVisitor &Visitor) const;

private:
Error initializeModInfoArray();
Error initializeSectionContributionData();
Error initializeSectionHeadersData();
Error initializeSectionMapData();
Error initializeFileInfo();
Error initializeFpoRecords();

PDBFile &Pdb;
std::unique_ptr<msf::MappedBlockStream> Stream;

std::vector<ModuleInfoEx> ModuleInfos;
PDBStringTable ECNames;

BinaryStreamRef ModInfoSubstream;
BinaryStreamRef SecContrSubstream;
BinaryStreamRef SecMapSubstream;
BinaryStreamRef FileInfoSubstream;
BinaryStreamRef TypeServerMapSubstream;
BinaryStreamRef ECSubstream;

BinaryStreamRef NamesBuffer;
DbiModuleList Modules;

FixedStreamArray<support::ulittle16_t> DbgStreams;

PdbRaw_DbiSecContribVer SectionContribVersion;
FixedStreamArray<SectionContrib> SectionContribs;
FixedStreamArray<SectionContrib2> SectionContribs2;
FixedStreamArray<SecMapEntry> SectionMap;
FixedStreamArray<support::little32_t> FileNameOffsets;

std::unique_ptr<msf::MappedBlockStream> SectionHeaderStream;
FixedStreamArray<object::coff_section> SectionHeaders;
Expand Down
4 changes: 2 additions & 2 deletions include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ namespace pdb {

class NativeCompilandSymbol : public NativeRawSymbol {
public:
NativeCompilandSymbol(NativeSession &Session, const ModuleInfoEx &MI);
NativeCompilandSymbol(NativeSession &Session, DbiModuleDescriptor MI);
PDB_SymType getSymTag() const override;
bool isEditAndContinueEnabled() const override;
uint32_t getLexicalParentId() const override;
std::string getLibraryName() const override;
std::string getName() const override;

private:
ModuleInfoEx Module;
DbiModuleDescriptor Module;
};

} // namespace pdb
Expand Down
8 changes: 4 additions & 4 deletions include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
namespace llvm {
namespace pdb {

class DbiModuleList;
class NativeSession;

class NativeEnumModules : public IPDBEnumChildren<PDBSymbol> {
public:
explicit NativeEnumModules(NativeSession &Session,
ArrayRef<ModuleInfoEx> Modules,
uint32_t Index = 0);
NativeEnumModules(NativeSession &Session, const DbiModuleList &Modules,
uint32_t Index = 0);

uint32_t getChildCount() const override;
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
Expand All @@ -32,7 +32,7 @@ class NativeEnumModules : public IPDBEnumChildren<PDBSymbol> {

private:
NativeSession &Session;
ArrayRef<ModuleInfoEx> Modules;
const DbiModuleList &Modules;
uint32_t Index;
};
}
Expand Down
11 changes: 7 additions & 4 deletions include/llvm/DebugInfo/PDB/Native/RawTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ struct ModInfoFlags {
};

/// The header preceding each entry in the Module Info substream of the DBI
/// stream.
/// stream. Corresponds to the type MODI in the reference implementation.
struct ModuleInfoHeader {
/// Currently opened module. This field is a pointer in the reference
/// implementation, but that won't work on 64-bit systems, and anyway it
Expand Down Expand Up @@ -243,9 +243,12 @@ struct ModuleInfoHeader {
/// Padding so the next field is 4-byte aligned.
char Padding1[2];

/// Array of [0..NumFiles) DBI name buffer offsets. This field is a pointer
/// in the reference implementation, but as with `Mod`, we ignore it for now
/// since it is unused.
/// Array of [0..NumFiles) DBI name buffer offsets. In the reference
/// implementation this field is a pointer. But since you can't portably
/// serialize a pointer, on 64-bit platforms they copy all the values except
/// this one into the 32-bit version of the struct and use that for
/// serialization. Regardless, this field is unused; it is only there to
/// store a pointer that can be accessed at runtime.
support::ulittle32_t FileNameOffs;

/// Name Index for src file name
Expand Down
4 changes: 4 additions & 0 deletions include/llvm/Support/BinaryStreamArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class VarStreamArrayIterator
for (unsigned I = 0; I < N; ++I) {
// We are done with the current record, discard it so that we are
// positioned at the next record.
AbsOffset += ThisLen;
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
Expand All @@ -135,6 +136,8 @@ class VarStreamArrayIterator
return *this;
}

uint32_t offset() const { return AbsOffset; }

private:
void moveToEnd() {
Array = nullptr;
Expand All @@ -152,6 +155,7 @@ class VarStreamArrayIterator
const WrappedCtx *Ctx{nullptr};
const ArrayType *Array{nullptr};
uint32_t ThisLen{0};
uint32_t AbsOffset{0};
bool HasError{false};
bool *HadError{nullptr};
};
Expand Down
1 change: 1 addition & 0 deletions lib/DebugInfo/PDB/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ endif()
add_pdb_impl_folder(Native
Native/DbiModuleDescriptor.cpp
Native/DbiModuleDescriptorBuilder.cpp
Native/DbiModuleList.cpp
Native/DbiStream.cpp
Native/DbiStreamBuilder.cpp
Native/EnumTables.cpp
Expand Down
Loading

0 comments on commit 505c76a

Please sign in to comment.