Skip to content

Commit

Permalink
LTO: Reduce memory consumption by creating an in-memory symbol table …
Browse files Browse the repository at this point in the history
…for InputFiles. NFCI.

Introduce symbol table data structures that can potentially be written to
disk, have the LTO library build those data structures using temporarily
constructed modules, and redirect the LTO library implementation to go through
those data structures. This allows us to remove the LLVMContext and Modules
owned by InputFile.

With this change I measured a peak memory consumption decrease from 5.4GB to
2.8GB in a no-op incremental ThinLTO link of Chromium on Linux. The impact on
memory consumption is larger in COFF linkers where we are currently forced
to materialize all metadata in order to read linker options. Peak memory
consumption linking a large piece of Chromium for Windows with full LTO and
debug info decreases from >64GB (OOM) to 15GB.

Part of PR27551.

Differential Revision: https://reviews.llvm.org/D31364

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299168 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
pcc committed Mar 31, 2017
1 parent f43d588 commit 9f482fc
Show file tree
Hide file tree
Showing 6 changed files with 659 additions and 291 deletions.
207 changes: 45 additions & 162 deletions include/llvm/LTO/LTO.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/Config.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/thread.h"
Expand Down Expand Up @@ -79,192 +79,75 @@ class LTO;
struct SymbolResolution;
class ThinBackendProc;

/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the
/// An input file. This is a symbol table wrapper that only exposes the
/// information that an LTO client should need in order to do symbol resolution.
class InputFile {
public:
class Symbol;

private:
// FIXME: Remove LTO class friendship once we have bitcode symbol tables.
friend LTO;
InputFile() = default;

// FIXME: Remove the LLVMContext once we have bitcode symbol tables.
LLVMContext Ctx;
struct InputModule;
std::vector<InputModule> Mods;
ModuleSymbolTable SymTab;
std::vector<BitcodeModule> Mods;
SmallVector<char, 0> Strtab;
std::vector<Symbol> Symbols;

// [begin, end) for each module
std::vector<std::pair<size_t, size_t>> ModuleSymIndices;

std::vector<StringRef> Comdats;
DenseMap<const Comdat *, unsigned> ComdatMap;
StringRef SourceFileName, COFFLinkerOpts;
std::vector<StringRef> ComdatTable;

public:
~InputFile();

/// Create an InputFile.
static Expected<std::unique_ptr<InputFile>> create(MemoryBufferRef Object);

class symbol_iterator;

/// This is a wrapper for ArrayRef<ModuleSymbolTable::Symbol>::iterator that
/// exposes only the information that an LTO client should need in order to do
/// symbol resolution.
///
/// This object is ephemeral; it is only valid as long as an iterator obtained
/// from symbols() refers to it.
class Symbol {
friend symbol_iterator;
/// The purpose of this class is to only expose the symbol information that an
/// LTO client should need in order to do symbol resolution.
class Symbol : irsymtab::Symbol {
friend LTO;

ArrayRef<ModuleSymbolTable::Symbol>::iterator I;
const ModuleSymbolTable &SymTab;
const InputFile *File;
uint32_t Flags;
SmallString<64> Name;

/// Returns true when the symbol described by Flags should be hidden from
/// clients: it either lacks SF_Global or carries SF_FormatSpecific.
bool shouldSkip() {
  const bool IsGlobal = (Flags & object::BasicSymbolRef::SF_Global) != 0;
  const bool IsFormatSpecific =
      (Flags & object::BasicSymbolRef::SF_FormatSpecific) != 0;
  return !IsGlobal || IsFormatSpecific;
}

void skip() {
ArrayRef<ModuleSymbolTable::Symbol>::iterator E = SymTab.symbols().end();
while (I != E) {
Flags = SymTab.getSymbolFlags(*I);
if (!shouldSkip())
break;
++I;
}
if (I == E)
return;

Name.clear();
{
raw_svector_ostream OS(Name);
SymTab.printSymbolName(OS, *I);
}
}

/// True when the symbol table entry referenced by I holds a GlobalValue *.
bool isGV() const { return I->is<GlobalValue *>(); }
/// Returns the GlobalValue * held by the entry referenced by I. The other
/// accessors in this class call this only after isGV() has returned true.
GlobalValue *getGV() const { return I->get<GlobalValue *>(); }

public:
/// Builds a symbol view starting at position I of SymTab on behalf of File,
/// then calls skip() so that the view initially rests on the first entry at
/// or after I that shouldSkip() does not reject (or on the end position).
Symbol(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
const ModuleSymbolTable &SymTab, const InputFile *File)
: I(I), SymTab(SymTab), File(File) {
skip();
}

/// True if the SF_Undefined bit is set in Flags.
bool isUndefined() const {
  return (Flags & object::BasicSymbolRef::SF_Undefined) != 0;
}
/// True if the SF_Common bit is set in Flags.
bool isCommon() const {
  return (Flags & object::BasicSymbolRef::SF_Common) != 0;
}
/// True if the SF_Weak bit is set in Flags.
bool isWeak() const {
  return (Flags & object::BasicSymbolRef::SF_Weak) != 0;
}
/// True if the SF_Indirect bit is set in Flags.
bool isIndirect() const {
  return (Flags & object::BasicSymbolRef::SF_Indirect) != 0;
}

/// For COFF weak externals, returns the name of the symbol that is used
/// as a fallback if the weak external remains undefined.
///
/// Asserts that both SF_Weak and SF_Indirect are set in Flags.
std::string getCOFFWeakExternalFallback() const {
  assert((Flags & object::BasicSymbolRef::SF_Weak) &&
         (Flags & object::BasicSymbolRef::SF_Indirect) &&
         "symbol is not a weak external");

  // The entry is treated as a GlobalAlias (cast<> checks this); the fallback
  // is its aliasee once pointer casts have been stripped.
  auto *Alias = cast<GlobalAlias>(getGV());
  auto *Fallback = cast<GlobalValue>(Alias->getAliasee()->stripPointerCasts());

  // A distinct local name avoids shadowing the Name member of this class.
  std::string FallbackName;
  raw_string_ostream OS(FallbackName);
  SymTab.printSymbolName(OS, Fallback);
  OS.flush();
  return FallbackName;
}

/// Returns the mangled name of the global.
///
/// The returned StringRef refers to this object's Name buffer, which skip()
/// clears and rewrites whenever the symbol is repositioned.
StringRef getName() const { return Name; }

/// Returns the visibility of the underlying GlobalValue, or
/// GlobalValue::DefaultVisibility for entries that are not GlobalValues.
GlobalValue::VisibilityTypes getVisibility() const {
  return isGV() ? getGV()->getVisibility() : GlobalValue::DefaultVisibility;
}
/// Forwards to llvm::canBeOmittedFromSymbolTable() for GlobalValue entries;
/// always false for entries that are not GlobalValues.
bool canBeOmittedFromSymbolTable() const {
  if (!isGV())
    return false;
  return llvm::canBeOmittedFromSymbolTable(getGV());
}
/// True if the entry is a GlobalValue that reports isThreadLocal(); entries
/// that are not GlobalValues are always reported as non-TLS.
bool isTLS() const {
  // FIXME: Expose a thread-local flag for module asm symbols.
  if (!isGV())
    return false;
  return getGV()->isThreadLocal();
}

// Returns the index of the comdat this symbol is in or -1 if the symbol
// is not in a comdat.
// FIXME: We have to return Expected<int> because aliases point to an
// arbitrary ConstantExpr and that might not actually be a constant. That
// means we might not be able to find what an alias is aliased to and
// so find its comdat.
Expected<int> getComdatIndex() const;

/// Returns the allocation size of a common symbol's element type, computed
/// with the DataLayout of the module that owns the GlobalValue. Asserts that
/// SF_Common is set; returns 0 for entries that are not GlobalValues.
uint64_t getCommonSize() const {
  assert(Flags & object::BasicSymbolRef::SF_Common);
  if (!isGV())
    return 0;
  const auto *GV = getGV();
  const auto &DL = GV->getParent()->getDataLayout();
  return DL.getTypeAllocSize(GV->getType()->getElementType());
}
/// Returns the alignment reported by the underlying GlobalValue. Asserts that
/// SF_Common is set; returns 0 for entries that are not GlobalValues.
unsigned getCommonAlignment() const {
  assert(Flags & object::BasicSymbolRef::SF_Common);
  return isGV() ? getGV()->getAlignment() : 0u;
}
};

class symbol_iterator {
Symbol Sym;

public:
/// Constructs the iterator by forwarding all three arguments to the embedded
/// Symbol, whose constructor positions it via skip().
symbol_iterator(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
const ModuleSymbolTable &SymTab, const InputFile *File)
: Sym(I, SymTab, File) {}

/// Pre-increment: steps past the current entry, then lets Symbol::skip()
/// move on to the next entry that is not rejected by shouldSkip().
symbol_iterator &operator++() {
// Step off the current entry first; skip() only moves forward from I.
++Sym.I;
Sym.skip();
return *this;
}

/// Post-increment: advances this iterator and returns a copy of its state
/// from before the advance.
symbol_iterator operator++(int) {
  symbol_iterator Previous(*this);
  operator++();
  return Previous;
}

/// Returns a reference to the Symbol embedded in this iterator (not a copy).
const Symbol &operator*() const { return Sym; }
/// Returns the address of the Symbol embedded in this iterator.
const Symbol *operator->() const { return &Sym; }

/// Two iterators differ exactly when their underlying symbol table
/// positions differ; no other Symbol state is compared.
bool operator!=(const symbol_iterator &Other) const {
  return !(Sym.I == Other.Sym.I);
}
/// Wraps S by copy-constructing the irsymtab::Symbol base subobject from it.
Symbol(const irsymtab::Symbol &S) : irsymtab::Symbol(S) {}

using irsymtab::Symbol::isUndefined;
using irsymtab::Symbol::isCommon;
using irsymtab::Symbol::isWeak;
using irsymtab::Symbol::isIndirect;
using irsymtab::Symbol::getName;
using irsymtab::Symbol::getVisibility;
using irsymtab::Symbol::canBeOmittedFromSymbolTable;
using irsymtab::Symbol::isTLS;
using irsymtab::Symbol::getComdatIndex;
using irsymtab::Symbol::getCommonSize;
using irsymtab::Symbol::getCommonAlignment;
using irsymtab::Symbol::getCOFFWeakExternalFallback;
};

/// A range over the symbols in this InputFile.
iterator_range<symbol_iterator> symbols() {
  symbol_iterator Begin(SymTab.symbols().begin(), SymTab, this);
  symbol_iterator End(SymTab.symbols().end(), SymTab, this);
  return llvm::make_range(Begin, End);
}
/// Returns an ArrayRef over the Symbols vector of this InputFile.
ArrayRef<Symbol> symbols() const { return Symbols; }

/// Returns linker options specified in the input file.
Expected<std::string> getLinkerOpts();
/// Returns the COFFLinkerOpts string stored on this InputFile.
StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; }

/// Returns the path to the InputFile.
StringRef getName() const;

/// Returns the source file path specified at compile time.
StringRef getSourceFileName() const;
/// Returns the SourceFileName string stored on this InputFile.
StringRef getSourceFileName() const { return SourceFileName; }

// Returns a table with all the comdats used by this file.
/// Returns an ArrayRef over the Comdats vector.
ArrayRef<StringRef> getComdatTable() const { return Comdats; }
/// Returns an ArrayRef over the ComdatTable vector.
ArrayRef<StringRef> getComdatTable() const { return ComdatTable; }

private:
iterator_range<symbol_iterator> module_symbols(InputModule &IM);
/// Returns the slice of Symbols belonging to module number I, using the
/// half-open [begin, end) index pair stored in ModuleSymIndices[I].
/// I is not range-checked here.
ArrayRef<Symbol> module_symbols(unsigned I) const {
  const std::pair<size_t, size_t> &Range = ModuleSymIndices[I];
  const Symbol *Base = Symbols.data();
  return {Base + Range.first, Base + Range.second};
}
};

/// This class wraps an output stream for a native object. Most clients should
Expand Down Expand Up @@ -452,20 +335,20 @@ class LTO {
// Global mapping from mangled symbol names to resolutions.
StringMap<GlobalResolution> GlobalResolutions;

void addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
const InputFile::Symbol &Sym, SymbolResolution Res,
void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res,
unsigned Partition);

// These functions take a range of symbol resolutions [ResI, ResE) and consume
// the resolutions used by a single input module by incrementing ResI. After
// these functions return, [ResI, ResE) will refer to the resolution range for
// the remaining modules in the InputFile.
Error addModule(InputFile &Input, InputFile::InputModule &IM,
Error addModule(InputFile &Input, unsigned ModI,
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
Error addRegularLTO(BitcodeModule BM,
ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE);
Error addThinLTO(BitcodeModule BM, Module &M,
iterator_range<InputFile::symbol_iterator> Syms,
Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI, const SymbolResolution *ResE);

Error runRegularLTO(AddStreamFn AddStream);
Expand Down
Loading

0 comments on commit 9f482fc

Please sign in to comment.