Skip to content

Commit

Permalink
[lld][WebAssembly] Match the ELF linker in transitioning away from ar…
Browse files Browse the repository at this point in the history
…chive indexes. (llvm#78658)

The ELF linker transitioned away from archive indexes in
https://reviews.llvm.org/D117284.

This paves the way for supporting `--start-lib`/`--end-lib` (See llvm#77960)

The ELF linker unified library handling with `--start-lib`/`--end-lib` and removed
the ArchiveFile class in https://reviews.llvm.org/D119074.
  • Loading branch information
sbc100 authored Jan 20, 2024
1 parent c71a5bf commit bcc9b9d
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 142 deletions.
4 changes: 4 additions & 0 deletions lld/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,9 @@ MachO Improvements
WebAssembly Improvements
------------------------

* Indexes are no longer required on archive files. Instead, symbol information
is read from object files within the archive. This matches the behaviour of
the ELF linker.

Fixes
#####
14 changes: 0 additions & 14 deletions lld/test/wasm/archive-no-index.s

This file was deleted.

2 changes: 1 addition & 1 deletion lld/test/wasm/bad-archive-member.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s
# RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o
# RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o)
# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode

.globl _start
_start:
29 changes: 14 additions & 15 deletions lld/wasm/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) {
if (fs::exists(importFile))
readImportFile(importFile.str());

auto members = getArchiveMembers(mbref);

// Handle -whole-archive.
if (inWholeArchive) {
for (const auto &[m, offset] : getArchiveMembers(mbref)) {
for (const auto &[m, offset] : members) {
auto *object = createObjectFile(m, path, offset);
// Mark object as live; object members are normally not
// live by default but -whole-archive is designed to treat
Expand All @@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mbref), path + ": failed to parse archive");

if (!file->isEmpty() && !file->hasSymbolTable()) {
error(mbref.getBufferIdentifier() +
": archive has no index; run ranlib to add one");
for (const auto &[m, offset] : members) {
auto magic = identify_magic(m.getBuffer());
if (magic == file_magic::wasm_object || magic == file_magic::bitcode)
files.push_back(createObjectFile(m, path, offset, true));
else
warn(path + ": archive member '" + m.getBufferIdentifier() +
"' is neither Wasm object file nor LLVM bitcode");
}

files.push_back(make<ArchiveFile>(mbref));
return;
}
case file_magic::bitcode:
Expand Down Expand Up @@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) {

static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
if (!sym)
return;

if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
MemoryBufferRef mb = lazySym->getMemberBuffer();
if (isBitcode(mb)) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
lazySym->extract();
}
if (sym && sym->isLazy() && isa<BitcodeFile>(sym->getFile())) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
cast<LazySymbol>(sym)->extract();
}
}

Expand Down
101 changes: 49 additions & 52 deletions lld/wasm/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,26 +75,19 @@ std::optional<MemoryBufferRef> readFile(StringRef path) {
}

InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive) {
uint64_t offsetInArchive, bool lazy) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::wasm_object) {
std::unique_ptr<Binary> bin =
CHECK(createBinary(mb), mb.getBufferIdentifier());
auto *obj = cast<WasmObjectFile>(bin.get());
if (obj->isSharedObject())
return make<SharedFile>(mb);
return make<ObjFile>(mb, archiveName);
return make<ObjFile>(mb, archiveName, lazy);
}

if (magic == file_magic::bitcode)
return make<BitcodeFile>(mb, archiveName, offsetInArchive);

std::string name = mb.getBufferIdentifier().str();
if (!archiveName.empty()) {
name = archiveName.str() + "(" + name + ")";
}

fatal("unknown file type: " + name);
assert(magic == file_magic::bitcode);
return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
}

// Relocations contain either symbol or type indices. This function takes a
Expand Down Expand Up @@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) {
return true;
}

void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
// Registers the symbols defined by this not-yet-extracted object file with the
// symbol table as lazy symbols, so that a later (or already pending) undefined
// reference to one of them can trigger extraction of this file.
void ObjFile::parseLazy() {
  LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n");
  for (const SymbolRef &sym : wasmObj->symbols()) {
    const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
    // Only definitions can satisfy a reference. Symbols with local binding
    // are additionally skipped: they are not visible to other object files,
    // so they must never cause this file to be extracted.
    if (!wasmSym.isDefined() || wasmSym.isBindingLocal())
      continue;
    symtab->addLazy(wasmSym.Info.Name, this);
    // addLazy() may trigger this->extract() if an existing symbol is an
    // undefined symbol. If that happens, this function has served its purpose,
    // and we can exit from the loop early.
    if (!lazy)
      break;
  }
}

ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
: InputFile(ObjectKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);

// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();

std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));

auto *obj = dyn_cast<WasmObjectFile>(bin.get());
Expand All @@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) {
wasmObj.reset(obj);

checkArch(obj->getArch());
}

void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");

// Build up a map of function indices to table indices for use when
// verifying the existing table index relocations
Expand Down Expand Up @@ -717,43 +736,6 @@ void StubFile::parse() {
}
}

void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
file = CHECK(Archive::create(mb), toString(this));

// Read the symbol table to construct Lazy symbols.
int count = 0;
for (const Archive::Symbol &sym : file->symbols()) {
symtab->addLazy(this, &sym);
++count;
}
LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n");
(void) count;
}

void ArchiveFile::addMember(const Archive::Symbol *sym) {
const Archive::Child &c =
CHECK(sym->getMember(),
"could not get the member for symbol " + sym->getName());

// Don't try to load the same member twice (this can happen when members
// mutually reference each other).
if (!seen.insert(c.getChildOffset()).second)
return;

LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n");
LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");

MemoryBufferRef mb =
CHECK(c.getMemoryBufferRef(),
"could not get the buffer for the member defining symbol " +
sym->getName());

InputFile *obj = createObjectFile(mb, getName(), c.getChildOffset());
symtab->addFile(obj, sym->getName());
}

static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
Expand Down Expand Up @@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
}

BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive)
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);

std::string path = mb.getBufferIdentifier().str();
Expand All @@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,

bool BitcodeFile::doneLTO = false;

// Registers every symbol defined by this not-yet-extracted bitcode file with
// the symbol table as a lazy symbol, so that an undefined reference to one of
// them can trigger extraction of this file.
void BitcodeFile::parseLazy() {
  // The symbol index is not needed here, so iterate the symbols directly
  // rather than through llvm::enumerate().
  for (const auto &irSym : obj->symbols()) {
    // Undefined symbols are references, not definitions; nothing to offer.
    if (irSym.isUndefined())
      continue;
    // Copy the name through saver() before handing it to the symbol table.
    StringRef name = saver().save(irSym.getName());
    symtab->addLazy(name, this);
    // addLazy() may trigger this->extract() if an existing symbol is an
    // undefined symbol. If that happens, this function has served its purpose,
    // and we can exit from the loop early.
    if (!lazy)
      break;
  }
}

void BitcodeFile::parse(StringRef symName) {
if (doneLTO) {
error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");
Expand Down
41 changes: 10 additions & 31 deletions lld/wasm/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TargetParser/Triple.h"
Expand Down Expand Up @@ -45,7 +44,6 @@ class InputFile {
enum Kind {
ObjectKind,
SharedKind,
ArchiveKind,
BitcodeKind,
StubKind,
};
Expand All @@ -69,6 +67,11 @@ class InputFile {
void markLive() { live = true; }
bool isLive() const { return live; }

// True if this file exists in an archive file and has not yet been
// extracted.
// TODO(sbc): Use this to implement --start-lib/--end-lib.
bool lazy = false;

protected:
InputFile(Kind k, MemoryBufferRef m)
: mb(m), fileKind(k), live(!config->gcSections) {}
Expand All @@ -85,35 +88,14 @@ class InputFile {
bool live;
};

// .a file (ar archive)
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }

void addMember(const llvm::object::Archive::Symbol *sym);

void parse();

private:
std::unique_ptr<llvm::object::Archive> file;
llvm::DenseSet<uint64_t> seen;
};

// .o file (wasm object file)
class ObjFile : public InputFile {
public:
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
: InputFile(ObjectKind, m) {
this->archiveName = std::string(archiveName);

// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();
}
ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false);
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }

void parse(bool ignoreComdats = false);
void parseLazy();

// Returns the underlying wasm file.
const WasmObjectFile *getWasmObj() const { return wasmObj.get(); }
Expand Down Expand Up @@ -173,10 +155,11 @@ class SharedFile : public InputFile {
class BitcodeFile : public InputFile {
public:
BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive);
uint64_t offsetInArchive, bool lazy);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }

void parse(StringRef symName);
void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;

// Set to true once LTO is complete in order to prevent further bitcode objects
Expand All @@ -196,14 +179,10 @@ class StubFile : public InputFile {
llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
};

inline bool isBitcode(MemoryBufferRef mb) {
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
}

// Will report a fatal() error if the input buffer is not a valid bitcode
// or wasm object file.
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
uint64_t offsetInArchive = 0);
uint64_t offsetInArchive = 0, bool lazy = false);

// Opens a given file.
std::optional<MemoryBufferRef> readFile(StringRef path);
Expand Down
23 changes: 13 additions & 10 deletions lld/wasm/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@ SymbolTable *symtab;
void SymbolTable::addFile(InputFile *file, StringRef symName) {
log("Processing: " + toString(file));

// .a file
if (auto *f = dyn_cast<ArchiveFile>(file)) {
f->parse();
// Lazy object file
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file)) {
f->parseLazy();
} else {
cast<ObjFile>(file)->parseLazy();
}
return;
}

Expand Down Expand Up @@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
return nullptr;
}

void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
StringRef name = sym->getName();
void SymbolTable::addLazy(StringRef name, InputFile *file) {
LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");

Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insertName(name);

if (wasInserted) {
replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
replaceSymbol<LazySymbol>(s, name, 0, file);
return;
}

Expand All @@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
if (auto *f = dyn_cast<UndefinedFunction>(s))
oldSig = f->signature;
LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
file, *sym);
auto newSym =
replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
newSym->signature = oldSig;
return;
}

LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
const InputFile *oldFile = s->getFile();
file->addMember(sym);
replaceSymbol<LazySymbol>(s, name, 0, file)->extract();
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
}
Expand Down
Loading

0 comments on commit bcc9b9d

Please sign in to comment.