Skip to content

Commit

Permalink
[BOLT] Use rewriter interface for updating binary build ID (llvm#94273)
Browse files Browse the repository at this point in the history
Move functionality for patching the build ID into a separate rewriter class
and change the way we do the patching. Support build IDs in different
note sections in order to update the build ID in the Linux kernel binary,
which puts it into the ".notes" section instead of ".note.gnu.build-id".
  • Loading branch information
maksfb authored Jun 4, 2024
1 parent 4973ad4 commit 8ea59ec
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 104 deletions.
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class BinarySection {
return true;
}
}
/// Return true if this is an ELF section whose type is SHT_NOTE.
bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; }
/// Return the value of the IsReordered flag.
bool isReordered() const { return IsReordered; }
/// Return the value of the IsAnonymous flag.
bool isAnonymous() const { return IsAnonymous; }
/// Return the value of the IsRelro flag.
bool isRelro() const { return IsRelro; }
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class MetadataManager {
/// Register a new \p Rewriter.
void registerRewriter(std::unique_ptr<MetadataRewriter> Rewriter);

/// Run initializers after sections are discovered.
void runSectionInitializers();

/// Execute initialization of rewriters while functions are disassembled, but
/// CFG is not yet built.
void runInitializersPreCFG();
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class MetadataRewriter {
/// Return the name of this rewriter, as stored in Name.
StringRef getName() const { return Name; }

/// Run initialization after the binary is read and sections are identified,
/// but before functions are discovered. The default implementation does
/// nothing and returns success.
virtual Error sectionInitializer() { return Error::success(); }

/// Interface for modifying/annotating functions in the binary based on the
/// contents of the section. Functions are in pre-cfg state. The default
/// implementation does nothing and returns success.
virtual Error preCFGInitializer() { return Error::success(); }
Expand Down
2 changes: 2 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriters.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class BinaryContext;

std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
Expand Down
23 changes: 3 additions & 20 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,6 @@ class RewriteInstance {
return InputFile->getFileName();
}

/// Set the build-id string if we did not fail to parse the contents of the
/// ELF note section containing build-id information.
void parseBuildID();

/// The build-id is typically a stream of 20 bytes. Return these bytes in
/// printable hexadecimal form if they are available, or std::nullopt
/// otherwise.
std::optional<std::string> getPrintableBuildID() const;

/// If this instance uses a profile, return appropriate profile reader.
/// The returned pointer is whatever ProfileReader.get() yields.
/// NOTE(review): presumably null when no profile is in use - confirm.
const ProfileReaderBase *getProfileReader() const {
return ProfileReader.get();
}
/// Link additional runtime code to support instrumentation.
void linkRuntime();

/// Process metadata in sections before functions are discovered.
void processSectionMetadata();

/// Process metadata in special sections before CFG is built for functions.
void processMetadataPreCFG();

Expand Down Expand Up @@ -368,11 +362,6 @@ class RewriteInstance {
/// Loop over now emitted functions to write translation maps
void encodeBATSection();

/// Update the ELF note section containing the binary build-id to reflect
/// a new build-id, so tools can differentiate between the old and the
/// rewritten binary.
void patchBuildID();

/// Return file offset corresponding to a virtual \p Address.
/// Return 0 if the address has no mapping in the file, including being
/// part of .bss section.
Expand Down Expand Up @@ -562,18 +551,12 @@ class RewriteInstance {
/// Exception handling and stack unwinding information in this binary.
ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address};

/// .note.gnu.build-id section.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};

/// Look up the section uniquely identified by \p Name. Returns a pointer to
/// the section, or nullptr when the unique-name lookup does not succeed.
BinarySection *getSection(const Twine &Name) {
  if (ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(Name))
    return &Section.get();
  return nullptr;
}

/// A reference to the build-id bytes in the original binary
StringRef BuildID;

/// Keep track of functions we fail to write in the binary. We need to avoid
/// rewriting CFI info for these functions.
std::vector<uint64_t> FailedAddresses;
Expand Down
113 changes: 113 additions & 0 deletions bolt/lib/Rewrite/BuildIDRewriter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//===- bolt/Rewrite/BuildIDRewriter.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Read and update build ID stored in ELF note section.
//
//===----------------------------------------------------------------------===//

#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "llvm/Support/Errc.h"

using namespace llvm;
using namespace bolt;

namespace {

/// Render the raw build-id bytes (typically 20 of them) as a printable string
/// made of two lowercase hexadecimal digits per byte.
std::string getPrintableBuildID(StringRef BuildID) {
  std::string Hex;
  raw_string_ostream Stream(Hex);
  for (const unsigned char Byte : BuildID.bytes())
    Stream << format("%.2x", Byte);

  return Stream.str();
}

/// Metadata rewriter that locates the GNU build ID note while sections are
/// being initialized and patches the build ID after emission.
class BuildIDRewriter final : public MetadataRewriter {

/// Information about binary build ID.
/// Note section in which the build ID was found. Holds an error value until
/// sectionInitializer() assigns a section to it.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
/// Build ID descriptor bytes; a view into the contents of the note section.
StringRef BuildID;
/// Offset of the build ID descriptor from the start of the note section
/// contents. Unset until a build ID is found.
std::optional<uint64_t> BuildIDOffset;
/// Descriptor size taken from the note header. Assigned by
/// sectionInitializer(); not read anywhere else in this file.
std::optional<uint64_t> BuildIDSize;

public:
/// Construct the rewriter, forwarding \p Name and \p BC to MetadataRewriter.
BuildIDRewriter(StringRef Name, BinaryContext &BC)
: MetadataRewriter(Name, BC) {}

/// Scan note sections for the GNU build ID and remember its location.
Error sectionInitializer() override;

/// Register a patch that flips the lowest bit of the last build ID byte.
Error postEmitFinalizer() override;
};

/// Locate the GNU build ID note and remember where it lives.
///
/// Every ELF note section is scanned note by note. The first note of type
/// NT_GNU_BUILD_ID whose name starts with "GNU" and whose descriptor is
/// non-empty is recorded (section, descriptor bytes, offset and size), and its
/// printable form is passed to the binary context.
///
/// \returns success when a build ID was recorded or none exists; an error when
/// a note header cannot be read or when the build ID descriptor does not fit
/// inside its section.
Error BuildIDRewriter::sectionInitializer() {
  // Typically, build ID will reside in .note.gnu.build-id section. However,
  // a linker script can change the section name and such is the case with
  // the Linux kernel. Hence, we iterate over all note sections.
  for (BinarySection &NoteSection : BC.sections()) {
    if (!NoteSection.isNote())
      continue;

    StringRef Buf = NoteSection.getContents();
    DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
                                     BC.AsmInfo->getCodePointerSize());
    DataExtractor::Cursor Cursor(0);
    while (Cursor && !DE.eof(Cursor)) {
      // Note header: name size, descriptor size and type, 32 bits each.
      const uint32_t NameSz = DE.getU32(Cursor);
      const uint32_t DescSz = DE.getU32(Cursor);
      const uint32_t Type = DE.getU32(Cursor);

      // Name and descriptor each start on a 4-byte boundary.
      StringRef Name =
          NameSz ? Buf.slice(Cursor.tell(), Cursor.tell() + NameSz) : "<empty>";
      Cursor.seek(alignTo(Cursor.tell() + NameSz, 4));

      const uint64_t DescOffset = Cursor.tell();
      StringRef Desc =
          DescSz ? Buf.slice(DescOffset, DescOffset + DescSz) : "<empty>";
      Cursor.seek(alignTo(DescOffset + DescSz, 4));

      if (!Cursor)
        return createStringError(errc::executable_format_error,
                                 "out of bounds while reading note section: %s",
                                 toString(Cursor.takeError()).c_str());

      if (Type == ELF::NT_GNU_BUILD_ID && Name.substr(0, 3) == "GNU" &&
          DescSz) {
        // slice() clamps to the buffer instead of failing, so a descriptor
        // that runs past the end of the section shows up as a shorter (even
        // empty) Desc. Any other overrunning note is reported as out of
        // bounds on the next loop iteration; report this one here because we
        // are about to return.
        if (Desc.size() != DescSz)
          return createStringError(
              errc::executable_format_error,
              "out of bounds while reading build ID note");

        BuildIDSection = NoteSection;
        BuildID = Desc;
        BC.setFileBuildID(getPrintableBuildID(Desc));
        BuildIDOffset = DescOffset;
        BuildIDSize = DescSz;

        return Error::success();
      }
    }
  }

  return Error::success();
}

/// Change the build ID of the output binary by flipping the lowest bit of its
/// last byte. The change is registered as a patch on the note section that
/// holds the build ID.
///
/// \returns success always; nothing is patched when no build ID was recorded.
Error BuildIDRewriter::postEmitFinalizer() {
  // An empty BuildID would make the last-byte access below read out of bounds,
  // so treat it the same as "no build ID found".
  if (!BuildIDSection || !BuildIDOffset || BuildID.empty())
    return Error::success();

  const uint64_t LastByteIndex = BuildID.size() - 1;
  const uint8_t LastByte = BuildID[LastByteIndex];
  SmallVector<char, 1> Patch = {static_cast<char>(LastByte ^ 1)};
  // BuildIDOffset is relative to the start of the section contents.
  BuildIDSection->addPatch(*BuildIDOffset + LastByteIndex, Patch);
  BC.outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";

  return Error::success();
}
} // namespace

/// Create the rewriter that reads and patches the build ID of the binary.
std::unique_ptr<MetadataRewriter>
llvm::bolt::createBuildIDRewriter(BinaryContext &BC) {
  const StringRef RewriterName = "build-id-rewriter";
  return std::make_unique<BuildIDRewriter>(RewriterName, BC);
}
1 change: 1 addition & 0 deletions bolt/lib/Rewrite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
SDTRewriter.cpp
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Rewrite/MetadataManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ void MetadataManager::registerRewriter(
Rewriters.emplace_back(std::move(Rewriter));
}

void MetadataManager::runSectionInitializers() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
<< " after reading sections\n");
if (Error E = Rewriter->sectionInitializer()) {
errs() << "BOLT-ERROR: while running " << Rewriter->getName()
<< " after reading sections: " << toString(std::move(E)) << '\n';
exit(1);
}
}
}

void MetadataManager::runInitializersPreCFG() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
Expand Down
92 changes: 8 additions & 84 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,82 +643,6 @@ Error RewriteInstance::discoverStorage() {
return Error::success();
}

void RewriteInstance::parseBuildID() {
if (!BuildIDSection)
return;

StringRef Buf = BuildIDSection->getContents();

// Reading notes section (see Portable Formats Specification, Version 1.1,
// pg 2-5, section "Note Section").
DataExtractor DE =
DataExtractor(Buf,
/*IsLittleEndian=*/true, InputFile->getBytesInAddress());
uint64_t Offset = 0;
if (!DE.isValidOffset(Offset))
return;
uint32_t NameSz = DE.getU32(&Offset);
if (!DE.isValidOffset(Offset))
return;
uint32_t DescSz = DE.getU32(&Offset);
if (!DE.isValidOffset(Offset))
return;
uint32_t Type = DE.getU32(&Offset);

LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
<< "; Type = " << Type << "\n");

// Type 3 is a GNU build-id note section
if (Type != 3)
return;

StringRef Name = Buf.slice(Offset, Offset + NameSz);
Offset = alignTo(Offset + NameSz, 4);
if (Name.substr(0, 3) != "GNU")
return;

BuildID = Buf.slice(Offset, Offset + DescSz);
}

std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
if (BuildID.empty())
return std::nullopt;

std::string Str;
raw_string_ostream OS(Str);
const unsigned char *CharIter = BuildID.bytes_begin();
while (CharIter != BuildID.bytes_end()) {
if (*CharIter < 0x10)
OS << "0";
OS << Twine::utohexstr(*CharIter);
++CharIter;
}
return OS.str();
}

void RewriteInstance::patchBuildID() {
raw_fd_ostream &OS = Out->os();

if (BuildID.empty())
return;

size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
assert(IDOffset != StringRef::npos && "failed to patch build-id");

uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
if (!FileOffset) {
BC->errs()
<< "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
return;
}

char LastIDByte = BuildID[BuildID.size() - 1];
LastIDByte ^= 1;
OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);

BC->outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
}

Error RewriteInstance::run() {
assert(BC && "failed to create a binary context");

Expand Down Expand Up @@ -1977,7 +1901,6 @@ Error RewriteInstance::readSpecialSections() {
".rela" + std::string(BC->getMainCodeSectionName()));
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");

if (ErrorOr<BinarySection &> BATSec =
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
Expand Down Expand Up @@ -2035,10 +1958,7 @@ Error RewriteInstance::readSpecialSections() {
report_error("expected valid eh_frame section", EHFrameOrError.takeError());
CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));

// Parse build-id
parseBuildID();
if (std::optional<std::string> FileBuildID = getPrintableBuildID())
BC->setFileBuildID(*FileBuildID);
processSectionMetadata();

// Read .dynamic/PT_DYNAMIC.
return readELFDynamic();
Expand Down Expand Up @@ -3218,14 +3138,20 @@ void RewriteInstance::initializeMetadataManager() {
if (BC->IsLinuxKernel)
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));

MetadataManager.registerRewriter(createBuildIDRewriter(*BC));

MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));

MetadataManager.registerRewriter(createSDTRewriter(*BC));
}

void RewriteInstance::processMetadataPreCFG() {
/// Set up the metadata manager and run the section-level initializer of every
/// registered rewriter.
void RewriteInstance::processSectionMetadata() {
// Must come first: section initializers can only run on rewriters that the
// manager has been set up with.
initializeMetadataManager();

MetadataManager.runSectionInitializers();
}

void RewriteInstance::processMetadataPreCFG() {
MetadataManager.runInitializersPreCFG();

processProfileDataPreCFG();
Expand Down Expand Up @@ -5772,8 +5698,6 @@ void RewriteInstance::rewriteFile() {
// Update symbol tables.
patchELFSymTabs();

patchBuildID();

if (opts::EnableBAT)
encodeBATSection();

Expand Down

0 comments on commit 8ea59ec

Please sign in to comment.