Skip to content

Commit

Permalink
[WebAssembly][Objcopy] Write output section headers identically to in…
Browse files Browse the repository at this point in the history
…puts

Previously when objcopy generated section headers, it padded the LEB
that encodes the section size out to 5 bytes, matching the behavior of
clang. This is correct, but results in a binary that differs from the
input. This can sometimes have undesirable consequences (e.g. breaking
source maps).

This change makes the object reader remember the size of the LEB
encoding in the section header, so that llvm-objcopy can reproduce it
exactly. For sections not read from an object file (e.g. that
llvm-objcopy is adding itself), pad to 5 bytes.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D155535
  • Loading branch information
dschuff authored and aheejin committed Jul 27, 2023
1 parent edf2e0e commit 1b21067
Show file tree
Hide file tree
Showing 11 changed files with 178 additions and 12 deletions.
10 changes: 6 additions & 4 deletions llvm/include/llvm/Object/Wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,14 @@ class WasmSymbol {
struct WasmSection {
WasmSection() = default;

uint32_t Type = 0; // Section type (See below)
uint32_t Offset = 0; // Offset with in the file
uint32_t Type = 0;
uint32_t Offset = 0; // Offset within the file
StringRef Name; // Section name (User-defined sections only)
uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
ArrayRef<uint8_t> Content; // Section content
std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
ArrayRef<uint8_t> Content;
std::vector<wasm::WasmRelocation> Relocations;
// Length of the LEB encoding of the section header's size field
std::optional<uint8_t> HeaderSecSizeEncodingLen;
};

struct WasmSegment {
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/ObjectYAML/WasmYAML.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ struct Section {

SectionType Type;
std::vector<Relocation> Relocations;
std::optional<uint8_t> HeaderSecSizeEncodingLen;
};

struct CustomSection : Section {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjCopy/wasm/WasmObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct Section {
// For now, each section is only an opaque binary blob with no distinction
// between custom and known sections.
uint8_t SectionType;
std::optional<uint8_t> HeaderSecSizeEncodingLen;
StringRef Name;
ArrayRef<uint8_t> Contents;
};
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/ObjCopy/wasm/WasmReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ Expected<std::unique_ptr<Object>> Reader::create() const {
Obj->Sections.reserve(WasmObj.getNumSections());
for (const SectionRef &Sec : WasmObj.sections()) {
const WasmSection &WS = WasmObj.getWasmSection(Sec);
Obj->Sections.push_back(
{static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
Obj->Sections.push_back({static_cast<uint8_t>(WS.Type),
WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content});
// Give known sections standard names to allow them to be selected. (Custom
// sections already have their names filled in by the parser).
Section &ReaderSec = Obj->Sections.back();
Expand Down
13 changes: 8 additions & 5 deletions llvm/lib/ObjCopy/wasm/WasmWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,19 @@ Writer::SectionHeader Writer::createSectionHeader(const Section &S,
SectionSize = S.Contents.size();
if (HasName)
SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
// Pad the LEB value out to 5 bytes to make it a predictable size, and
// match the behavior of clang.
encodeULEB128(SectionSize, OS, 5);
// If we read this section from an object file, use its original size for the
// padding of the LEB value to avoid changing the file size. Otherwise, pad
// out to 5 bytes to make it predictable, and match the behavior of clang.
unsigned HeaderSecSizeEncodingLen =
S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5;
encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen);
if (HasName) {
encodeULEB128(S.Name.size(), OS);
OS << S.Name;
}
// Total section size is the content size plus 1 for the section type and
// 5 for the LEB-encoded size.
SectionSize = SectionSize + 1 + 5;
// the LEB-encoded size.
SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen;
return Header;
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Object/WasmObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,11 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
Section.Offset = Ctx.Ptr - Ctx.Start;
Section.Type = readUint8(Ctx);
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
// When reading the section's size, store the size of the LEB used to encode
// it. This allows objcopy/strip to reproduce the binary identically.
const uint8_t *PreSizePtr = Ctx.Ptr;
uint32_t Size = readVaruint32(Ctx);
Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
if (Size == 0)
return make_error<StringError>("zero length section",
object_error::parse_failed);
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/ObjectYAML/WasmEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,8 +646,18 @@ bool WasmWriter::writeWasm(raw_ostream &OS) {

StringStream.flush();

unsigned HeaderSecSizeEncodingLen =
Sec->HeaderSecSizeEncodingLen ? *Sec->HeaderSecSizeEncodingLen : 5;
unsigned RequiredLen = getULEB128Size(OutString.size());
// Wasm spec does not allow LEBs larger than 5 bytes
assert(RequiredLen <= 5);
if (HeaderSecSizeEncodingLen < RequiredLen) {
reportError("section header length can't be encoded in a LEB of size " +
Twine(HeaderSecSizeEncodingLen));
return false;
}
// Write the section size followed by the content
encodeULEB128(OutString.size(), OS);
encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen);
OS << OutString;
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/WasmYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ void MappingTraits<WasmYAML::Object>::mapping(IO &IO,
static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
IO.mapRequired("Type", Section.Type);
IO.mapOptional("Relocations", Section.Relocations);
IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen);
}

static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
Expand Down
91 changes: 91 additions & 0 deletions llvm/test/ObjectYAML/wasm/section_header_size.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
## Test that obj2yaml output includes the section header size encoding length
## only when the length isn't padded to 5 bytes.
# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s

--- !WASM
FileHeader:
Version: 0x1
Sections:
- Type: TYPE
HeaderSecSizeEncodingLen: 3
Signatures:
- Index: 0
ParamTypes:
- I32
- I32
ReturnTypes:
- I32
- Type: FUNCTION
HeaderSecSizeEncodingLen: 4
FunctionTypes: [ 0 ]
- Type: MEMORY
HeaderSecSizeEncodingLen: 1
Memories:
- Flags: [ HAS_MAX ]
Minimum: 0x100
Maximum: 0x100
- Type: EXPORT
HeaderSecSizeEncodingLen: 5
Exports:
- Name: add
Kind: FUNCTION
Index: 0
- Type: CODE
HeaderSecSizeEncodingLen: 2
Functions:
- Index: 0
Locals: []
Body: 200020016A0B
...
# CHECK: --- !WASM
# CHECK-NEXT: FileHeader:
# CHECK-NEXT: Version: 0x1
# CHECK-NEXT: Sections:
# CHECK-NEXT: - Type: TYPE
# CHECK-NEXT: HeaderSecSizeEncodingLen: 3
# CHECK-NEXT: Signatures:
# CHECK-NEXT: - Index: 0
# CHECK-NEXT: ParamTypes:
# CHECK-NEXT: - I32
# CHECK-NEXT: - I32
# CHECK-NEXT: ReturnTypes:
# CHECK-NEXT: - I32
# CHECK-NEXT: - Type: FUNCTION
# CHECK-NEXT: HeaderSecSizeEncodingLen: 4
# CHECK-NEXT: FunctionTypes: [ 0 ]
# CHECK-NEXT: - Type: MEMORY
# CHECK-NEXT: Memories:
# CHECK-NEXT: - Flags: [ HAS_MAX ]
# CHECK-NEXT: Minimum: 0x100
# CHECK-NEXT: Maximum: 0x100
# CHECK-NEXT: - Type: EXPORT
# CHECK-NEXT: Exports:
# CHECK-NEXT: - Name: add
# CHECK-NEXT: Kind: FUNCTION
# CHECK-NEXT: Index: 0
# CHECK-NEXT: - Type: CODE
# CHECK-NEXT: HeaderSecSizeEncodingLen: 2
# CHECK-NEXT: Functions:
# CHECK-NEXT: - Index: 0
# CHECK-NEXT: Locals: []
# CHECK-NEXT: Body: 200020016A0B

## Test if we correctly error out if the provided section header size is less
## than the size required.
# RUN: not yaml2obj --docnum=2 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID
# INVALID: yaml2obj: error: section header length can't be encoded in a LEB of size 0

--- !WASM
FileHeader:
Version: 0x1
Sections:
- Type: TYPE
HeaderSecSizeEncodingLen: 0
Signatures:
- Index: 0
ParamTypes:
- I32
- I32
ReturnTypes:
- I32
...
41 changes: 41 additions & 0 deletions llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
## Test that objcopy generates section headers that are identical to those from
## the input binary, including the encoded size of the LEB that represents the
## section size.

# RUN: yaml2obj %s -o %t.wasm
# RUN: llvm-objcopy %t.wasm %t.wasm.copy
# RUN: diff %t.wasm %t.wasm.copy

--- !WASM
FileHeader:
Version: 0x1
Sections:
- Type: TYPE
HeaderSecSizeEncodingLen: 3
Signatures:
- Index: 0
ParamTypes:
- I32
- I32
ReturnTypes:
- I32
- Type: FUNCTION
HeaderSecSizeEncodingLen: 4
FunctionTypes: [ 0 ]
- Type: MEMORY
HeaderSecSizeEncodingLen: 1
Memories:
- Flags: [ HAS_MAX ]
Minimum: 0x100
Maximum: 0x100
- Type: EXPORT
HeaderSecSizeEncodingLen: 5
Exports:
- Name: add
Kind: FUNCTION
Index: 0
- Type: CODE
Functions:
- Index: 0
Locals: []
Body: 200020016A0B
12 changes: 12 additions & 0 deletions llvm/tools/obj2yaml/wasm2yaml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "llvm/Object/COFF.h"
#include "llvm/ObjectYAML/WasmYAML.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/YAMLTraits.h"

using namespace llvm;
Expand Down Expand Up @@ -392,6 +393,17 @@ ErrorOr<WasmYAML::Object *> WasmDumper::dump() {
llvm_unreachable("Unknown section type");
break;
}

// Only propagate the section size encoding length if it's not the minimal
// size or 5 (the default "padded" value). This is to avoid having every
// YAML output polluted with this value when we usually don't care about it
// (and avoid rewriting all the test expectations).
if (WasmSec.HeaderSecSizeEncodingLen &&
WasmSec.HeaderSecSizeEncodingLen !=
getULEB128Size(WasmSec.Content.size()) &&
WasmSec.HeaderSecSizeEncodingLen != 5)
S->HeaderSecSizeEncodingLen = WasmSec.HeaderSecSizeEncodingLen;

for (const wasm::WasmRelocation &Reloc : WasmSec.Relocations) {
WasmYAML::Relocation R;
R.Type = Reloc.Type;
Expand Down

0 comments on commit 1b21067

Please sign in to comment.