Skip to content

Commit

Permalink
MC: Support COFF string tables larger than 10MB
Browse files Browse the repository at this point in the history
Offsets past the range of single-slash encoding are encoded as base64,
padded to 6 characters, and prefixed with two slashes. This encoding is
undocumented but used by MSVC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201940 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
gix committed Feb 22, 2014
1 parent 3c288fc commit fa3089b
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 3 deletions.
28 changes: 27 additions & 1 deletion lib/MC/WinCOFFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,12 +468,35 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
}
}

// Write a string table offset into Buffer using the double-slash form:
// two '/' characters followed by exactly six base-64 digits, most significant
// digit first ('//AAAAAA', '//AAAAAB', ...). Small values are left-padded with
// 'A' (digit zero).
//
// Buffer must have room for 8 bytes; exactly 8 bytes are written and no
// terminating null is appended.
// Value must lie above the 7-digit decimal range and fit in six base-64
// digits (36 bits); this is checked by assertion only.
static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
  assert(Value > 9999999 && Value <= 0xFFFFFFFFF &&
         "Illegal section name encoding for value");

  static const char Digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "abcdefghijklmnopqrstuvwxyz"
                               "0123456789+/";

  Buffer[0] = Buffer[1] = '/';

  // Fill the six digit slots from the last byte back to the first one after
  // the prefix, peeling off the low six bits of Value each time.
  for (unsigned Slot = 7; Slot >= 2; --Slot) {
    Buffer[Slot] = Digits[Value & 63];
    Value >>= 6;
  }
}

/// Making a section real involves assigning it a number and putting its
/// name into the string table if needed.
void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
if (S.Name.size() > COFF::NameSize) {
const unsigned Max6DecimalSize = 999999;
const unsigned Max7DecimalSize = 9999999;
const uint64_t MaxBase64Size = 0xFFFFFFFFF; // 64^6, including 0
uint64_t StringTableEntry = Strings.insert(S.Name.c_str());

if (StringTableEntry <= Max6DecimalSize) {
Expand All @@ -484,8 +507,11 @@ void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
char buffer[9] = { };
std::sprintf(buffer, "/%d", unsigned(StringTableEntry));
std::memcpy(S.Header.Name, buffer, 8);
} else if (StringTableEntry <= MaxBase64Size) {
// Starting with 10,000,000, offsets are encoded as base64.
encodeBase64StringEntry(S.Header.Name, StringTableEntry);
} else {
report_fatal_error("COFF string table is greater than 9,999,999 bytes.");
report_fatal_error("COFF string table is greater than 64 GB.");
}
} else
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
Expand Down
44 changes: 42 additions & 2 deletions lib/Object/COFFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <limits>

using namespace llvm;
using namespace object;
Expand Down Expand Up @@ -52,6 +53,40 @@ static error_code getObject(const T *&Obj, const MemoryBuffer *M,
return object_error::success;
}

// Parse the base-64 digits of a '//AAAAAA' string table reference. \arg Str
// must be the digits only, with the two leading slashes already removed, and
// may hold at most six characters.
//
// Digit values: 'A'-'Z' = 0..25, 'a'-'z' = 26..51, '0'-'9' = 52..61,
// '+' = 62, '/' = 63.
//
// Returns false on success and stores the decoded offset in \arg Result.
// Returns true on failure (too many characters, a character outside the
// alphabet, or a value that does not fit in 32 bits); Result is then left
// untouched.
static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) {
  assert(Str.size() <= 6 && "String too long, possible overflow.");
  if (Str.size() > 6)
    return true;

  // Six digits carry up to 36 bits, so accumulate in 64 bits and range-check
  // once at the end.
  uint64_t Accum = 0;
  for (unsigned Idx = 0; Idx != Str.size(); ++Idx) {
    const char C = Str[Idx];
    unsigned Digit;
    if ('A' <= C && C <= 'Z')
      Digit = C - 'A';
    else if ('a' <= C && C <= 'z')
      Digit = 26 + (C - 'a');
    else if ('0' <= C && C <= '9')
      Digit = 52 + (C - '0');
    else if (C == '+')
      Digit = 62;
    else if (C == '/')
      Digit = 63;
    else
      return true;

    Accum = Accum * 64 + Digit;
  }

  if (Accum > std::numeric_limits<uint32_t>::max())
    return true;

  Result = static_cast<uint32_t>(Accum);
  return false;
}

const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Ref) const {
const coff_symbol *Addr = reinterpret_cast<const coff_symbol*>(Ref.p);

Expand Down Expand Up @@ -766,8 +801,13 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec,
// Check for string table entry. First byte is '/'.
if (Name[0] == '/') {
uint32_t Offset;
if (Name.substr(1).getAsInteger(10, Offset))
return object_error::parse_failed;
if (Name[1] == '/') {
if (decodeBase64StringEntry(Name.substr(2), Offset))
return object_error::parse_failed;
} else {
if (Name.substr(1).getAsInteger(10, Offset))
return object_error::parse_failed;
}
if (error_code EC = getString(Offset, Name))
return EC;
}
Expand Down
26 changes: 26 additions & 0 deletions test/MC/COFF/section-name-encoding.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Encodings for different lengths:
// [0, 8]: raw name
// (8, 9999999]: base 10 string table index (/9999999)
// (9999999, 0xFFFFFFFF]: base 64 string table index (//AAAAAA)
//
// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s

Expand Down Expand Up @@ -60,3 +61,28 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
// CHECK: Name: seven_digit (2F 31 30 30 30 30 32 39)
// CHECK: }
.section seven_digit; .long 1


// Generate padding sections to increase the string table size to at least
// 10,000,000 bytes.
// pad_sections_ex: forwards to pad_sections with its single argument
// repeated nine times back to back, so the text handed to pad_sections is
// nine times as long as the text given here.
// NOTE(review): pad_sections is defined earlier in this file, outside this
// view; how it turns that text into sections should be confirmed there.
.macro pad_sections_ex pad
// 9x \pad
pad_sections \pad\pad\pad\pad\pad\pad\pad\pad\pad
.endm

// 1000x 'a'
pad_sections_ex aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa


// //AAmJa4 == 1000029 + 12 + (5 * (2 + (9 * 20 * 10 * 1000) + 1)) == 38*64^3 + 9*64^2 + 26*64 + 56
// v | | v ~~~~~~~~~~~~~~~~~~ v
// seven_digit offset v v "p0" pad NUL separator
// "seven_digit\0" # of pad sections
//
// "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56".
//
// CHECK: Section {
// CHECK: Number: 15
// CHECK: Name: double_slash (2F 2F 41 41 6D 4A 61 34)
// CHECK: }
.section double_slash; .long 1

0 comments on commit fa3089b

Please sign in to comment.