Skip to content

Commit

Permalink
ELFObjectWriter: deduplicate suffixes in strtab
Browse files Browse the repository at this point in the history
We already do this for shstrtab, so might as well do it for strtab. This
extracts the string table building code into a separate class. The idea
is to use it for other object formats too.

I mostly wanted to do this for the general principle, but it does save a
little bit on object file size. I tried this on a clang bootstrap and
saved 0.54% on the sum of object file sizes (1.14 MB out of 212 MB for
a release build).

Differential Revision: http://reviews.llvm.org/D3533

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207670 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
zmodem committed Apr 30, 2014
1 parent b1c1b8a commit a8febf2
Show file tree
Hide file tree
Showing 22 changed files with 282 additions and 165 deletions.
59 changes: 59 additions & 0 deletions include/llvm/Object/StringTableBuilder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//===-- StringTableBuilder.h - String table building utility ------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_STRINGTABLE_BUILDER_H
#define LLVM_OBJECT_STRINGTABLE_BUILDER_H

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include <cassert>

namespace llvm {

/// \brief Utility for building string tables with deduplicated suffixes.
class StringTableBuilder {
SmallString<256> StringTable;
StringMap<size_t> StringIndexMap;

public:
/// \brief Add a string to the builder. Returns a StringRef to the internal
/// copy of s. Can only be used before the table is finalized.
StringRef add(StringRef s) {
assert(!isFinalized());
return StringIndexMap.GetOrCreateValue(s, 0).getKey();
}

/// \brief Analyze the strings and build the final table. No more strings can
/// be added after this point.
void finalize();

/// \brief Retrieve the string table data. Can only be used after the table
/// is finalized.
StringRef data() {
assert(isFinalized());
return StringTable;
}

/// \brief Get the offest of a string in the string table. Can only be used
/// after the table is finalized.
size_t getOffset(StringRef s) {
assert(isFinalized());
assert(StringIndexMap.count(s) && "String is not in table!");
return StringIndexMap[s];
}

private:
bool isFinalized() {
return !StringTable.empty();
}
};

} // end llvm namespace

#endif
118 changes: 32 additions & 86 deletions lib/MC/ELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/StringTableBuilder.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
Expand Down Expand Up @@ -132,11 +133,11 @@ class ELFObjectWriter : public MCObjectWriter {
MCSymbolData *SymbolData;
uint64_t StringIndex;
uint32_t SectionIndex;
StringRef Name;

// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
return SymbolData->getSymbol().getName() <
RHS.SymbolData->getSymbol().getName();
return Name < RHS.Name;
}
};

Expand All @@ -149,13 +150,13 @@ class ELFObjectWriter : public MCObjectWriter {

llvm::DenseMap<const MCSectionData *, std::vector<ELFRelocationEntry>>
Relocations;
DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
StringTableBuilder ShStrTabBuilder;

/// @}
/// @name Symbol Table Data
/// @{

SmallString<256> StringTable;
StringTableBuilder StrTabBuilder;
std::vector<uint64_t> FileSymbolData;
std::vector<ELFSymbolData> LocalSymbolData;
std::vector<ELFSymbolData> ExternalSymbolData;
Expand Down Expand Up @@ -676,7 +677,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
assert(StringTable.size() && "Missing string table");

// FIXME: Make sure the start of the symbol table is aligned.

Expand Down Expand Up @@ -1031,27 +1031,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
MCELF::SetBinding(Data, ELF::STB_GLOBAL);
}

// Index 0 is always the empty string.
StringMap<uint64_t> StringIndexMap;
StringTable += '\x00';

// FIXME: We could optimize suffixes in strtab in the same way we
// optimize them in shstrtab.

for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(),
ie = Asm.file_names_end();
it != ie;
++it) {
StringRef Name = *it;
uint64_t &Entry = StringIndexMap[Name];
if (!Entry) {
Entry = StringTable.size();
StringTable += Name;
StringTable += '\x00';
}
FileSymbolData.push_back(Entry);
}

// Add the data for the symbols.
for (MCSymbolData &SD : Asm.symbols()) {
const MCSymbol &Symbol = SD.getSymbol();
Expand Down Expand Up @@ -1102,22 +1081,15 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
// @@ in defined ones.
StringRef Name = Symbol.getName();
SmallString<32> Buf;

size_t Pos = Name.find("@@@");
if (Pos != StringRef::npos) {
Buf += Name.substr(0, Pos);
unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
Buf += Name.substr(Pos + Skip);
Name = Buf;
}
MSD.Name = StrTabBuilder.add(Name);

uint64_t &Entry = StringIndexMap[Name];
if (!Entry) {
Entry = StringTable.size();
StringTable += Name;
StringTable += '\x00';
}
MSD.StringIndex = Entry;
if (MSD.SectionIndex == ELF::SHN_UNDEF)
UndefinedSymbolData.push_back(MSD);
else if (Local)
Expand All @@ -1126,6 +1098,21 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
ExternalSymbolData.push_back(MSD);
}

for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
StrTabBuilder.add(*i);

StrTabBuilder.finalize();

for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
FileSymbolData.push_back(StrTabBuilder.getOffset(*i));

for (ELFSymbolData& MSD : LocalSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData& MSD : ExternalSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData& MSD : UndefinedSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);

// Symbols are required to be in lexicographic order.
array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
Expand Down Expand Up @@ -1436,23 +1423,6 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
}
}

static int compareBySuffix(const MCSectionELF *const *a,
const MCSectionELF *const *b) {
const StringRef &NameA = (*a)->getSectionName();
const StringRef &NameB = (*b)->getSectionName();
const unsigned sizeA = NameA.size();
const unsigned sizeB = NameB.size();
const unsigned len = std::min(sizeA, sizeB);
for (unsigned int i = 0; i < len; ++i) {
char ca = NameA[sizeA - i - 1];
char cb = NameB[sizeB - i - 1];
if (ca != cb)
return cb - ca;
}

return sizeB - sizeA;
}

void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
MCAsmLayout &Layout,
SectionIndexMapTy &SectionIndexMap,
Expand Down Expand Up @@ -1493,45 +1463,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
WriteSymbolTable(F, Asm, Layout, SectionIndexMap);

F = new MCDataFragment(&StrtabSD);
F->getContents().append(StringTable.begin(), StringTable.end());
F->getContents().append(StrTabBuilder.data().begin(),
StrTabBuilder.data().end());

F = new MCDataFragment(&ShstrtabSD);

std::vector<const MCSectionELF*> Sections;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
// Section header string table.
for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(it->getSection());
Sections.push_back(&Section);
}
array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);

// Section header string table.
//
// The first entry of a string table holds a null character so skip
// section 0.
uint64_t Index = 1;
F->getContents().push_back('\x00');

for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
const MCSectionELF &Section = *Sections[I];

StringRef Name = Section.getSectionName();
if (I != 0) {
StringRef PreviousName = Sections[I - 1]->getSectionName();
if (PreviousName.endswith(Name)) {
SectionStringTableIndex[&Section] = Index - Name.size() - 1;
continue;
}
}
// Remember the index into the string table so we can write it
// into the sh_name field of the section header table.
SectionStringTableIndex[&Section] = Index;

Index += Name.size() + 1;
F->getContents().append(Name.begin(), Name.end());
F->getContents().push_back('\x00');
ShStrTabBuilder.add(Section.getSectionName());
}
ShStrTabBuilder.finalize();
F->getContents().append(ShStrTabBuilder.data().begin(),
ShStrTabBuilder.data().end());
}

void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
Expand Down Expand Up @@ -1599,7 +1544,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,

switch(Section.getType()) {
case ELF::SHT_DYNAMIC:
sh_link = SectionStringTableIndex[&Section];
sh_link = ShStrTabBuilder.getOffset(Section.getSectionName());
sh_info = 0;
break;

Expand Down Expand Up @@ -1680,7 +1625,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
}
}

WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()),
Section.getType(),
Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
Alignment, Section.getEntrySize());
}
Expand Down
1 change: 1 addition & 0 deletions lib/Object/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ add_llvm_library(LLVMObject
MachOUniversal.cpp
Object.cpp
ObjectFile.cpp
StringTableBuilder.cpp
SymbolicFile.cpp
YAML.cpp
)
51 changes: 51 additions & 0 deletions lib/Object/StringTableBuilder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//===-- StringTableBuilder.cpp - String table building utility ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/StringTableBuilder.h"

using namespace llvm;

/// Ordering predicate that compares two strings from their last character
/// towards their first. At the first differing character the string with the
/// greater character sorts first; if one string is entirely a suffix of the
/// other, the longer string sorts first.
static bool compareBySuffix(StringRef a, StringRef b) {
  // Cursors start one past the end of each string and walk backwards in
  // lockstep until either string runs out.
  size_t posA = a.size();
  size_t posB = b.size();
  while (posA != 0 && posB != 0) {
    char lhs = a[--posA];
    char rhs = b[--posB];
    if (lhs != rhs)
      return lhs > rhs;
  }
  // The common tail matched completely; whichever string still has
  // characters left is the longer one.
  return posA > posB;
}

void StringTableBuilder::finalize() {
SmallVector<StringRef, 8> Strings;
for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i)
Strings.push_back(i->getKey());

std::sort(Strings.begin(), Strings.end(), compareBySuffix);

// FIXME: Starting with a null byte is ELF specific. Generalize this so we
// can use the class with other object formats.
StringTable += '\x00';

StringRef Previous;
for (StringRef s : Strings) {
if (Previous.endswith(s)) {
StringIndexMap[s] = StringTable.size() - 1 - s.size();
continue;
}

StringIndexMap[s] = StringTable.size();
StringTable += s;
StringTable += '\x00';
Previous = s;
}
}
2 changes: 1 addition & 1 deletion test/MC/AArch64/tls-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@

// CHECK-ELF: Symbols [
// CHECK-ELF: Symbol {
// CHECK-ELF: Name: var (6)
// CHECK-ELF: Name: var
// CHECK-ELF-NEXT: Value:
// CHECK-ELF-NEXT: Size:
// CHECK-ELF-NEXT: Binding: Global
Expand Down
2 changes: 1 addition & 1 deletion test/MC/ARM64/tls-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@

// CHECK-ELF: Symbols [
// CHECK-ELF: Symbol {
// CHECK-ELF: Name: var (6)
// CHECK-ELF: Name: var
// CHECK-ELF-NEXT: Value:
// CHECK-ELF-NEXT: Size:
// CHECK-ELF-NEXT: Binding: Global
Expand Down
6 changes: 3 additions & 3 deletions test/MC/ELF/comdat.s
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
// Test that g1 and g2 are local, but g3 is an undefined global.

// CHECK: Symbol {
// CHECK: Name: g1 (1)
// CHECK: Name: g1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
Expand All @@ -58,7 +58,7 @@
// CHECK-NEXT: Section: .foo (0x7)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: g2 (4)
// CHECK-NEXT: Name: g2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
Expand All @@ -68,7 +68,7 @@
// CHECK-NEXT: }

// CHECK: Symbol {
// CHECK: Name: g3 (7)
// CHECK: Name: g3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
Expand Down
Loading

0 comments on commit a8febf2

Please sign in to comment.