Skip to content

Commit

Permalink
Refactor data-in-code annotations.
Browse files Browse the repository at this point in the history
Use a dedicated MachO load command to annotate data-in-code regions.
This is the same format the linker produces for final executable images,
allowing consistency of representation and use of introspection tools
for both object and executable files.

Data-in-code regions are annotated via ".data_region"/".end_data_region"
directive pairs, with an optional region type.

data_region_directive := ".data_region" { region_type }
region_type := "jt8" | "jt16" | "jt32" | "jta32"
end_data_region_directive := ".end_data_region"

The previous handling of ARM-style "$d.*" labels was broken and has
been removed. Specifically, it didn't handle ARM vs. Thumb mode when
marking the end of the section.

rdar://11459456

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157062 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Jim Grosbach committed May 18, 2012
1 parent 18e2f6e commit 3e96531
Show file tree
Hide file tree
Showing 28 changed files with 411 additions and 215 deletions.
28 changes: 8 additions & 20 deletions include/llvm/MC/MCAsmInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ namespace llvm {
// FIXME: Make this a more general encoding setting?
bool AllowUTF8;

/// UseDataRegionDirectives - This is true if data region markers should
/// be printed as ".data_region"/".end_data_region" directives. If false,
/// the assembly streamer prints nothing for data region markers.
bool UseDataRegionDirectives;

//===--- Data Emission Directives -------------------------------------===//

/// ZeroDirective - this should be set to the directive used to get some
Expand All @@ -172,18 +177,6 @@ namespace llvm {
const char *Data32bitsDirective; // Defaults to "\t.long\t"
const char *Data64bitsDirective; // Defaults to "\t.quad\t"

/// [Data|Code]Begin - These magic labels are used to mark a region as
/// data or code, and are used to provide additional information for
/// correct disassembly on targets that like to mix data and code within
/// a segment. These labels will be implicitly suffixed by the streamer
/// to give them unique names.
const char *DataBegin; // Defaults to "$d."
const char *CodeBegin; // Defaults to "$a."
const char *JT8Begin; // Defaults to "$d."
const char *JT16Begin; // Defaults to "$d."
const char *JT32Begin; // Defaults to "$d."
bool SupportsDataRegions; // Defaults to false.

/// GPRel64Directive - if non-null, a directive that is used to emit a word
/// which should be relocated as a 64-bit GP-relative offset, e.g. .gpdword
/// on Mips.
Expand Down Expand Up @@ -384,14 +377,6 @@ namespace llvm {
const char *getGPRel64Directive() const { return GPRel64Directive; }
const char *getGPRel32Directive() const { return GPRel32Directive; }

/// [Code|Data]Begin label name accessors. Each one returns the string
/// stored in the correspondingly named member, unmodified.
const char *getCodeBeginLabelName() const { return CodeBegin; }
const char *getDataBeginLabelName() const { return DataBegin; }
const char *getJumpTable8BeginLabelName() const { return JT8Begin; }
const char *getJumpTable16BeginLabelName() const { return JT16Begin; }
const char *getJumpTable32BeginLabelName() const { return JT32Begin; }
/// getSupportsDataRegions - Return the SupportsDataRegions flag.
bool getSupportsDataRegions() const { return SupportsDataRegions; }

/// getNonexecutableStackSection - Targets can implement this method to
/// specify a section to switch to if the translation unit doesn't have any
/// trampolines that require an executable stack.
Expand Down Expand Up @@ -488,6 +473,9 @@ namespace llvm {
bool doesAllowUTF8() const {
return AllowUTF8;
}
/// doesSupportDataRegionDirectives - Return the UseDataRegionDirectives
/// flag, i.e. whether data region markers should be printed as
/// ".data_region"/".end_data_region" directives.
bool doesSupportDataRegionDirectives() const {
return UseDataRegionDirectives;
}
const char *getZeroDirective() const {
return ZeroDirective;
}
Expand Down
42 changes: 42 additions & 0 deletions include/llvm/MC/MCAssembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,16 @@ struct IndirectSymbolData {
MCSectionData *SectionData;
};

// FIXME: Ditto this. Purely so the Streamer and the ObjectWriter can talk
// to one another.
/// DataRegionData - Record describing one data region: its kind plus a
/// pair of symbols.
struct DataRegionData {
// This enum should be kept in sync w/ the mach-o definition in
// llvm/Object/MachOFormat.h.
// Enumerators are numbered starting at 1 (Data = 1 ... JumpTable32 = 4).
enum KindTy { Data = 1, JumpTable8, JumpTable16, JumpTable32 } Kind;
// Presumably the symbol marking where the region begins -- TODO confirm
// against the code that fills this in.
MCSymbol *Start;
// Presumably the symbol marking where the region ends. NOTE(review):
// whether this is inclusive or one-past-the-end is not visible here.
MCSymbol *End;
};

class MCAssembler {
friend class MCAsmLayout;

Expand All @@ -668,6 +678,10 @@ class MCAssembler {
const_indirect_symbol_iterator;
typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;

typedef std::vector<DataRegionData>::const_iterator
const_data_region_iterator;
typedef std::vector<DataRegionData>::iterator data_region_iterator;

private:
MCAssembler(const MCAssembler&); // DO NOT IMPLEMENT
void operator=(const MCAssembler&); // DO NOT IMPLEMENT
Expand Down Expand Up @@ -698,6 +712,7 @@ class MCAssembler {

std::vector<IndirectSymbolData> IndirectSymbols;

/// The list of data region records held by this assembler; exposed through
/// getDataRegions() and the data_region_* accessors.
std::vector<DataRegionData> DataRegions;
/// The set of function symbols for which a .thumb_func directive has
/// been seen.
//
Expand Down Expand Up @@ -883,6 +898,33 @@ class MCAssembler {

size_t indirect_symbol_size() const { return IndirectSymbols.size(); }

/// @}
/// @name Data Region List Access
/// @{

// FIXME: This is a total hack, this should not be here. Once things are
// factored so that the streamer has direct access to the .o writer, it can
// disappear.
//
// Returns a mutable reference to the underlying vector, so callers can
// append to or otherwise modify the list directly.
std::vector<DataRegionData> &getDataRegions() {
return DataRegions;
}

// Iterator to the first data region record (mutable and const flavors).
data_region_iterator data_region_begin() {
return DataRegions.begin();
}
const_data_region_iterator data_region_begin() const {
return DataRegions.begin();
}

// Iterator one past the last data region record (mutable and const flavors).
data_region_iterator data_region_end() {
return DataRegions.end();
}
const_data_region_iterator data_region_end() const {
return DataRegions.end();
}

// Number of data region records currently in the list.
size_t data_region_size() const { return DataRegions.size(); }

/// @}
/// @name Backend Data Access
/// @{
Expand Down
4 changes: 4 additions & 0 deletions include/llvm/MC/MCContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ namespace llvm {
/// with a unique but unspecified name.
MCSymbol *CreateTempSymbol();

/// getUniqueSymbolID() - Return a unique identifier for use in constructing
/// symbol names. Returns the current value of NextUniqueID and then
/// increments it, so every call advances the counter.
unsigned getUniqueSymbolID() { return NextUniqueID++; }

/// CreateDirectionalLocalSymbol - Create the definition of a directional
/// local symbol for numbered label (used for "1:" definitions).
MCSymbol *CreateDirectionalLocalSymbol(int64_t LocalLabelVal);
Expand Down
8 changes: 8 additions & 0 deletions include/llvm/MC/MCDirectives.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ enum MCAssemblerFlag {
MCAF_Code64 ///< .code64 (X86)
};

/// MCDataRegionType - Kinds of data region marker accepted by the
/// streamer's EmitDataRegion(). The first four enumerators correspond to a
/// ".data_region" directive (optionally with a jump-table type operand);
/// MCDR_DataRegionEnd corresponds to ".end_data_region".
enum MCDataRegionType {
MCDR_DataRegion, ///< .data_region
MCDR_DataRegionJT8, ///< .data_region jt8
MCDR_DataRegionJT16, ///< .data_region jt16
MCDR_DataRegionJT32, ///< .data_region jt32
MCDR_DataRegionEnd ///< .end_data_region
};

} // end namespace llvm

#endif
3 changes: 3 additions & 0 deletions include/llvm/MC/MCMachObjectWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ class MachObjectWriter : public MCObjectWriter {

void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);

void WriteLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
uint32_t DataSize);

// FIXME: We really need to improve the relocation validation. Basically, we
// want to implement a separate computation which evaluates the relocation
// entry as the linker would, and verifies that the resultant fixup value is
Expand Down
55 changes: 4 additions & 51 deletions include/llvm/MC/MCStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,7 @@ namespace llvm {
SmallVector<std::pair<const MCSection *,
const MCSection *>, 4> SectionStack;

unsigned UniqueCodeBeginSuffix;
unsigned UniqueDataBeginSuffix;

protected:
/// Indicator of whether the previous data-or-code indicator was for
/// code or not. Used to determine when we need to emit a new indicator.
enum DataType {
Data,
Code,
JumpTable8,
JumpTable16,
JumpTable32
};
DataType RegionIndicator;


MCStreamer(MCContext &Ctx);

const MCExpr *BuildSymbolDiff(MCContext &Context, const MCSymbol *A,
Expand Down Expand Up @@ -241,47 +226,15 @@ namespace llvm {
/// used in an assignment.
virtual void EmitLabel(MCSymbol *Symbol);

/// EmitDataRegion - Emit a label that marks the beginning of a data
/// region.
/// On ELF targets, this corresponds to an assembler statement such as:
/// $d.1:
virtual void EmitDataRegion();

/// EmitJumpTable8Region - Emit a label that marks the beginning of a
/// jump table composed of 8-bit offsets.
/// On ELF targets, this corresponds to an assembler statement such as:
/// $d.1:
virtual void EmitJumpTable8Region();

/// EmitJumpTable16Region - Emit a label that marks the beginning of a
/// jump table composed of 16-bit offsets.
/// On ELF targets, this corresponds to an assembler statement such as:
/// $d.1:
virtual void EmitJumpTable16Region();

/// EmitJumpTable32Region - Emit a label that marks the beginning of a
/// jump table composed of 32-bit offsets.
/// On ELF targets, this corresponds to an assembler statement such as:
/// $d.1:
virtual void EmitJumpTable32Region();

/// EmitCodeRegion - Emit a label that marks the beginning of a code
/// region.
/// On ELF targets, this corresponds to an assembler statement such as:
/// $a.1:
virtual void EmitCodeRegion();

/// ForceCodeRegion - Forcibly sets the current region mode to code. Used
/// at function entry points.
void ForceCodeRegion() { RegionIndicator = Code; }


virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol);

/// EmitAssemblerFlag - Note in the output the specified @p Flag
/// EmitAssemblerFlag - Note in the output the specified @p Flag.
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0;

/// EmitDataRegion - Note in the output the specified region @p Kind.
/// The default implementation has an empty body and does nothing;
/// subclasses that want to emit data region markers override it.
virtual void EmitDataRegion(MCDataRegionType Kind) {}

/// EmitThumbFunc - Note in the output that the specified @p Func is
/// a Thumb mode function (ARM target only).
virtual void EmitThumbFunc(MCSymbol *Func) = 0;
Expand Down
18 changes: 16 additions & 2 deletions include/llvm/Object/MachOFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ namespace macho {
DysymtabLoadCommandSize = 80,
Nlist32Size = 12,
Nlist64Size = 16,
RelocationInfoSize = 8
RelocationInfoSize = 8,
LinkeditLoadCommandSize = 16
};

/// \brief Constants for header magic field.
Expand Down Expand Up @@ -140,7 +141,8 @@ namespace macho {
LCT_UUID = 0x1b,
LCT_CodeSignature = 0x1d,
LCT_SegmentSplitInfo = 0x1e,
LCT_FunctionStarts = 0x26
LCT_FunctionStarts = 0x26,
LCT_DataInCode = 0x29
};

/// \brief Load command structure.
Expand Down Expand Up @@ -279,6 +281,18 @@ namespace macho {
uint64_t Value;
};

/// @}
/// @name Data-in-code Table Entry
/// @{

// See <mach-o/loader.h>.
// Kinds of data region; enumerators are numbered starting at 1
// (Data = 1 ... JumpTable32 = 4).
enum DataRegionType { Data = 1, JumpTable8, JumpTable16, JumpTable32 };
// One entry of the data-in-code table. Length is a 16-bit field, so a
// single entry cannot describe a region longer than 65535 bytes.
struct DataInCodeTableEntry {
uint32_t Offset; /* from mach_header to start of data region */
uint16_t Length; /* number of bytes in data region */
uint16_t Kind; /* a DataRegionType value */
};

/// @}
/// @name Indirect Symbol Table
/// @{
Expand Down
3 changes: 3 additions & 0 deletions include/llvm/Object/MachOObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ class MachOObject {
void ReadSymbol64TableEntry(
uint64_t SymbolTableOffset, unsigned Index,
InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
void ReadDataInCodeTableEntry(
uint64_t TableOffset, unsigned Index,
InMemoryStruct<macho::DataInCodeTableEntry> &Res) const;
void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;

/// @}
Expand Down
13 changes: 1 addition & 12 deletions lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,10 +475,8 @@ void AsmPrinter::EmitFunctionHeader() {
void AsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
if (CurrentFnSym->isUndefined()) {
OutStreamer.ForceCodeRegion();
if (CurrentFnSym->isUndefined())
return OutStreamer.EmitLabel(CurrentFnSym);
}

report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
Expand Down Expand Up @@ -1085,15 +1083,6 @@ void AsmPrinter::EmitJumpTableInfo() {

EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));

// If we know the form of the jump table, go ahead and tag it as such.
if (!JTInDiffSection) {
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
OutStreamer.EmitJumpTable32Region();
} else {
OutStreamer.EmitDataRegion();
}
}

for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;

Expand Down
7 changes: 1 addition & 6 deletions lib/MC/MCAsmInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,14 @@ MCAsmInfo::MCAsmInfo() {
AllowNameToStartWithDigit = false;
AllowPeriodsInName = true;
AllowUTF8 = true;
UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
Data8bitsDirective = "\t.byte\t";
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
Data64bitsDirective = "\t.quad\t";
DataBegin = "$d.";
CodeBegin = "$a.";
JT8Begin = "$d.";
JT16Begin = "$d.";
JT32Begin = "$d.";
SupportsDataRegions = false;
SunStyleELFSectionSwitchSyntax = false;
UsesELFSectionDirectiveForBSS = false;
AlignDirective = "\t.align\t";
Expand Down
2 changes: 0 additions & 2 deletions lib/MC/MCAsmInfoCOFF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
SupportsDebugInformation = true;
DwarfSectionOffsetDirective = "\t.secrel32\t";
HasMicrosoftFastStdCallMangling = true;

SupportsDataRegions = false;
}

void MCAsmInfoMicrosoft::anchor() { }
Expand Down
7 changes: 0 additions & 7 deletions lib/MC/MCAsmInfoDarwin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasMachoTBSSDirective = true; // Uses .tbss
HasStaticCtorDtorReferenceInStaticMode = true;

CodeBegin = "L$start$code$";
DataBegin = "L$start$data$";
JT8Begin = "L$start$jt8$";
JT16Begin = "L$start$jt16$";
JT32Begin = "L$start$jt32$";
SupportsDataRegions = true;

// FIXME: Darwin 10 and newer don't need this.
LinkerRequiresNonEmptyDwarfLines = true;

Expand Down
16 changes: 16 additions & 0 deletions lib/MC/MCAsmStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class MCAsmStreamer : public MCStreamer {
virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
virtual void EmitDataRegion(MCDataRegionType Kind);
virtual void EmitThumbFunc(MCSymbol *Func);

virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
Expand Down Expand Up @@ -352,6 +353,21 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
EmitEOL();
}

/// EmitDataRegion - Print the directive for data region marker @p Kind,
/// followed by an end-of-line. Prints nothing at all when the target's
/// MCAsmInfo reports that data region directives are not in use.
void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
  // Bail out before touching the output stream if directives are disabled.
  if (!getContext().getAsmInfo().doesSupportDataRegionDirectives())
    return;

  // Select the directive text first, then write it in one place.
  const char *Directive = 0;
  switch (Kind) {
  case MCDR_DataRegion:
    Directive = "\t.data_region";
    break;
  case MCDR_DataRegionJT8:
    Directive = "\t.data_region jt8";
    break;
  case MCDR_DataRegionJT16:
    Directive = "\t.data_region jt16";
    break;
  case MCDR_DataRegionJT32:
    Directive = "\t.data_region jt32";
    break;
  case MCDR_DataRegionEnd:
    Directive = "\t.end_data_region";
    break;
  }

  // An out-of-range Kind leaves Directive null; as before, only the
  // end-of-line is emitted in that case.
  if (Directive)
    OS << Directive;
  EmitEOL();
}

void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
// This needs to emit to a temporary string to get properly quoted
// MCSymbols when they have spaces in them.
Expand Down
2 changes: 2 additions & 0 deletions lib/MC/MCELFStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

#include "MCELF.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
Expand Down
Loading

0 comments on commit 3e96531

Please sign in to comment.