Skip to content

Commit

Permalink
[XRay] ARM 32-bit no-Thumb support in LLVM
Browse files Browse the repository at this point in the history
This is a port of XRay to ARM 32-bit, without Thumb support yet. The XRay instrumentation support is moving up to AsmPrinter.
This is one of 3 commits to different repositories of XRay ARM port. The other 2 are:

1. https://reviews.llvm.org/D23932 (Clang test)
2. https://reviews.llvm.org/D23933 (compiler-rt)

Differential Revision: https://reviews.llvm.org/D23931

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280888 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
deanberris committed Sep 8, 2016
1 parent 1d54212 commit 339ade7
Show file tree
Hide file tree
Showing 17 changed files with 317 additions and 62 deletions.
28 changes: 28 additions & 0 deletions include/llvm/CodeGen/AsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,34 @@ class AsmPrinter : public MachineFunctionPass {

MCSymbol *getSymbol(const GlobalValue *GV) const;

//===------------------------------------------------------------------===//
// XRay instrumentation implementation.
//===------------------------------------------------------------------===//
public:
// This describes the kind of sled we're storing in the XRay table. The
// underlying type is uint8_t; the ARM table emitter writes the value out as a
// single byte per entry.
enum class SledKind : uint8_t {
// Sled recorded when lowering PATCHABLE_FUNCTION_ENTER.
FUNCTION_ENTER = 0,
// Sled recorded when lowering PATCHABLE_FUNCTION_EXIT.
FUNCTION_EXIT = 1,
// Presumably the sled placed at a tail call (PATCHABLE_TAIL_CALL); no user of
// this value is visible in this change -- confirm against the target lowering.
TAIL_CALL = 2,
};

// The table will contain these structs that point to the sled, the function
// containing the sled, and what kind of sled (and whether they should always
// be instrumented).
// NOTE: recordSled() brace-initialises this struct positionally, so the order
// of the members below must not change.
struct XRayFunctionEntry {
// Symbol identifying the sled, as passed to recordSled().
const MCSymbol *Sled;
// Symbol of the function being emitted when the sled was recorded
// (CurrentFnSym).
const MCSymbol *Function;
// Which kind of sled this entry describes.
SledKind Kind;
// True when the function's "function-instrument" attribute is the string
// "xray-always".
bool AlwaysInstrument;
// The IR function that contains the instrumented machine instruction.
const class Function *Fn;
};

// All the sleds to be emitted. recordSled() appends to this list; the target
// AsmPrinter is expected to emit and then clear it (the ARM printer does so
// once per function in EmitXRayTable()).
std::vector<XRayFunctionEntry> Sleds;

// Helper function to record a given XRay sled.
//   Sled - symbol identifying the sled.
//   MI   - the instruction being lowered; used to find the enclosing function
//          and its "function-instrument" attribute.
//   Kind - the kind of sled being recorded.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);

//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//
Expand Down
10 changes: 9 additions & 1 deletion include/llvm/Target/Target.td
Original file line number Diff line number Diff line change
Expand Up @@ -956,11 +956,19 @@ def PATCHABLE_FUNCTION_ENTER : Instruction {
// Pseudo-instruction that wraps a return instruction and its operands so that
// a patchable sled can be emitted in place of the return. XRayInstrumentation
// builds it as "PATCHABLE_RET <Opcode>, <Operand>..." and erases the original
// return.
def PATCHABLE_RET : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
let AsmString = "# XRay Function Exit.";
let AsmString = "# XRay Function Patchable RET.";
let usesCustomInserter = 1;
let hasSideEffects = 1;
let isReturn = 1;
}
// Marker pseudo-instruction that is lowered to a patchable nop sled placed
// immediately before the function's original return instruction. It takes no
// operands and produces no results; the original return instruction is kept
// and follows the sled.
def PATCHABLE_FUNCTION_EXIT : Instruction {
  let OutOperandList = (outs);
  let InOperandList = (ins);
  let AsmString = "# XRay Function Exit.";
  let usesCustomInserter = 1;
  // The sled this expands to is rewritten at run time into a call to the XRay
  // exit handler. With no operands and no side effects the instruction would
  // look trivially dead and freely movable, so mark it as having side effects,
  // consistent with PATCHABLE_RET.
  let hasSideEffects = 1;
  let isReturn = 0; // Original return instruction will follow
}
def PATCHABLE_TAIL_CALL : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
Expand Down
17 changes: 17 additions & 0 deletions include/llvm/Target/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,25 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER)
/// Wraps a return instruction and its operands to enable adding nop sleds
/// either before or after the return. The nop sleds are useful for inserting
/// instrumentation instructions at runtime.
/// The patch here replaces the return instruction.
HANDLE_TARGET_OPCODE(PATCHABLE_RET)

/// This is a marker instruction which gets translated into a nop sled, useful
/// for inserting instrumentation instructions at runtime.
/// The sled is placed immediately before the return instruction, which is kept.
/// The x86_64 approach of replacing the return is not possible on ARM because
/// ARM has multiple return instructions. Furthermore, the CPU allows
/// parametrized and even conditional return instructions. The current ARM
/// implementation relies on the fact that LLVM currently doesn't seem to
/// generate conditional return instructions.
/// On ARM, the same instruction can be used for popping multiple registers
/// from the stack and returning (it just pops the pc register too), and LLVM
/// sometimes generates it. So we can't insert the sled between this stack
/// adjustment and the return without splitting the original instruction into
/// two instructions. So on ARM, rather than jumping into the exit trampoline,
/// we call it; it does the tracing, preserves the stack and returns.
HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)

/// Wraps a tail call instruction and its operands to enable adding nop sleds
/// either before or after the tail exit. We use this as a disambiguation from
/// PATCHABLE_RET which specifically only works for return instructions.
Expand Down
2 changes: 2 additions & 0 deletions include/llvm/Target/TargetSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class TargetSubtargetInfo : public MCSubtargetInfo {

virtual ~TargetSubtargetInfo();

// Returns true if XRay instrumentation is supported for this subtarget. The
// default is false; the XRayInstrumentation pass leaves a function untouched
// when this returns false, so targets that implement XRay must override it.
virtual bool isXRaySupported() const { return false; }

// Interfaces to the major aspects of target machine information:
//
// -- Instruction opcode and operand information
Expand Down
10 changes: 10 additions & 0 deletions lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2606,3 +2606,13 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
AsmPrinterHandler::~AsmPrinterHandler() {}

void AsmPrinterHandler::markFunctionEnd() {}

// Appends an entry describing the sled labelled by Sled to the Sleds list.
//   Sled - symbol identifying the sled.
//   MI   - the instruction being lowered; its enclosing IR function supplies
//          the "function-instrument" attribute.
//   Kind - the kind of sled being recorded.
// The entry is associated with CurrentFnSym, and is flagged as
// always-instrumented when the attribute is the string "xray-always".
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
                            SledKind Kind) {
  const auto *EnclosingFn = MI.getParent()->getParent()->getFunction();
  const auto InstrumentAttr =
      EnclosingFn->getFnAttribute("function-instrument");

  bool AlwaysInstrument = false;
  if (InstrumentAttr.isStringAttribute())
    AlwaysInstrument = InstrumentAttr.getValueAsString() == "xray-always";

  Sleds.push_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument,
                                    EnclosingFn});
}
110 changes: 82 additions & 28 deletions lib/CodeGen/XRayInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,74 @@ struct XRayInstrumentation : public MachineFunctionPass {
}

bool runOnMachineFunction(MachineFunction &MF) override;

private:
// Replace the original RET instruction with the exit sled code ("patchable
// ret" pseudo-instruction), so that at runtime XRay can replace the sled
// with code jumping to the XRay trampoline, which calls the tracing handler
// and, in the end, issues the RET instruction.
// This is the approach to take on CPUs which have a single RET instruction,
// like x86/x86_64.
void replaceRetWithPatchableRet(MachineFunction &MF,
const TargetInstrInfo *TII);
// Insert the exit sled code ("patchable function exit" pseudo-instruction)
// immediately before the original return instruction, which is preserved
// right after the sled.
// This is the approach to take on CPUs which have multiple options for the
// return instruction, like ARM. For such CPUs we can't just jump into the
// XRay trampoline and issue a single return instruction there. Instead we
// have to call the trampoline and return from it to the original return
// instruction of the function being instrumented.
void prependRetWithPatchableExit(MachineFunction &MF,
const TargetInstrInfo *TII);
};
} // anonymous namespace

// Replaces return and tail-call terminators of MF with patchable
// pseudo-instructions.
//   MF  - the machine function to rewrite.
//   TII - the target's instruction info, used to recognise the return opcode
//         and tail calls and to build the replacement instructions.
// Each replacement carries the original opcode as an immediate followed by a
// copy of the original operands. The original instructions are erased only
// after all blocks have been scanned.
void XRayInstrumentation::replaceRetWithPatchableRet(
    MachineFunction &MF, const TargetInstrInfo *TII) {
  SmallVector<MachineInstr *, 4> Replaced;

  for (auto &Block : MF) {
    for (auto &Term : Block.terminators()) {
      const bool IsPlainReturn =
          Term.isReturn() && Term.getOpcode() == TII->getReturnOpcode();
      const bool IsTailCall = TII->isTailCall(Term);
      if (!IsPlainReturn && !IsTailCall)
        continue;

      // A tail call takes precedence over a plain return: it gets a
      // different-looking sled than the normal return case.
      const unsigned SledOpcode = IsTailCall
                                      ? TargetOpcode::PATCHABLE_TAIL_CALL
                                      : TargetOpcode::PATCHABLE_RET;

      // Build "<SledOpcode> <original opcode>, <original operands>..." just
      // before the terminator it replaces.
      auto Builder =
          BuildMI(Block, Term, Term.getDebugLoc(), TII->get(SledOpcode));
      Builder.addImm(Term.getOpcode());
      for (auto &Operand : Term.operands())
        Builder.addOperand(Operand);

      Replaced.push_back(&Term);
    }
  }

  // Erase afterwards so the terminator ranges are not modified mid-iteration.
  for (MachineInstr *Dead : Replaced)
    Dead->eraseFromParent();
}

// Inserts a PATCHABLE_FUNCTION_EXIT pseudo-instruction immediately before
// every return terminator of MF, leaving the return instruction itself in
// place.
//   MF  - the machine function to instrument.
//   TII - the target's instruction info, used to build the pseudo-instruction.
void XRayInstrumentation::prependRetWithPatchableExit(
    MachineFunction &MF, const TargetInstrInfo *TII) {
  for (auto &MBB : MF) {
    for (auto &T : MBB.terminators()) {
      if (!T.isReturn())
        continue;
      // The pseudo-instruction takes no operands, so the builder returned by
      // BuildMI is not needed after insertion.
      BuildMI(MBB, T, T.getDebugLoc(),
              TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
    }
  }
}

bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
Expand All @@ -54,6 +121,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false; // Function is too small.
}

if (!MF.getSubtarget().isXRaySupported()) {
//FIXME: can this be reported somehow?
return false;
}

// FIXME: Do the loop triviality analysis here or in an earlier pass.

// First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
Expand All @@ -64,35 +136,17 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));

// Then we look for *all* terminators and returns, then replace those with
// PATCHABLE_RET instructions.
SmallVector<MachineInstr *, 4> Terminators;
for (auto &MBB : MF) {
for (auto &T : MBB.terminators()) {
unsigned Opc = 0;
if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
// Replace return instructions with:
// PATCHABLE_RET <Opcode>, <Operand>...
Opc = TargetOpcode::PATCHABLE_RET;
}
if (TII->isTailCall(T)) {
// Treat the tail call as a return instruction, which has a
// different-looking sled than the normal return case.
Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
}
if (Opc != 0) {
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
.addImm(T.getOpcode());
for (auto &MO : T.operands())
MIB.addOperand(MO);
Terminators.push_back(&T);
}
}
switch (MF.getTarget().getTargetTriple().getArch()) {
case Triple::ArchType::arm:
// For the architectures which don't have a single return instruction
prependRetWithPatchableExit(MF, TII);
break;
default:
// For the architectures that have a single return instruction (such as
// RETQ on x86_64).
replaceRetWithPatchableRet(MF, TII);
break;
}

for (auto &I : Terminators)
I->eraseFromParent();

return true;
}

Expand Down
9 changes: 9 additions & 0 deletions lib/Target/ARM/ARMAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();

// Emit the XRay table for this function.
EmitXRayTable();

// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
Expand Down Expand Up @@ -2005,6 +2008,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
case ARM::PATCHABLE_FUNCTION_ENTER:
LowerPATCHABLE_FUNCTION_ENTER(*MI);
return;
case ARM::PATCHABLE_FUNCTION_EXIT:
LowerPATCHABLE_FUNCTION_EXIT(*MI);
return;
}

MCInst TmpInst;
Expand Down
12 changes: 12 additions & 0 deletions lib/Target/ARM/ARMAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,19 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);

//===------------------------------------------------------------------===//
// XRay implementation
//===------------------------------------------------------------------===//
public:
// XRay-specific lowering for ARM. Each of these emits a sled for the given
// pseudo-instruction and records it with the matching SledKind.
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
// Helper function that emits the table describing the XRay sleds we've
// collected for a particular function, then clears the collected list.
void EmitXRayTable();

private:
// Emits one patchable sled (a branch over a run of no-ops) for MI and records
// it as a sled of the given Kind.
void EmitSled(const MachineInstr &MI, SledKind Kind);

// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/ARM/ARMBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;

// Fills NopInst with the no-op instruction to use on ELF targets. This
// implementation simply forwards to getNoopForMachoTarget(), i.e. it uses the
// same no-op as for Mach-O targets.
virtual void getNoopForElfTarget(MCInst &NopInst) const {
getNoopForMachoTarget(NopInst);
}

// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
Expand Down
87 changes: 87 additions & 0 deletions lib/Target/ARM/ARMMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;


Expand Down Expand Up @@ -150,3 +155,85 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
}
}
}

void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
{
static const int8_t NoopsInSledCount = 6;
// We want to emit the following pattern:
//
// .Lxray_sled_N:
// ALIGN
// B #20
// ; 6 NOP instructions (24 bytes)
// .tmpN
//
// We need the 24 bytes (6 instructions) because at runtime, we'd be patching
// over the full 28 bytes (7 instructions) with the following pattern:
//
// PUSH{ r0, lr }
// MOVW r0, #<lower 16 bits of function ID>
// MOVT r0, #<higher 16 bits of function ID>
// MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
// MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
// BLX ip
// POP{ r0, lr }
//
OutStreamer->EmitCodeAlignment(4);
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->EmitLabel(CurSled);
auto Target = OutContext.createTempSymbol();

// Emit "B #20" instruction, which jumps over the next 24 bytes (because
// register pc is 8 bytes ahead of the jump instruction by the moment CPU
// is executing it).
// By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
// It is not clear why |addReg(0)| is needed (the last operand).
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
.addImm(ARMCC::AL).addReg(0));

MCInst Noop;
Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
for (int8_t I = 0; I < NoopsInSledCount; I++)
{
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
}

OutStreamer->EmitLabel(Target);
recordSled(CurSled, MI, Kind);
}

// Lowers a PATCHABLE_FUNCTION_ENTER pseudo-instruction by emitting a sled and
// recording it as a function-entry sled.
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
{
EmitSled(MI, SledKind::FUNCTION_ENTER);
}

// Lowers a PATCHABLE_FUNCTION_EXIT pseudo-instruction by emitting a sled and
// recording it as a function-exit sled.
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
{
EmitSled(MI, SledKind::FUNCTION_EXIT);
}

// Emits the table of XRay sleds recorded for the current function and clears
// the recorded list.
//
// Nothing is emitted when no sleds were recorded. On non-ELF targets the
// recorded sleds are discarded without emitting anything.
//
// On ELF targets the entries go into the "xray_instr_map" section, grouped
// under the current function's name. Each entry is 16 bytes:
//   4 bytes  address of the sled
//   4 bytes  address of the current function
//   1 byte   sled kind
//   1 byte   always-instrument flag
//   6 bytes  zero padding
// The previously active section is restored afterwards.
void ARMAsmPrinter::EmitXRayTable() {
  if (Sleds.empty())
    return;

  if (!Subtarget->isTargetELF()) {
    Sleds.clear();
    return;
  }

  auto *MapSection = OutContext.getELFSection(
      "xray_instr_map", ELF::SHT_PROGBITS,
      ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
      CurrentFnSym->getName());
  auto SavedSection = OutStreamer->getCurrentSectionOnly();
  OutStreamer->SwitchSection(MapSection);

  for (const auto &Entry : Sleds) {
    OutStreamer->EmitSymbolValue(Entry.Sled, 4);
    OutStreamer->EmitSymbolValue(CurrentFnSym, 4);

    // The kind and the always-instrument flag are each written as one raw
    // byte.
    const auto KindByte = static_cast<uint8_t>(Entry.Kind);
    const char *KindPtr = reinterpret_cast<const char *>(&KindByte);
    OutStreamer->EmitBytes(StringRef(KindPtr, 1));

    const char *AlwaysPtr =
        reinterpret_cast<const char *>(&Entry.AlwaysInstrument);
    OutStreamer->EmitBytes(StringRef(AlwaysPtr, 1));

    OutStreamer->EmitZeros(6);
  }

  OutStreamer->SwitchSection(SavedSection);
  Sleds.clear();
}
Loading

0 comments on commit 339ade7

Please sign in to comment.