Skip to content

Commit

Permalink
Revert "r225811 - Revert "r225808 - [PowerPC] Add StackMap/PatchPoint…
Browse files Browse the repository at this point in the history
… support""

This re-applies r225808, fixed to avoid problems with SDAG dependencies along
with the preceding fix to ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs.
These problems caused the original regression tests to assert/segfault on many
(but not all) systems.

Original commit message:

This commit does two things:

 1. Refactors PPCFastISel to use more of the common infrastructure for call
    lowering (this lets us take advantage of this common code for lowering some
    common intrinsics, stackmap/patchpoint among them).

 2. Adds support for stackmap/patchpoint lowering. For the most part, this is
    very similar to the support in the AArch64 target, with the obvious differences
    (different registers, NOP instructions, etc.). The test cases are adapted
    from the AArch64 test cases.

One difference of note is that the patchpoint call sequence takes 24 bytes, so
you can't use less than that (on AArch64 you can go down to 16). Also, as noted
in the docs, we take the patchpoint address to be the actual code address
(assuming the call is local in the TOC-sharing sense), which should yield
higher performance than generating the full cross-DSO indirect-call sequence
and is likely just as useful for JITed code (if not, we'll change it).

StackMaps and Patchpoints are still marked as experimental, and so this support
is doubly experimental. So go ahead and experiment!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225909 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Hal Finkel committed Jan 14, 2015
1 parent 21befa7 commit ade705c
Show file tree
Hide file tree
Showing 17 changed files with 1,121 additions and 94 deletions.
6 changes: 6 additions & 0 deletions docs/StackMaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,12 @@ lowered according to the calling convention specified at the
intrinsic's callsite. Variants of the intrinsic with non-void return
type also return a value according to calling convention.

On PowerPC, note that the ``<target>`` must be the actual intended target of
the indirect call, not the function-descriptor address normally used as the
C/C++ function-pointer representation. As a result, the call target must be
local because no adjustment or restoration of the TOC pointer (in register r2)
will be performed.

Requesting zero patch point arguments is valid. In this case, all
variable operands are handled just like
``llvm.experimental.stackmap.*``. The difference is that space will
Expand Down
90 changes: 89 additions & 1 deletion lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
Expand Down Expand Up @@ -69,10 +70,11 @@ namespace {
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget &Subtarget;
uint64_t TOCLabelID;
StackMaps SM;
public:
explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {}

const char *getPassName() const override {
return "PowerPC Assembly Printer";
Expand All @@ -90,6 +92,13 @@ namespace {
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;

void EmitEndOfAsmFile(Module &M) override;

void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
};

/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
Expand Down Expand Up @@ -316,6 +325,80 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
return TOCEntry;
}

void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
SM.serializeToStackMapSection();
}

void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();

SM.recordStackMap(MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");

// Scan ahead to trim the shadow.
const MachineBasicBlock &MBB = *MI.getParent();
MachineBasicBlock::const_iterator MII(MI);
++MII;
while (NumNOPBytes > 0) {
if (MII == MBB.end() || MII->isCall() ||
MII->getOpcode() == PPC::DBG_VALUE ||
MII->getOpcode() == TargetOpcode::PATCHPOINT ||
MII->getOpcode() == TargetOpcode::STACKMAP)
break;
++MII;
NumNOPBytes -= 4;
}

// Emit nops.
for (unsigned i = 0; i < NumNOPBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);

int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
unsigned EncodedBytes = 0;
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 6*4;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));

EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
}

// Emit padding.
unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
assert((NumBytes - EncodedBytes) % 4 == 0 &&
"Invalid number of NOP bytes requested!");
for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}

/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
Expand All @@ -332,6 +415,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(OutStreamer, SM, *MI);

case PPC::MoveGOTtoLR: {
// Transform %LR = MoveGOTtoLR
// Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
Expand Down
35 changes: 35 additions & 0 deletions lib/Target/PowerPC/PPCCallingConv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the custom routines for the PPC Calling Convention that
// aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"

namespace llvm {

inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
llvm_unreachable("The AnyReg calling convention is only supported by the " \
"stackmap and patchpoint intrinsics.");
// gracefully fallback to PPC C calling convention on Release builds.
return false;
}

} // End llvm namespace

#endif

38 changes: 38 additions & 0 deletions lib/Target/PowerPC/PPCCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,21 @@ class CCIfNotSubtarget<string F, CCAction A>
// Return Value Calling Convention
//===----------------------------------------------------------------------===//

// PPC64 AnyReg return-value convention. No explicit register is specified for
// the return-value. The register allocator is allowed and expected to choose
// any free register.
//
// This calling convention is currently only supported by the stackmap and
// patchpoint intrinsics. All other uses will result in an assert on Debug
// builds. On Release builds we fallback to the PPC C calling convention.
def RetCC_PPC64_AnyReg : CallingConv<[
CCCustom<"CC_PPC_AnyReg_Error">
]>;

// Return-value convention for PowerPC
def RetCC_PPC : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,

// On PPC64, integer return values are always promoted to i64
CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
Expand All @@ -51,6 +64,15 @@ def RetCC_PPC : CallingConv<[
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;

// No explicit register is specified for the AnyReg calling convention. The
// register allocator may assign the arguments to any free register.
//
// This calling convention is currently only supported by the stackmap and
// patchpoint intrinsics. All other uses will result in an assert on Debug
// builds. On Release builds we fallback to the PPC C calling convention.
def CC_PPC64_AnyReg : CallingConv<[
CCCustom<"CC_PPC_AnyReg_Error">
]>;

// Note that we don't currently have calling conventions for 64-bit
// PowerPC, but handle all the complexities of the ABI in the lowering
Expand All @@ -61,6 +83,8 @@ def RetCC_PPC : CallingConv<[
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
def CC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,

CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
Expand All @@ -74,6 +98,8 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,

CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
Expand Down Expand Up @@ -203,3 +229,15 @@ def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;

def CSR_NoRegs : CalleeSavedRegs<(add)>;

def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
(sequence "X%u", 14, 31),
(sequence "F%u", 0, 31),
(sequence "CR%u", 0, 7))>;

def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs,
(sequence "V%u", 0, 31))>;

def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
(sequence "VSL%u", 0, 31),
(sequence "VSH%u", 0, 31))>;

Loading

0 comments on commit ade705c

Please sign in to comment.