Skip to content

Commit

Permalink
AMDGPU : Add trap handler support.
Browse files Browse the repository at this point in the history
Differential Revision: http://reviews.llvm.org/D26010

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294692 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Wei Ding committed Feb 10, 2017
1 parent bcd8c96 commit c75c94d
Show file tree
Hide file tree
Showing 13 changed files with 213 additions and 30 deletions.
39 changes: 39 additions & 0 deletions docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,45 @@ VOP_SDWA examples:
For full list of supported instructions, refer to "Vector ALU instructions".

Trap Handler ABI
--------------------------
Trap handler support is implemented differently depending on the host OS. The
OS is obtained from the corresponding element of the target triple (e.g. the HSA OS):

.. code-block:: c++
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
};

TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For HSA OS, a trap handler is always enabled and the following S_TRAP immediate
operand codes are supported:

.. code-block:: c++
enum TrapCode {
TrapCodeBreakPoint = 0,
TrapCodeLLVMTrap = 1,
TrapCodeLLVMDebugTrap = 2,
TrapCodeHSADebugTrap = 3
};

- 0: Used for debugger breakpoints. If no debugger is installed, causes the
  dispatch to be terminated and its associated queue put into the error state.
- 1: Used for llvm.trap. queue_ptr is in SGPR0-1. Causes the dispatch to be
  terminated and its associated queue put into the error state.
- 2: Used for llvm.debugtrap. queue_ptr is in SGPR0-1. If no debugger is
  installed, handled the same as llvm.trap.
- 3: Used for HSA DEBUGTRAP. queue_ptr is in SGPR0-1, the user code is in VGPR0.

Graphics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For Graphics, S_ENDPGM is generated for llvm.trap. S_NOP is generated for
llvm.debugtrap together with a warning that there is no trap handler installed.

HSA Code Object Directives
--------------------------

Expand Down
6 changes: 6 additions & 0 deletions lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"Support unaligned global loads and stores"
>;

// Subtarget feature "trap-handler": when enabled it sets the subtarget's
// TrapHandler field to true. The subtarget initialization code appends
// "+trap-handler" to the default feature string for the AMD HSA OS.
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
"TrapHandler",
"true",
"Trap handler support"
>;

def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"UnalignedScratchAccess",
"true",
Expand Down
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
{ "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
{ "llvm.trap", "amdgpu-queue-ptr" }
{ "llvm.trap", "amdgpu-queue-ptr" },
{ "llvm.debugtrap", "amdgpu-queue-ptr" }
};

// TODO: We should not add the attributes if the known compile time workgroup
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
false);
Expand Down Expand Up @@ -634,6 +637,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
Expand Down
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,

SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,";
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";

FullFS += FS;

Expand Down Expand Up @@ -94,6 +94,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
UnalignedBufferAccess(false),

EnableXNACK(false),
TrapHandler(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
Expand Down
25 changes: 25 additions & 0 deletions lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,22 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
ISAVersion8_1_0,
};

/// Trap handler ABI variants. getTrapHandlerAbi() reports TrapHandlerAbiHsa
/// when the OS is AMD HSA and TrapHandlerAbiNone otherwise.
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
};

/// S_TRAP immediate operand codes supported for the HSA trap handler ABI.
enum TrapCode {
// Debugger breakpoint.
TrapCodeBreakPoint = 0,
// Emitted for llvm.trap.
TrapCodeLLVMTrap = 1,
// Emitted for llvm.debugtrap.
TrapCodeLLVMDebugTrap = 2,
// HSA DEBUGTRAP.
TrapCodeHSADebugTrap = 3
};

/// Register values associated with trap codes.
/// NOTE(review): no use of TrapCodeLLVMTrapRegValue is visible in this change;
/// presumably it names the value 1 that the earlier llvm.trap lowering moved
/// into VGPR0 -- confirm against callers.
enum TrapRegValues {
TrapCodeLLVMTrapRegValue = 1
};

protected:
// Basic subtarget description.
Triple TargetTriple;
Expand All @@ -88,6 +104,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
Expand Down Expand Up @@ -256,6 +273,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return CaymanISA;
}

/// \returns the trap handler ABI implied by the target OS:
/// TrapHandlerAbiHsa when isAmdHsaOS() is true, TrapHandlerAbiNone otherwise.
/// This depends only on the OS, not on whether the trap-handler feature is
/// enabled (see isTrapHandlerEnabled()).
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}

bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
Expand Down Expand Up @@ -309,6 +330,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return UnalignedScratchAccess;
}

/// \returns the TrapHandler flag, which is controlled by the "trap-handler"
/// subtarget feature and defaults to false in the constructor.
bool isTrapHandlerEnabled() const {
return TrapHandler;
}

bool isXNACKEnabled() const {
return EnableXNACK;
}
Expand Down
4 changes: 3 additions & 1 deletion lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ enum DstUnused {
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
// TRAP_HANDLER: single-bit field at bit 6 of COMPUTE_PGM_RSRC2.
// S_ places a value into the field, G_ extracts the field from a register
// value, and C_ is the mask with only that bit cleared.
#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
Expand Down Expand Up @@ -387,7 +390,6 @@ enum DstUnused {

#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8

} // End namespace llvm

#endif
59 changes: 41 additions & 18 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
Expand Down Expand Up @@ -1779,24 +1780,46 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}

switch (MI.getOpcode()) {
case AMDGPU::S_TRAP_PSEUDO: {
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
.addImm(1);

MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);

if (!BB->isLiveIn(UserSGPR))
BB->addLiveIn(UserSGPR);

BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
.addReg(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
.addReg(AMDGPU::VGPR0, RegState::Implicit)
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
case AMDGPU::S_TRAP_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
const int TrapType = MI.getOperand(0).getImm();

if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
Subtarget->isTrapHandlerEnabled()) {

MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);

if (!BB->isLiveIn(UserSGPR))
BB->addLiveIn(UserSGPR);

BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
.addReg(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
.addImm(TrapType)
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
} else {
switch (TrapType) {
case SISubtarget::TrapCodeLLVMTrap:
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
break;
case SISubtarget::TrapCodeLLVMDebugTrap: {
DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
"debugtrap handler not supported",
DL,
DS_Warning);
LLVMContext &C = MF->getFunction()->getContext();
C.diagnose(NoTrap);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
.addImm(0);
break;
}
default:
llvm_unreachable("unsupported trap handler type!");
}
}

MI.eraseFromParent();
return BB;
Expand Down
5 changes: 5 additions & 0 deletions lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,11 @@ def DSTOMOD {
int NONE = 0;
}

// Trap type immediates passed to S_TRAP_PSEUDO by the trap/debugtrap
// selection patterns. The values match SISubtarget::TrapCodeLLVMTrap (1) and
// SISubtarget::TrapCodeLLVMDebugTrap (2), which the custom inserter switches on.
def TRAPTYPE {
int LLVM_TRAP = 1;
int LLVM_DEBUG_TRAP = 2;
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
Expand Down
12 changes: 10 additions & 2 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

def S_TRAP_PSEUDO : VPseudoInstSI <(outs), (ins),
[(trap)]> {
def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
let hasSideEffects = 1;
let SALU = 1;
let usesCustomInserter = 1;
Expand Down Expand Up @@ -390,6 +389,15 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
} // End SubtargetPredicate = isGCN

let Predicates = [isGCN] in {
def : Pat<
(trap),
(S_TRAP_PSEUDO TRAPTYPE.LLVM_TRAP)
>;

def : Pat<
(debugtrap),
(S_TRAP_PSEUDO TRAPTYPE.LLVM_DEBUG_TRAP)
>;

def : Pat<
(int_amdgcn_else i64:$src, bb:$target),
Expand Down
2 changes: 1 addition & 1 deletion lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
// TODO: enable_trap_handler
COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
Expand Down
4 changes: 2 additions & 2 deletions test/CodeGen/AMDGPU/fneg-fabs.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}

; VI-NOT: and
; VI-NOT: _and
; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
Expand All @@ -22,7 +22,7 @@ define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
; CI: v_cvt_f16_f32_e32

; VI-NOT: and
; VI-NOT: _and
; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
; VI-NOT: [[MUL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
Expand Down
77 changes: 73 additions & 4 deletions test/CodeGen/AMDGPU/trap.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,80 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s

; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s

; enable trap handler feature
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s

; disable trap handler feature
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s

; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s

declare void @llvm.trap() #0
declare void @llvm.debugtrap() #0

; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 208

; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 144

; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 1
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP: s_trap 1

; For llvm.trap on the HSA path with the trap handler disabled, an s_endpgm instruction is generated directly, without any warning.
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0

; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-MESA-TRAP: s_endpgm
define void @hsa_trap() {
call void @llvm.trap()
ret void
}

; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 208

; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 144

; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
; GCN-LABEL: {{^}}hsa_debugtrap:
; HSA-TRAP: enable_trap_handler = 1
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP: s_trap 2

; For llvm.debugtrap on the HSA path with the trap handler disabled, a warning is generated and s_nop is emitted; the s_endpgm matched below presumably comes from the function's return.
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm

; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-MESA-TRAP: s_endpgm
define void @hsa_debugtrap() {
call void @llvm.debugtrap()
ret void
}

; For non-HSA path
; GCN-LABEL: {{^}}trap:
; GCN: v_mov_b32_e32 v0, 1
; GCN: s_mov_b64 s[0:1], s[4:5]
; GCN: s_trap 1
; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; NO-MESA-TRAP: s_endpgm
define void @trap() {
call void @llvm.trap()
ret void
Expand Down

0 comments on commit c75c94d

Please sign in to comment.