Skip to content

Commit

Permalink
AMDGPU/R600: Add implicitarg.ptr intrinsic
Browse files Browse the repository at this point in the history
Differential Revision: http://reviews.llvm.org/D21622

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275024 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
jvesely committed Jul 10, 2016
1 parent 8f101a7 commit e2b4643
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 36 deletions.
6 changes: 6 additions & 0 deletions include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;


// Intrinsic exposed to front ends as __builtin_r600_implicitarg_ptr.
// It takes no operands and returns an i8 pointer in address space 7; it is
// marked IntrNoMem.
// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;

def int_r600_rat_store_typed :
// 1st parameter: Data
// 2nd parameter: Index
Expand Down
10 changes: 5 additions & 5 deletions lib/Target/AMDGPU/EvergreenInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -210,23 +210,23 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3,
[(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3,
[(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3,
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3,
[(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3,
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

Expand Down
5 changes: 5 additions & 0 deletions lib/Target/AMDGPU/R600ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}

case Intrinsic::r600_implicitarg_ptr: {
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
case Intrinsic::r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case Intrinsic::r600_read_ngroups_y:
Expand Down
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/R600Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>

class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;

def load_param : LoadParamFrag<load>;
Expand Down
114 changes: 114 additions & 0 deletions test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s


; Test for llvm.amdgcn.read.workdim. The kernel stores the intrinsic result
; to %out. The checks below expect a scalar dword load based on s[0:1], a copy
; of the loaded value into a VGPR, and a buffer store of that VGPR.
; NOTE(review): the two expected load offsets satisfy 0x2c == 4 * 0xb, so they
; presumably name the same location in bytes vs. dwords - confirm.
; FUNC-LABEL: {{^}}workdim:

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

define void @workdim (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.read.workdim() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; The workgroup.id values are stored in sgprs offset by the number of user
; sgprs.

; Test for llvm.amdgcn.workgroup.id.x. The kernel stores the intrinsic result
; to %out. The checks expect the id to be copied out of s2 (the match is
; anchored at end of line) and then stored, and they pin the reported
; resource settings: two user SGPRs, only the X workgroup id enabled, and a
; work-item id component count of 0.
; FUNC-LABEL: {{^}}workgroup_id_x:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @workgroup_id_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workgroup.id.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Test for llvm.amdgcn.workgroup.id.y. The kernel stores the intrinsic result
; to %out. The checks expect the id to be copied out of s3 and then stored,
; with two user SGPRs reported.
; The register operand is anchored with {{$}} so that a longer register name
; such as s30-s39 cannot satisfy the match.
; NOTE(review): s3 is presumably the slot after the two user SGPRs and the X
; id - confirm against the SGPR layout.
; FUNC-LABEL: {{^}}workgroup_id_y:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define void @workgroup_id_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workgroup.id.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Test for llvm.amdgcn.workgroup.id.z. The kernel stores the intrinsic result
; to %out. The checks expect the id to be copied out of s3 (anchored at end
; of line) and then stored, and they pin the reported resource settings: two
; user SGPRs, the X and Z workgroup ids enabled, Y disabled, and a work-item
; id component count of 0.
; NOTE(review): with Y disabled the Z id presumably takes the slot right
; after X, which would explain s3 - confirm.
; FUNC-LABEL: {{^}}workgroup_id_z:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @workgroup_id_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workgroup.id.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Test for llvm.amdgcn.workitem.id.x. The kernel stores the intrinsic result
; to %out, and the checks expect v0 to be stored directly.
; The config checks expect a .long 47180 (0xB84C) immediately followed by a
; .long 132 in the .AMDGPU.config section.
; NOTE(review): 0xB84C is presumably the COMPUTE_PGM_RSRC2 register address
; and 132 its value; because these checks precede the function label they
; presumably match a config block emitted ahead of the function - confirm.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}workitem_id_x:
; GCN-NOHSA: buffer_store_dword v0
define void @workitem_id_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workitem.id.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Test for llvm.amdgcn.workitem.id.y. The kernel stores the intrinsic result
; to %out, and the checks expect v1 to be stored directly.
; The config checks expect a .long 47180 (0xB84C) immediately followed by a
; .long 2180 in the .AMDGPU.config section.
; NOTE(review): 2180 == 132 + 2048, presumably the same register value as in
; the X-only case with a work-item id component count of 1 - confirm.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}workitem_id_y:

; GCN-NOHSA: buffer_store_dword v1
define void @workitem_id_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workitem.id.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Test for llvm.amdgcn.workitem.id.z. The kernel stores the intrinsic result
; to %out, and the checks expect v2 to be stored directly.
; The config checks expect a .long 47180 (0xB84C) immediately followed by a
; .long 4228 in the .AMDGPU.config section.
; NOTE(review): 4228 == 132 + 4096, presumably the same register value as in
; the X-only case with a work-item id component count of 2 - confirm.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}workitem_id_z:
; GCN-NOHSA: buffer_store_dword v2
define void @workitem_id_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.amdgcn.workitem.id.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; Declarations of the intrinsics exercised by the kernels in this file.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i32 @llvm.amdgcn.read.workdim() #0

; Attribute group #0 is referenced by every declaration above and by each
; intrinsic call site; define it so those references resolve to an explicit
; attribute set instead of dangling.
attributes #0 = { nounwind readnone }
Loading

0 comments on commit e2b4643

Please sign in to comment.