Skip to content

Commit

Permalink
AMDGPU: Change vintrp printing to better match sc
Browse files Browse the repository at this point in the history
Some of the immediates need to be printed differently
eventually.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289291 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Dec 10, 2016
1 parent 243f564 commit 425b3b6
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 19 deletions.
21 changes: 12 additions & 9 deletions lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,15 +679,18 @@ void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();

if (Imm == 2) {
O << "P0";
} else if (Imm == 1) {
O << "P20";
} else if (Imm == 0) {
O << "P10";
} else {
llvm_unreachable("Invalid interpolation parameter slot");
switch (Imm) {
case 0:
O << "p10";
break;
case 1:
O << "p20";
break;
case 2:
O << "p0";
break;
default:
O << "invalid_param_" << Imm;
}
}

Expand Down
6 changes: 3 additions & 3 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
0x00000000,
(outs VGPR_32:$vdst),
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
"v_interp_p1_f32 $vdst, $i, $attr_chan, $attr, [m0]",
"v_interp_p1_f32 $vdst, $i, $attr_chan, $attr",
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$i, (i32 imm:$attr_chan),
(i32 imm:$attr)))]
>;
Expand All @@ -75,7 +75,7 @@ defm V_INTERP_P2_F32 : VINTRP_m <
0x00000001,
(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
"v_interp_p2_f32 $vdst, [$src0], $j, $attr_chan, $attr, [m0]",
"v_interp_p2_f32 $vdst, $j, $attr_chan, $attr",
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$j, (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;

Expand All @@ -85,7 +85,7 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
0x00000002,
(outs VGPR_32:$vdst),
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
"v_interp_mov_f32 $vdst, $src0, $attr_chan, $attr, [m0]",
"v_interp_mov_f32 $vdst, $src0, $attr_chan, $attr",
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;

Expand Down
148 changes: 142 additions & 6 deletions test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s

;GCN-LABEL: {{^}}v_interp:
;GCN-NOT: s_wqm
;GCN: s_mov_b32 m0, s{{[0-9]+}}
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
main_body:
%i = extractelement <2 x float> %4, i32 0
Expand All @@ -20,10 +22,144 @@ main_body:
ret void
}

; GCN-LABEL: {{^}}v_interp_p1:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 0{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 0{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 0{{$}}

; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 3{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 4{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 63{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 64{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 64{{$}}
; Calls llvm.amdgcn.interp.p1 twelve times on the same %i input, varying the
; two immediate operands that the check lines expect to be printed as the
; trailing "attr_chan, attr" pair: attr_chan takes values 0-4 and attr takes
; values 0-4, 63 and 64. The final operand is 256 in every call (the check
; lines expect m0 to be set to 0x100).
define amdgpu_ps void @v_interp_p1(float %i) {
; attr fixed at 0, attr_chan swept over 0..4.
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
%p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
%p0_3 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 0, i32 256)
%p0_4 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 0, i32 256)
; Non-zero attr values, including 63 and 64.
%p0_5 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 1, i32 256)
%p0_6 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 2, i32 256)
%p0_7 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 3, i32 256)
%p0_8 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 4, i32 256)
%p0_9 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 63, i32 256)
%p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
%p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)

; Every result is written out with a volatile store (presumably so that each
; call stays live and is emitted -- confirm against the intrinsic's attributes).
store volatile float %p0_0, float addrspace(1)* undef
store volatile float %p0_1, float addrspace(1)* undef
store volatile float %p0_2, float addrspace(1)* undef
store volatile float %p0_3, float addrspace(1)* undef
store volatile float %p0_4, float addrspace(1)* undef
store volatile float %p0_5, float addrspace(1)* undef
store volatile float %p0_6, float addrspace(1)* undef
store volatile float %p0_7, float addrspace(1)* undef
store volatile float %p0_8, float addrspace(1)* undef
store volatile float %p0_9, float addrspace(1)* undef
store volatile float %p0_10, float addrspace(1)* undef
store volatile float %p0_11, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}v_interp_p2:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 0{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 63{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 64{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 64{{$}}
; Calls llvm.amdgcn.interp.p2 nine times on the same %x and %j inputs, varying
; the two immediate operands that the check lines expect to be printed as the
; trailing "attr_chan, attr" pair. The final operand is 256 in every call (the
; check lines expect m0 to be set to 0x100).
define amdgpu_ps void @v_interp_p2(float %x, float %j) {
; attr fixed at 0, attr_chan swept over 0..4.
%p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
%p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
%p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
%p2_3 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 3, i32 0, i32 256)
%p2_4 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 0, i32 256)

; attr values 1, 63 and 64, with attr_chan 0 and (for the last call) 4.
%p2_5 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 1, i32 256)
%p2_6 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 63, i32 256)
%p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
%p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)

; Every result is written out with a volatile store (presumably so that each
; call stays live and is emitted -- confirm against the intrinsic's attributes).
store volatile float %p2_0, float addrspace(1)* undef
store volatile float %p2_1, float addrspace(1)* undef
store volatile float %p2_2, float addrspace(1)* undef
store volatile float %p2_3, float addrspace(1)* undef
store volatile float %p2_4, float addrspace(1)* undef
store volatile float %p2_5, float addrspace(1)* undef
store volatile float %p2_6, float addrspace(1)* undef
store volatile float %p2_7, float addrspace(1)* undef
store volatile float %p2_8, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}v_interp_mov:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 0, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p20, 0, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, 0, 0{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 2, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 3, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 4, 0{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_8, 4, 0{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 63{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 64{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, 1, 64{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, 4, 64{{$}}
; Calls llvm.amdgcn.interp.mov thirteen times with constant operands only; the
; %x and %j arguments are not referenced in the body. The first operand is the
; parameter slot (0, 1 and 2 are expected to print as p10, p20 and p0; 3, 8 and
; 10 are expected to print as invalid_param_<N>), followed by the two
; immediates printed as the trailing "attr_chan, attr" pair. The final operand
; is 256 in every call (the check lines expect m0 to be set to 0x100).
define amdgpu_ps void @v_interp_mov(float %x, float %j) {
; Parameter slot swept over 0..3 with attr_chan = 0 and attr = 0.
%mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
%mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
%mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
%mov_3 = call float @llvm.amdgcn.interp.mov(i32 3, i32 0, i32 0, i32 256)

; attr_chan swept over 1..4 with attr = 0; the last call uses slot 8.
%mov_4 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 0, i32 256)
%mov_5 = call float @llvm.amdgcn.interp.mov(i32 0, i32 2, i32 0, i32 256)
%mov_6 = call float @llvm.amdgcn.interp.mov(i32 0, i32 3, i32 0, i32 256)
%mov_7 = call float @llvm.amdgcn.interp.mov(i32 0, i32 4, i32 0, i32 256)
%mov_8 = call float @llvm.amdgcn.interp.mov(i32 8, i32 4, i32 0, i32 256)

; attr values 63 and 64, combined with slots 0, 3 and 10.
%mov_9 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 63, i32 256)
%mov_10 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 64, i32 256)
%mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
%mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)

; Every result is written out with a volatile store (presumably so that each
; call stays live and is emitted -- confirm against the intrinsic's attributes).
store volatile float %mov_0, float addrspace(1)* undef
store volatile float %mov_1, float addrspace(1)* undef
store volatile float %mov_2, float addrspace(1)* undef
store volatile float %mov_3, float addrspace(1)* undef

store volatile float %mov_4, float addrspace(1)* undef
store volatile float %mov_5, float addrspace(1)* undef
store volatile float %mov_6, float addrspace(1)* undef
store volatile float %mov_7, float addrspace(1)* undef
store volatile float %mov_8, float addrspace(1)* undef

store volatile float %mov_9, float addrspace(1)* undef
store volatile float %mov_10, float addrspace(1)* undef
store volatile float %mov_11, float addrspace(1)* undef
store volatile float %mov_12, float addrspace(1)* undef
ret void
}

; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: {{^}}v_interp_readnone:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: s_mov_b32 m0, 0
; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
; VI: s_mov_b32 m0, -1{{$}}
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
store float 0.0, float addrspace(3)* %lds
Expand Down
2 changes: 1 addition & 1 deletion test/MC/Disassembler/AMDGPU/missing_op.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=fiji -disassemble < %s | FileCheck %s -check-prefix=VI

#TODO: this test will fail when we fix v_interp_p2_f32 signature, remove it then
#VI: v_interp_p2_f32 v7, [v7], 16, /*Missing OP3*/, /*Missing OP4*/
#VI: v_interp_p2_f32 v7, 16, /*Missing OP3*/, /*Missing OP4*/
0xd4 0x41 0x1d 0xd4

0 comments on commit 425b3b6

Please sign in to comment.