Skip to content

Commit

Permalink
AMDGPU: Fix commuting v_sub_u16
Browse files Browse the repository at this point in the history
The correct commutable opcode was set to itself, so this
was simply swapping the operands to commute instead of also
changing the opcode to v_subrev_u16.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289093 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Dec 8, 2016
1 parent fcb33c4 commit 545a46b
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16>;
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
Expand Down
169 changes: 169 additions & 0 deletions test/CodeGen/AMDGPU/sub.i16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%b = load volatile i16, i16 addrspace(1)* %gep.in1
%add = sub i16 %a, %b
store i16 %add, i16 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_constant:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffff85, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%add = sub i16 %a, 123
store i16 %add, i16 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_neg_constant:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x34d, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%add = sub i16 %a, -845
store i16 %add, i16 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_inline_63:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffffc1, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_sub_i16_inline_63(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%add = sub i16 %a, 63
store i16 %add, i16 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_dword [[ADD]]
define void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%b = load volatile i16, i16 addrspace(1)* %gep.in1
%add = sub i16 %a, %b
%ext = zext i16 %add to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%b = load volatile i16, i16 addrspace(1)* %gep.in1
%add = sub i16 %a, %b
%ext = zext i16 %add to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
define void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
%a = load i16, i16 addrspace(1)* %gep.in0
%b = load i16, i16 addrspace(1)* %gep.in1
%add = sub i16 %a, %b
%ext = sext i16 %add to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
%a = load i16, i16 addrspace(1)* %gep.in0
%b = load i16, i16 addrspace(1)* %gep.in1
%add = sub i16 %a, %b
%ext = sext i16 %add to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}

@lds = addrspace(3) global [512 x i32] undef, align 4

; GCN-LABEL: {{^}}v_test_sub_i16_constant_commute:
; VI: v_subrev_u16_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
; CI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 0x800, v{{[0-9]+}}
define void @v_test_sub_i16_constant_commute(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%size.trunc = trunc i32 %size to i16
call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
%a = load volatile i16, i16 addrspace(1)* %gep.in0
%add = sub i16 %a, %size.trunc
store i16 %add, i16 addrspace(1)* %out
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.groupstaticsize() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

0 comments on commit 545a46b

Please sign in to comment.