forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AMDGPU: Fix legalization of MUBUF instructions in shaders
Summary: The addr64-based legalization is incorrect for MUBUF instructions with idxen set as well as for BUFFER_LOAD/STORE_FORMAT_* instructions. This affects e.g. shaders that access buffer textures. Since we never actually need the addr64-legalization in shaders, this patch takes the easy route and keys off the calling convention. If this ever affects (non-OpenGL) compute, the type of legalization needs to be chosen based on some TSFlag. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98664 Reviewers: arsenm, tstellarAMD Subscribers: kzhuravl, wdng, yaxunl, tony-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D26747 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287339 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
2 changed files
with
62 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
; Compile this file with llc for two amdgcn CPUs (verde and tonga) with | ||
; -verify-machineinstrs enabled; both invocations pipe the output to | ||
; FileCheck and share the same check prefix. | ||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK | ||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK | ||
|
||
; Test that buffer_load_format with VGPR resource descriptor is properly | ||
; legalized. | ||
|
||
; test_none: the <4 x i32> value passed to the intrinsic is loaded through | ||
; %base indexed by %i. Both i32 operands that follow it are the constant 0, | ||
; so the expected instruction has `off` where the other tests have a v | ||
; register, takes an s[N:M] register range, and carries neither the idxen | ||
; nor the offen modifier. | ||
; CHECK-LABEL: {{^}}test_none: | ||
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} | ||
define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) { | ||
main_body: | ||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i | ||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32 | ||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0) | ||
ret float %tmp7 | ||
} | ||
|
||
; test_idxen: same setup as the other tests in this file, but the first i32 | ||
; operand after the <4 x i32> value is undef (the second stays 0). The | ||
; expected instruction takes a single v register, an s[N:M] register range, | ||
; and ends with only the idxen modifier. | ||
; CHECK-LABEL: {{^}}test_idxen: | ||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}} | ||
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) { | ||
main_body: | ||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i | ||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32 | ||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0) | ||
ret float %tmp7 | ||
} | ||
|
||
; test_offen: same setup as the other tests in this file, but the second i32 | ||
; operand after the <4 x i32> value is undef (the first stays 0). The | ||
; expected instruction takes a single v register, an s[N:M] register range, | ||
; and ends with only the offen modifier. | ||
; CHECK-LABEL: {{^}}test_offen: | ||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} | ||
define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) { | ||
main_body: | ||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i | ||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32 | ||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0) | ||
ret float %tmp7 | ||
} | ||
|
||
; test_both: same setup as the other tests in this file, but both i32 | ||
; operands after the <4 x i32> value are undef. The expected instruction | ||
; takes a v[N:M] register range (rather than a single v register), an | ||
; s[N:M] register range, and ends with both modifiers, idxen then offen. | ||
; CHECK-LABEL: {{^}}test_both: | ||
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}} | ||
define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) { | ||
main_body: | ||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i | ||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32 | ||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0) | ||
ret float %tmp7 | ||
} | ||
|
||
; Declaration of the intrinsic called by each test function in this file: | ||
; it takes a <4 x i32> value, two i32 operands and two i1 flags, returns a | ||
; float, and is marked nounwind readonly inline. | ||
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly | ||
|
||
; NOTE(review): attribute group #0 is not referenced by any definition or | ||
; declaration visible in this file (the intrinsic declaration spells its | ||
; attributes inline) -- confirm whether it is still needed. | ||
attributes #0 = { nounwind readnone } |