Skip to content

Commit

Permalink
AMDGPU/SI: Don't emit multi-dword flat memory ops when they might acc…
Browse files Browse the repository at this point in the history
…ess scratch

Summary:
A single flat memory operation that might access the scratch buffer
can only access MaxPrivateElementSize bytes.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D25788

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285198 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
tstellarAMD committed Oct 26, 2016
1 parent e2f7559 commit 1a633d1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
16 changes: 16 additions & 0 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2591,6 +2591,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}

MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that a flat instruction accesses scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;

unsigned NumElements = MemVT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::CONSTANT_ADDRESS:
Expand Down Expand Up @@ -2890,6 +2898,14 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return expandUnalignedStore(Store, DAG);
}

MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that a flat instruction accesses scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;

unsigned NumElements = VT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
Expand Down
27 changes: 25 additions & 2 deletions test/CodeGen/AMDGPU/flat-address-space.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
Expand Down Expand Up @@ -149,6 +150,28 @@ define void @flat_scratch_unaligned_store() {
ret void
}

; Volatile load of a <2 x i32> through an addrspace(4) pointer that was
; produced by addrspacecast from a stack alloca, so the access may hit
; scratch memory. The amdhsa run expects the load to be emitted as two
; separate flat_load_dword instructions rather than one multi-dword load.
; NOTE(review): addrspace(4) is presumably the flat address space in this
; LLVM revision -- confirm against the AMDGPU address-space numbering.
; CHECK-LABEL: flat_scratch_multidword_load:
; HSA: flat_load_dword
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define void @flat_scratch_multidword_load() {
%scratch = alloca <2 x i32>
%fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
%ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
ret void
}

; Volatile store of a zero <2 x i32> through an addrspace(4) pointer that was
; produced by addrspacecast from a stack alloca, so the access may hit
; scratch memory. The amdhsa run expects the store to be emitted as two
; separate flat_store_dword instructions rather than one multi-dword store.
; NOTE(review): addrspace(4) is presumably the flat address space in this
; LLVM revision -- confirm against the AMDGPU address-space numbering.
; CHECK-LABEL: flat_scratch_multidword_store:
; HSA: flat_store_dword
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define void @flat_scratch_multidword_store() {
%scratch = alloca <2 x i32>
%fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #3 = { nounwind readnone }

0 comments on commit 1a633d1

Please sign in to comment.