Skip to content

Commit

Permalink
AMDGPUAnnotateUniformValue should always treat volatile loads as dive…
Browse files Browse the repository at this point in the history
…rgent

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304554 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
alex-t committed Jun 2, 2017
1 parent 7df090c commit dfdb788
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
isMemOpHasNoClobberedMemOperand(Load))
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
Expand Down
1 change: 1 addition & 0 deletions lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
!Ld->isVolatile() &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
Expand Down
15 changes: 15 additions & 0 deletions test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: @volatile_load
; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}

define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) {
bb:
%tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4
%tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5
store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4
ret void
}

0 comments on commit dfdb788

Please sign in to comment.