Skip to content

Commit

Permalink
AMDGPU: Treat texture gather instructions more like other MIMG instru…
Browse files Browse the repository at this point in the history
…ctions

Summary:
Setting MIMG to 0 has a bunch of unexpected side effects, including that
isVMEM returns false which leads to incorrect treatment in the hazard
recognizer. The reason I noticed it is that it also leads to incorrect
treatment in VGPR-to-SGPR copies, which is one cause of the referenced bug.

The only reason why MIMG was set to 0 is to signal the special handling of
dmasks, but that can be checked differently.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96877

Reviewers: arsenm, tstellarAMD

Subscribers: arsenm, kzhuravl, llvm-commits

Differential Revision: http://reviews.llvm.org/D22210

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275113 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
nhaehnle committed Jul 11, 2016
1 parent ae108ee commit ac1e47a
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 5 deletions.
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum {
FLAT = 1 << 21,
WQM = 1 << 22,
VGPRSpill = 1 << 23,
VOPAsmPrefer32Bit = 1 << 24
VOPAsmPrefer32Bit = 1 << 24,
Gather4 = 1 << 25
};
}

Expand Down
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3133,7 +3133,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();

if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
!TII->isGather4(Opcode))
adjustWritemask(Node, DAG);

if (Opcode == AMDGPU::INSERT_SUBREG ||
Expand Down
3 changes: 3 additions & 0 deletions lib/Target/AMDGPU/SIInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class InstSI <dag outs, dag ins, string asm = "",
// is unable to infer the encoding from the operands.
field bits<1> VOPAsmPrefer32Bit = 0;

field bits<1> Gather4 = 0;

// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
Expand Down Expand Up @@ -78,6 +80,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{22} = WQM;
let TSFlags{23} = VGPRSpill;
let TSFlags{24} = VOPAsmPrefer32Bit;
let TSFlags{25} = Gather4;

let SchedRW = [Write32Bit];

Expand Down
8 changes: 8 additions & 0 deletions lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

static bool isGather4(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
}

bool isGather4(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::Gather4;
}

static bool isFLAT(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
}
Expand Down
4 changes: 2 additions & 2 deletions lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3557,8 +3557,8 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
// (red,red,red,red) etc.) The ISA document doesn't mention
// this.
// Therefore, disable all code which updates DMASK by setting these two:
let MIMG = 0;
// Therefore, disable all code which updates DMASK by setting this:
let Gather4 = 1;
let hasPostISelHook = 0;
let WQM = wqm;

Expand Down
22 changes: 21 additions & 1 deletion test/CodeGen/AMDGPU/llvm.SI.gather4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,27 @@ main_body:
ret void
}


;CHECK-LABEL: {{^}}gather4_sgpr_bug:
;
; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
;
;TODO: the readfirstlanes are unnecessary, see http://reviews.llvm.org/D22217
;
;CHECK: v_readfirstlane_b32 s[[LO:[0-9]+]], v{{[0-9]+}}
;CHECK: v_readfirstlane_b32
;CHECK: v_readfirstlane_b32
;CHECK: v_readfirstlane_b32 s[[HI:[0-9]+]], v{{[0-9]+}}
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
define amdgpu_ps float @gather4_sgpr_bug() {
main_body:
%tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
%tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp4 = extractelement <4 x float> %tmp2, i32 1
%tmp9 = fadd float undef, %tmp4
ret float %tmp9
}

declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
Expand Down

0 comments on commit ac1e47a

Please sign in to comment.