Skip to content

Commit

Permalink
AMDGPU/R600: Implement memory loads from constant AS
Browse files Browse the repository at this point in the history
Reviewers: tstellard

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D19792

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269479 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
jvesely committed May 13, 2016
1 parent 4245f51 commit fbff874
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 127 deletions.
13 changes: 10 additions & 3 deletions lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
Expand Down Expand Up @@ -607,10 +608,16 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
// Decides whether memory node N is treated as a load from the global
// address space.
//
// N is the memory node being classified. Returns false when N does not read
// memory; otherwise the result comes from the constant-address-space special
// cases below or from checkType().
//
// NOTE(review): this listing is a rendered diff hunk. The first
// CONSTANT_ADDRESS check and the generation/bitsLT condition right after it
// look like the pre-change lines, followed directly by their braced
// replacement -- confirm the exact nesting against the checked-in file.
bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
// A node that does not read memory is never a load.
if (!N->readMem())
return false;
if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getMemoryVT().bitsLT(MVT::i32))
if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
// Before SOUTHERN_ISLANDS, a constant-address-space access is reported as a
// global load unless its underlying object is a GlobalValue.
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
return !isa<GlobalValue>(
GetUnderlyingObject(N->getMemOperand()->getValue(),
CurDAG->getDataLayout()));

// TODO: Why do we need this? Constant-address-space accesses whose memory
// type is narrower than i32 are reported as global loads here.
if (N->getMemoryVT().bitsLT(MVT::i32))
return true;
}

// Everything else: ask checkType() about the memory operand's value with
// respect to GLOBAL_ADDRESS.
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
Expand Down
6 changes: 4 additions & 2 deletions lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,13 +429,15 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
if (Literals[i]->isImm()) {
MILit.addImm(Literals[i]->getImm());
} else {
MILit.addImm(0);
MILit.addGlobalAddress(Literals[i]->getGlobal(),
Literals[i]->getOffset());
}
if (i + 1 < e) {
if (Literals[i + 1]->isImm()) {
MILit.addImm(Literals[i + 1]->getImm());
} else {
MILit.addImm(0);
MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
Literals[i + 1]->getOffset());
}
} else
MILit.addImm(0);
Expand Down
63 changes: 12 additions & 51 deletions lib/Target/AMDGPU/R600ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,16 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getImm());
break;
case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
//TODO: Perhaps combine this instruction with the next if possible
auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
MI->getOperand(0).getReg(),
AMDGPU::ALU_LITERAL_X);
int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
// TODO: Overwriting the literal operand in place is ugly; find a cleaner way to attach the global address.
MIB->getOperand(Idx) = MI->getOperand(1);
break;
}
case AMDGPU::CONST_COPY: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Expand Down Expand Up @@ -914,43 +924,10 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,

const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
Type *EltType = GV->getValueType();
unsigned Size = DL.getTypeAllocSize(EltType);
unsigned Alignment = DL.getPrefTypeAlignment(EltType);

MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS);
MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);

const GlobalVariable *Var = cast<GlobalVariable>(GV);
if (!Var->hasInitializer()) {
// This has no use, but bugpoint will hit it.
return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}

const Constant *Init = Var->getInitializer();
SmallVector<SDNode*, 8> WorkList;

for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
E = DAG.getEntryNode()->use_end(); I != E; ++I) {
if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
continue;
WorkList.push_back(*I);
}
SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
E = WorkList.end(); I != E; ++I) {
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
Ops.push_back((*I)->getOperand(i));
}
DAG.UpdateNodeOperands(*I, Ops);
}
return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
Expand Down Expand Up @@ -1604,22 +1581,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();

// Lower loads of global variables in the constant address space
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalVariable>(GetUnderlyingObject(
LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

SDValue Ptr = DAG.getZExtOrTrunc(
LoadNode->getBasePtr(), DL,
getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(2, DL, MVT::i32));
return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
LoadNode->getChain(), Ptr,
DAG.getTargetConstant(0, DL, MVT::i32),
Op.getOperand(2));
}

if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
scalarizeVectorLoad(LoadNode, DAG),
Expand Down
7 changes: 7 additions & 0 deletions lib/Target/AMDGPU/R600Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,13 @@ def : Pat <
(MOV_IMM_I32 imm:$val)
>;

// Move-immediate variant whose operand is a global address rather than a
// plain constant. It is instantiated from MOV_IMM with iPTR and i32imm
// (presumably the value type and the operand kind -- confirm against the
// MOV_IMM class definition).
def MOV_IMM_GLOBAL_ADDR : MOV_IMM<iPTR, i32imm>;
// Select an AMDGPUconstdata_ptr node that wraps a target global address to
// MOV_IMM_GLOBAL_ADDR, passing the global address through as its operand.
def : Pat <
(AMDGPUconstdata_ptr tglobaladdr:$addr),
(MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr)
>;


def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
(fpimm:$val),
Expand Down
57 changes: 0 additions & 57 deletions test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll

This file was deleted.

33 changes: 19 additions & 14 deletions test/CodeGen/AMDGPU/gv-const-addrspace.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
Expand All @@ -10,13 +11,9 @@
; FUNC-LABEL: {{^}}float:
; GCN: s_load_dword

; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
; EG-DAG: MOV {{\** *}}T4.X
; EG-DAG: MOV {{\** *}}T5.X
; EG-DAG: MOV {{\** *}}T6.X
; EG: MOVA_INT

; EG: VTX_READ_32
; EG: @float_gv
; EG-NOT: MOVA_INT
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
%0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
Expand All @@ -31,13 +28,9 @@ entry:

; GCN: s_load_dword

; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
; EG-DAG: MOV {{\** *}}T4.X
; EG-DAG: MOV {{\** *}}T5.X
; EG-DAG: MOV {{\** *}}T6.X
; EG: MOVA_INT

; EG: VTX_READ_32
; EG: @i32_gv
; EG-NOT: MOVA_INT
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
Expand All @@ -54,6 +47,9 @@ entry:
; FUNC-LABEL: {{^}}struct_foo_gv_load:
; GCN: s_load_dword

; EG: VTX_READ_32
; EG: @struct_foo_gv
; EG-NOT: MOVA_INT
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
%load = load i32, i32 addrspace(2)* %gep, align 4
Expand All @@ -68,13 +64,22 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {

; Loads one <1 x i32> element of @array_v1_gv (addrspace(2)) at a runtime
; index and stores it to %out. The GCN checks expect an s_load_dword; the
; r600 checks expect a VTX_READ_32, then a reference to the global by name,
; and no MOVA_INT after that.
; FUNC-LABEL: {{^}}array_v1_gv_load:
; GCN: s_load_dword

; EG: VTX_READ_32
; EG: @array_v1_gv
; EG-NOT: MOVA_INT
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}

; FUNC-LABEL: {{^}}gv_addressing_in_branch:

; EG: VTX_READ_32
; EG: @float_gv
; EG-NOT: MOVA_INT
define void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) {
entry:
%0 = icmp eq i32 0, %a
Expand Down

0 comments on commit fbff874

Please sign in to comment.