Skip to content

Commit

Permalink
AMDGPU/SI: Implement atomic load/store for i32 and i64
Browse files Browse the repository at this point in the history
Standard load/store instructions with GLC bit set.

Reviewers: tstellardAMD, arsenm

Differential Revision: http://reviews.llvm.org/D18760

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265709 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
jvesely committed Apr 7, 2016
1 parent f313dae commit 62aa62a
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 24 deletions.
45 changes: 33 additions & 12 deletions lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,16 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
static bool checkType(const Value *ptr, unsigned int addrspace);
static bool checkPrivateAddress(const MachineMemOperand *Op);

static bool isGlobalStore(const StoreSDNode *N);
static bool isFlatStore(const StoreSDNode *N);
static bool isGlobalStore(const MemSDNode *N);
static bool isFlatStore(const MemSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);

bool isCPLoad(const LoadSDNode *N) const;
bool isConstantLoad(const LoadSDNode *N, int cbID) const;
bool isGlobalLoad(const LoadSDNode *N) const;
bool isFlatLoad(const LoadSDNode *N) const;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool isGlobalLoad(const MemSDNode *N) const;
bool isFlatLoad(const MemSDNode *N) const;
bool isParamLoad(const LoadSDNode *N) const;
bool isPrivateLoad(const LoadSDNode *N) const;
bool isLocalLoad(const LoadSDNode *N) const;
Expand Down Expand Up @@ -128,6 +128,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
void SelectMUBUFConstant(SDValue Constant,
SDValue &SOffset,
SDValue &ImmOffset) const;
Expand Down Expand Up @@ -558,7 +560,9 @@ bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
if (!N->writeMem())
return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

Expand All @@ -573,23 +577,29 @@ bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
if (!N->writeMem())
return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
const Value *MemVal = N->getMemOperand()->getValue();
if (CbId == -1)
return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
if (!N->readMem())
return false;
if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getMemoryVT().bitsLT(MVT::i32))
Expand All @@ -606,7 +616,9 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
if (!N->readMem())
return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

Expand Down Expand Up @@ -955,8 +967,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,

SDLoc DL(Addr);

GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
if (!GLC.getNode())
GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
if (!SLC.getNode())
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Expand Down Expand Up @@ -1112,6 +1126,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
SDValue GLC, SLC, TFE;

return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &GLC) const {
Expand Down
5 changes: 5 additions & 0 deletions lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,11 @@ def global_store : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

def global_store_atomic : PatFrag<(ops node:$val, node:$ptr),
(atomic_store node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<MemSDNode>(N));
}]>;

// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
Expand Down
18 changes: 18 additions & 0 deletions lib/Target/AMDGPU/CIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,11 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(inst $addr, 0, 0, 0)
>;

class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 1, 0, 0)
>;

def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
Expand All @@ -297,17 +302,30 @@ def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;


class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $addr, $data, 0, 0, 0)
>;

class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
// atomic store follows aotmic binop convenction so the address comes first
(node i64:$addr, vt:$data),
(inst $addr, $data, 1, 0, 0)
>;

def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;

class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
(vt (node i64:$addr, data_vt:$data)),
Expand Down
19 changes: 14 additions & 5 deletions lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -140,24 +140,26 @@ def SIconstdata_ptr : SDNode<

class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);
return isFlatLoad(dyn_cast<MemSDNode>(N)) ||
isGlobalLoad(dyn_cast<MemSDNode>(N)) ||
isConstantLoad(cast<MemSDNode>(N), -1);
}]>;

def flat_load : flat_ld <load>;
def atomic_flat_load : flat_ld<atomic_load>;
def flat_az_extloadi8 : flat_ld <az_extloadi8>;
def flat_sextloadi8 : flat_ld <sextloadi8>;
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
def flat_sextloadi16 : flat_ld <sextloadi16>;

class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
isGlobalStore(dyn_cast<StoreSDNode>(N));
return isFlatStore(dyn_cast<MemSDNode>(N)) ||
isGlobalStore(dyn_cast<MemSDNode>(N));
}]>;

def flat_store: flat_st <store>;
def atomic_flat_store: flat_st <atomic_store>;
def flat_truncstorei8 : flat_st <truncstorei8>;
def flat_truncstorei16 : flat_st <truncstorei16>;

Expand All @@ -167,6 +169,12 @@ def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
isConstantLoad(cast<LoadSDNode>(N), -1);
}]>;

def mubuf_load_atomic : PatFrag <(ops node:$ptr), (atomic_load node:$ptr), [{
return isGlobalLoad(cast<MemSDNode>(N)) ||
isConstantLoad(cast<MemSDNode>(N), -1);
}]>;


def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(cast<LoadSDNode>(N), -1) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
Expand Down Expand Up @@ -721,6 +729,7 @@ def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
Expand Down
48 changes: 41 additions & 7 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3102,20 +3102,35 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
// MUBUF Patterns
//===----------------------------------------------------------------------===//

multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
class MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : Pat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;

multiclass MUBUFLoad_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
ValueType vt, PatFrag atomic_ld> {
def : Pat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
>;

def : Pat <
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
>;
}

let Predicates = [isSICI] in {
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;

defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
} // End Predicates = [isSICI]

class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
Expand Down Expand Up @@ -3176,6 +3191,25 @@ defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;

multiclass MUBUFStore_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is forst
def : Pat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), vt:$val),
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
>;

def : Pat <
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
>;
}
let Predicates = [isSICI] in {
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
} // End Predicates = [isSICI]

class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
Expand Down
Loading

0 comments on commit 62aa62a

Please sign in to comment.