Revert "AMDGPU: Add VI i16 support"
This reverts commits r285939 and r285948. These broke some conformance tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285995 91177308-0d34-0410-b5e6-96231b3b80d8
tstellarAMD committed Nov 4, 2016
1 parent 1228c39 commit ae152bd
Showing 42 changed files with 341 additions and 1,490 deletions.
2 changes: 0 additions & 2 deletions lib/Target/AMDGPU/AMDGPU.td
@@ -493,8 +493,6 @@ def isCIVI : Predicate <

def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;

def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;

class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
27 changes: 3 additions & 24 deletions lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -586,32 +586,19 @@ bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const

bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.

unsigned SrcSize = Source.getSizeInBits();
unsigned DestSize = Dest.getSizeInBits();

return DestSize < SrcSize && DestSize % 32 == 0;
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
}

bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
// Truncate is just accessing a subregister.

unsigned SrcSize = Source->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();

if (DestSize == 16 && Subtarget->has16BitInsts())
return SrcSize >= 32;

return DestSize < SrcSize && DestSize % 32 == 0;
return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
(Dest->getPrimitiveSizeInBits() % 32 == 0);
}

bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
unsigned SrcSize = Src->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();

if (SrcSize == 16 && Subtarget->has16BitInsts())
return DestSize >= 32;

return SrcSize == 32 && DestSize == 64;
}

@@ -620,10 +607,6 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
// practical purposes, the extra mov 0 to load a 64-bit is free. As used,
// this will enable reducing 64-bit operations to 32-bit, which is always
// good.

if (Src == MVT::i16)
return Dest == MVT::i32 || Dest == MVT::i64;

return Src == MVT::i32 && Dest == MVT::i64;
}
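
The restored predicates encode the pre-i16 rule: a truncate is free only when the destination is a whole 32-bit-aligned subregister of the source, and a zero-extend is free only from 32 to 64 bits. A minimal standalone sketch of that rule follows; the function is a hypothetical mirror over plain bit widths, not the LLVM EVT API.

    #include <cassert>

    // Sketch of the restored rule: truncating SrcSize -> DestSize bits is
    // "free" when the result is just a read of a 32-bit-aligned subregister.
    static bool isTruncateFreeSketch(unsigned SrcSize, unsigned DestSize) {
      return DestSize < SrcSize && DestSize % 32 == 0;
    }

    int main() {
      assert(isTruncateFreeSketch(64, 32));   // i64 -> i32: read sub0
      assert(!isTruncateFreeSketch(32, 16));  // i16 is not a subregister here
      assert(!isTruncateFreeSketch(64, 16));
    }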

@@ -2463,10 +2446,6 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
if (VT.isVector() || Size > 64)
return SDValue();

// There are i16 integer mul/mad.
if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
return SDValue();

SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);

6 changes: 3 additions & 3 deletions lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -529,14 +529,14 @@ multiclass BFIPatterns <Instruction BFI_INT,

def : Pat <
(fcopysign f32:$src0, f32:$src1),
(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
(BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
>;

def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
(BFI_INT (LoadImm32 0x7fffffff),
(i32 (EXTRACT_SUBREG $src0, sub1)),
(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
>;
@@ -545,7 +545,7 @@ multiclass BFIPatterns <Instruction BFI_INT,
(f64 (fcopysign f64:$src0, f32:$src1)),
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
(BFI_INT (LoadImm32 0x7fffffff),
(i32 (EXTRACT_SUBREG $src0, sub1)),
$src1), sub1)
>;
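
The patterns above lower fcopysign to a single v_bfi_b32: the mask 0x7fffffff keeps the magnitude bits of $src0 and pulls the sign bit from $src1, and for f64 only the high half (sub1) needs the insert. A hedged C++ sketch of the same bit trick, assuming IEEE-754 single precision and using memcpy for the bitcasts; bfi is a hypothetical helper mirroring the instruction.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // v_bfi_b32 mask, a, b computes (mask & a) | (~mask & b).
    static uint32_t bfi(uint32_t Mask, uint32_t A, uint32_t B) {
      return (Mask & A) | (~Mask & B);
    }

    static float copysignViaBFI(float Mag, float Sign) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, 4);
      std::memcpy(&S, &Sign, 4);
      uint32_t R = bfi(0x7fffffffu, M, S);  // magnitude from Mag, sign from Sign
      float Out;
      std::memcpy(&Out, &R, 4);
      return Out;
    }

    int main() {
      std::printf("%f\n", copysignViaBFI(2.5f, -1.0f));  // prints -2.500000
    }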
51 changes: 7 additions & 44 deletions lib/Target/AMDGPU/BUFInstructions.td
@@ -708,13 +708,13 @@ let Predicates = [isGCN] in {
// int_SI_vs_load_input
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, (i32 0), imm:$attr_offset, 0, 0, 0)
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;

// Offset in a 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0)
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;


@@ -914,7 +914,7 @@ def : Pat<
>;


class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
class MUBUFLoad_Pattern <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : Pat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
@@ -936,34 +936,15 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
}

let Predicates = [isSICI] in {
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
def : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;

defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
} // End Predicates = [isSICI]

multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag ld> {

def : Pat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}

let Predicates = [Has16BitInsts] in {

defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;

} // End Predicates = [Has16BitInsts]

class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
@@ -972,8 +953,6 @@ class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat <

def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i16, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i16, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
@@ -1046,20 +1025,6 @@ defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
} // End Predicates = [isSICI]


multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag st> {

def : Pat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}

defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;

class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
@@ -1068,8 +1033,6 @@ class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat

def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i16, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i16, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
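
The extending-load patterns in this file all rely on the MUBUF loads returning the memory value widened into a full 32-bit VGPR: SBYTE/SSHORT sign-extend, UBYTE/USHORT zero-extend (which also satisfies any-extend users). A small C++ sketch of the two extension behaviors, with a hypothetical one-byte buffer standing in for memory:

    #include <cassert>
    #include <cstdint>

    // BUFFER_LOAD_SBYTE: the 8-bit value is sign-extended to 32 bits.
    static int32_t loadSByte(const uint8_t *Mem) {
      return static_cast<int8_t>(Mem[0]);
    }

    // BUFFER_LOAD_UBYTE: the 8-bit value is zero-extended to 32 bits.
    static uint32_t loadUByte(const uint8_t *Mem) {
      return Mem[0];
    }

    int main() {
      uint8_t Byte = 0xF0;
      assert(loadSByte(&Byte) == -16);  // sextloadi8
      assert(loadUByte(&Byte) == 240);  // az_extloadi8
    }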
10 changes: 2 additions & 8 deletions lib/Target/AMDGPU/DSInstructions.td
@@ -489,12 +489,8 @@ class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <

def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
def : DSReadPat <DS_READ_I8, i16, si_sextload_local_i8>;
def : DSReadPat <DS_READ_U8, i16, si_az_extload_local_i8>;
def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
def : DSReadPat <DS_READ_U16, i16, si_load_local>;
def : DSReadPat <DS_READ_B32, i32, si_load_local>;

let AddedComplexity = 100 in {
@@ -516,8 +512,6 @@ class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <

def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
def : DSWritePat <DS_WRITE_B8, i16, si_truncstore_local_i8>;
def : DSWritePat <DS_WRITE_B16, i16, si_store_local>;
def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;

let AddedComplexity = 100 in {
@@ -528,8 +522,8 @@ def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
def : Pat <
(si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
(DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
(i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
(DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
(i1 0))
>;

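
The DS64Bit4ByteAligned pattern above splits an aligned v2i32 local store into one ds_write2_b32, which writes two dwords at two 4-byte-scaled offsets from a single base address. A sketch of the equivalent addressing, with an ordinary host array standing in for LDS; dsWrite2B32 is a hypothetical mirror of the instruction.

    #include <cstdint>
    #include <cstring>

    // ds_write2_b32 base, v0, v1 offset0:o0 offset1:o1 stores v0 at
    // base + 4*o0 and v1 at base + 4*o1 in one instruction.
    static void dsWrite2B32(uint8_t *LDS, uint32_t Base, uint32_t V0,
                            uint32_t V1, uint8_t Off0, uint8_t Off1) {
      std::memcpy(LDS + Base + 4u * Off0, &V0, 4);
      std::memcpy(LDS + Base + 4u * Off1, &V1, 4);
    }

    int main() {
      uint8_t LDS[64] = {};
      dsWrite2B32(LDS, 8, 0xAABBCCDDu, 0x11223344u, 0, 1);  // two adjacent dwords
    }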
6 changes: 0 additions & 6 deletions lib/Target/AMDGPU/FLATInstructions.td
@@ -341,8 +341,6 @@ let Predicates = [isCIVI] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
@@ -391,10 +389,6 @@ def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;

} // End Predicates = [isCIVI]

let Predicates = [isVI] in {
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
}


//===----------------------------------------------------------------------===//
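
The reverted block selected i16 flat stores on VI; the truncating variant writes only the low bits of the register to memory. A sketch of what a truncating 8-bit store from a 16-bit value does, assuming a little-endian host and a hypothetical one-byte destination:

    #include <cassert>
    #include <cstdint>

    // flat_truncstorei8 from an i16: only the low 8 bits reach memory.
    static void truncStoreI8(uint8_t *Mem, uint16_t Val) {
      *Mem = static_cast<uint8_t>(Val);  // the high byte is dropped
    }

    int main() {
      uint8_t Byte = 0;
      truncStoreI8(&Byte, 0x1234);
      assert(Byte == 0x34);
    }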
76 changes: 4 additions & 72 deletions lib/Target/AMDGPU/SIISelLowering.cpp
@@ -78,9 +78,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

if (Subtarget->has16BitInsts())
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);

computeRegisterProperties(STI.getRegisterInfo());

// We need to custom lower vector stores from local memory
@@ -224,55 +221,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);

if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::Constant, MVT::i16, Legal);

setOperationAction(ISD::SMIN, MVT::i16, Legal);
setOperationAction(ISD::SMAX, MVT::i16, Legal);

setOperationAction(ISD::UMIN, MVT::i16, Legal);
setOperationAction(ISD::UMAX, MVT::i16, Legal);

setOperationAction(ISD::SETCC, MVT::i16, Promote);
AddPromotedToType(ISD::SETCC, MVT::i16, MVT::i32);

setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);

setOperationAction(ISD::ROTR, MVT::i16, Promote);
setOperationAction(ISD::ROTL, MVT::i16, Promote);

setOperationAction(ISD::SDIV, MVT::i16, Promote);
setOperationAction(ISD::UDIV, MVT::i16, Promote);
setOperationAction(ISD::SREM, MVT::i16, Promote);
setOperationAction(ISD::UREM, MVT::i16, Promote);

setOperationAction(ISD::BSWAP, MVT::i16, Promote);
setOperationAction(ISD::BITREVERSE, MVT::i16, Promote);

setOperationAction(ISD::CTTZ, MVT::i16, Promote);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);

setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);

setOperationAction(ISD::BR_CC, MVT::i16, Expand);

setOperationAction(ISD::LOAD, MVT::i16, Custom);

setTruncStoreAction(MVT::i64, MVT::i16, Expand);

setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i16, MVT::i32);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
AddPromotedToType(ISD::SINT_TO_FP, MVT::i16, MVT::i32);
setOperationAction(ISD::FP16_TO_FP, MVT::i16, Promote);
AddPromotedToType(ISD::FP16_TO_FP, MVT::i16, MVT::i32);
setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
}

setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
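
The removed constructor block legalized most i16 operations by promotion: extend the operands to i32, run the operation at 32 bits, truncate the result back. A hedged C++ mirror of what Promote means for an i16 signed division (plain integers, not SelectionDAG):

    #include <cassert>
    #include <cstdint>

    // Promoting i16 sdiv to i32: sign-extend, divide in 32 bits, truncate.
    static int16_t sdivPromoted(int16_t A, int16_t B) {
      int32_t WideA = A;                   // ISD::SIGN_EXTEND
      int32_t WideB = B;
      int32_t WideQ = WideA / WideB;       // the legal 32-bit operation
      return static_cast<int16_t>(WideQ);  // ISD::TRUNCATE
    }

    int main() {
      assert(sdivPromoted(-100, 7) == -14);
    }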
@@ -2610,17 +2558,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = Load->getMemoryVT();

if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");
// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.

SDValue Chain = Load->getChain();
SDValue BasePtr = Load->getBasePtr();
MachineMemOperand *MMO = Load->getMemOperand();

EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;

SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, RealMemVT, MMO);
BasePtr, MVT::i8, MMO);

SDValue Ops[] = {
DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
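
With the revert, LowerLOAD again widens only i1: the bit is loaded as an 8-bit extending load into 32 bits and then truncated back down, as the restored assert and the MVT::i8 extload show. A sketch of that widening over a hypothetical byte of memory:

    #include <cassert>
    #include <cstdint>

    // i1 load lowering: EXTLOAD the byte into 32 bits, then truncate to i1.
    static bool loadI1(const uint8_t *Mem) {
      uint32_t Wide = Mem[0];  // extload i8 -> i32
      return Wide & 1u;        // truncate to the low bit
    }

    int main() {
      uint8_t Mem = 1;
      assert(loadI1(&Mem));
    }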
@@ -3434,23 +3381,8 @@ static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
}

EVT VT = K0->getValueType(0);

MVT NVT = MVT::i32;
unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

SDValue Tmp1, Tmp2, Tmp3;
Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);

if (VT == MVT::i16) {
Tmp1 = DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, NVT,
Tmp1, Tmp2, Tmp3);

return DAG.getNode(ISD::TRUNCATE, SL, VT, Tmp1);
} else
return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
}

static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
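
performIntMed3ImmCombine folds min(max(x, K0), K1), K0 <= K1, into one med3 (median-of-three) instruction; the removed branch additionally sign- or zero-extended i16 operands to i32 around the node. A sketch showing why the fold is valid, with smed3 as a hypothetical mirror of v_med3_i32:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // v_med3_i32 returns the median of its three operands.
    static int32_t smed3(int32_t A, int32_t B, int32_t C) {
      return std::max(std::min(A, B), std::min(std::max(A, B), C));
    }

    int main() {
      // For K0 <= K1, clamping equals the median:
      // min(max(x, K0), K1) == med3(x, K0, K1).
      for (int32_t X : {-50, 0, 5, 50})
        assert(std::min(std::max(X, -10), 10) == smed3(X, -10, 10));
    }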
1 change: 1 addition & 0 deletions lib/Target/AMDGPU/SIInstrInfo.td
@@ -1128,6 +1128,7 @@ def getAtomicNoRetOp : InstrMapping {

include "SIInstructions.td"
include "CIInstructions.td"
include "VIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"