Skip to content

Commit

Permalink
AMDGPU: Make f16 ConstantFP legal
Browse files Browse the repository at this point in the history
Not having this legal led to combine failures, resulting
in dumb things like bitcasts of constants not being folded
away.

The only reason I'm leaving the v_mov_b32 hack that f32
already uses is to avoid madak-formation test regressions.
PeepholeOptimizer has an ordering issue where the immediate-fold
attempt targets the sgpr->vgpr copy instead of the actual
use. Running it twice avoids that problem.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289096 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Dec 8, 2016
1 parent 00f1f5a commit beec226
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 18 deletions.
14 changes: 1 addition & 13 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);

// F16 - Load/Store Actions.
setOperationAction(ISD::LOAD, MVT::f16, Promote);
Expand Down Expand Up @@ -1848,9 +1848,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
case ISD::TRAP: return lowerTRAP(Op, DAG);

case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
}
Expand Down Expand Up @@ -2055,15 +2052,6 @@ SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
DAG.getNode(ISD::FTRUNC, DL, VT, Op);
}

SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const {
if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) {
return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(),
SDLoc(Op), MVT::i32);
}

return SDValue();
}

SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
Expand Down
3 changes: 0 additions & 3 deletions lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const SDLoc &DL,
EVT VT) const;

/// \brief Custom lowering for ISD::ConstantFP.
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;

/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

Expand Down
13 changes: 13 additions & 0 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -706,11 +706,24 @@ def : Pat <
(S_MOV_B32 imm:$imm)
>;

// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
def : Pat <
(VGPRImm<(f16 fpimm)>:$imm),
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;

def : Pat <
(f32 fpimm:$imm),
(S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;

def : Pat <
(f16 fpimm:$imm),
(S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
>;

def : Pat <
(i32 frameindex:$fi),
(V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
Expand Down
5 changes: 3 additions & 2 deletions test/CodeGen/AMDGPU/br_cc.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ two:

; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; SI: s_cbranch_vccz

; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: s_cbranch_vccnz

; VI: one{{$}}
Expand Down Expand Up @@ -85,7 +86,7 @@ two:

; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nge_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
; GCN: s_cbranch_vccnz

; GCN: one{{$}}
Expand Down

0 comments on commit beec226

Please sign in to comment.