Skip to content

Commit

Permalink
CodeGen: extend f16 conversions to permit types > float.
Browse files Browse the repository at this point in the history
This makes the two intrinsics @llvm.convert.from.f16 and
@llvm.convert.to.f16 accept types other than simple "float". This is
only strictly needed for the truncate operation, since otherwise
double rounding occurs and there's no way to represent the strict IEEE
conversion. However, for symmetry we allow larger types in the extend
too.

During legalization, we can expand an "fp16_to_double" operation into
two extends for convenience, but abort when the truncate isn't legal. A new
libcall is probably needed here.

Even after this commit, various target tweaks are needed to actually use the
extended intrinsics. I've put these into separate commits for clarity, so there
are no actual tests of f64 conversion here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213248 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
TNorthover committed Jul 17, 2014
1 parent f33a30c commit 3e61ccd
Show file tree
Hide file tree
Showing 23 changed files with 140 additions and 115 deletions.
20 changes: 10 additions & 10 deletions docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8653,14 +8653,14 @@ Syntax:

::

declare i16 @llvm.convert.to.fp16(float %a)
declare i16 @llvm.convert.to.fp16.f32(float %a)
declare i16 @llvm.convert.to.fp16.f64(double %a)

Overview:
"""""""""

The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
from single precision floating point format to half precision floating
point format.
The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
conventional floating point type to half precision floating point format.

Arguments:
""""""""""
Expand All @@ -8671,17 +8671,16 @@ converted.
Semantics:
""""""""""

The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
from single precision floating point format to half precision floating
point format. The return value is an ``i16`` which contains the
converted number.
The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
conventional floating point format to half precision floating point format. The
return value is an ``i16`` which contains the converted number.

Examples:
"""""""""

.. code-block:: llvm
%res = call i16 @llvm.convert.to.fp16(float %a)
%res = call i16 @llvm.convert.to.fp16.f32(float %a)
store i16 %res, i16* @x, align 2
.. _int_convert_from_fp16:
Expand All @@ -8694,7 +8693,8 @@ Syntax:

::

declare float @llvm.convert.from.fp16(i16 %a)
declare float @llvm.convert.from.fp16.f32(i16 %a)
declare double @llvm.convert.from.fp16.f64(i16 %a)

Overview:
"""""""""
Expand Down
10 changes: 5 additions & 5 deletions include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,11 +472,11 @@ namespace ISD {
/// 5) ISD::CvtCode indicating the type of conversion to do
CONVERT_RNDSAT,

/// FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform
/// promotions and truncation for half-precision (16 bit) floating
/// numbers. We need special nodes since FP16 is a storage-only type with
/// special semantics of operations.
FP16_TO_FP32, FP32_TO_FP16,
/// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
/// and truncation for half-precision (16 bit) floating numbers. These nodes
/// form a semi-softened interface for dealing with f16 (as an i16), which
/// is often a storage-only type but has native conversions.
FP16_TO_FP, FP_TO_FP16,

/// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
/// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
Expand Down
6 changes: 2 additions & 4 deletions include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -496,10 +496,8 @@ def int_donothing : Intrinsic<[], [], [IntrNoMem]>;

// Intrisics to support half precision floating point format
let Properties = [IntrNoMem] in {
def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
GCCBuiltin<"__gnu_f2h_ieee">;
def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
GCCBuiltin<"__gnu_h2f_ieee">;
def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
}

// These convert intrinsics are to support various conversions between
Expand Down
4 changes: 2 additions & 2 deletions include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,8 @@ def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>;
def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
def f16_to_f32 : SDNode<"ISD::FP16_TO_FP32", SDTIntToFPOp>;
def f32_to_f16 : SDNode<"ISD::FP32_TO_FP16", SDTFPToIntOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;

def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
Expand Down
23 changes: 20 additions & 3 deletions lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
case ISD::FP_TO_FP16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
Expand Down Expand Up @@ -3513,10 +3514,26 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
case ISD::FP16_TO_FP: {
if (Node->getValueType(0) == MVT::f32) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
break;
}

// We can extend to types bigger than f32 in two steps without changing the
// result. Since "f16 -> f32" is much more commonly available, give CodeGen
// the option of emitting that before resorting to a libcall.
SDValue Res =
DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
Results.push_back(
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
case ISD::FP32_TO_FP16:
}
case ISD::FP_TO_FP16:
// Can't use two-step truncation here because the rounding may be
// significant.
assert(Node->getOperand(0).getValueType() == MVT::f32 &&
"Don't know libcall for FPROUND_F64_F16");
Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
break;
case ISD::ConstantFP: {
Expand Down
23 changes: 16 additions & 7 deletions lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
Expand Down Expand Up @@ -380,11 +380,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {

// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
// nodes?
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
SDLoc(N)).first;
SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
false, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;

EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
}

SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
Expand Down Expand Up @@ -628,7 +635,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
case ISD::FP_TO_FP16: Res = SoftenFloatOp_FP_TO_FP16(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
Expand Down Expand Up @@ -704,7 +711,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}

SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_FP16(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::f32 &&
"Cannot soften in one step");
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
Expand Down
6 changes: 3 additions & 3 deletions lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;

case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;

case ISD::AND:
case ISD::OR:
Expand Down Expand Up @@ -401,7 +401,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
DAG.getValueType(N->getValueType(0).getScalarType()));
}

SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);

Expand Down Expand Up @@ -826,7 +826,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP32:
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;

Expand Down
6 changes: 3 additions & 3 deletions lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
Expand Down Expand Up @@ -403,7 +403,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
Expand All @@ -428,7 +428,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
SDValue SoftenFloatOp_FP_TO_FP16(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
Expand Down
7 changes: 4 additions & 3 deletions lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5155,12 +5155,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
MVT::i16, getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
MVT::f32, getValue(I.getArgOperand(0))));
setValue(&I,
DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
Expand Down
4 changes: 2 additions & 2 deletions lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";

case ISD::CONVERT_RNDSAT: {
switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
Expand Down
36 changes: 18 additions & 18 deletions lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2244,46 +2244,46 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;

defm FCVT : FPConversion<"fcvt">;

def : Pat<(f32_to_f16 FPR32:$Rn),
def : Pat<(fp_to_f16 FPR32:$Rn),
(i32 (COPY_TO_REGCLASS
(f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
GPR32))>;

def : Pat<(f32 (f16_to_f32 i32:$Rn)),
def : Pat<(f32 (f16_to_fp i32:$Rn)),
(FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
hsub))>;

// When converting from f16 coming directly from a load, make sure we
// load into the FPR16 registers rather than going through the GPRs.
// f16->f32
def : Pat<(f32 (f16_to_f32 (i32
def : Pat<(f32 (f16_to_fp (i32
(zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))))),
(FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
def : Pat<(f32 (f16_to_f32 (i32
def : Pat<(f32 (f16_to_fp (i32
(zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))))),
(FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
def : Pat <(f32 (f16_to_f32 (i32
def : Pat <(f32 (f16_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
def : Pat <(f32 (f16_to_f32 (i32
def : Pat <(f32 (f16_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
(FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;

// f16->f64
def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))))))),
(FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))))))),
(FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
(FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
(FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;

Expand All @@ -2292,37 +2292,37 @@ def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
// registers rather than going through the GPRs.
let AddedComplexity = 10 in {
// f32->f16
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)>;
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
(STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
// f64->f16
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)>;
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
(STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
}
Expand Down
4 changes: 2 additions & 2 deletions lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,8 +827,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// Special handling for half-precision FP.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}

Expand Down
4 changes: 2 additions & 2 deletions lib/Target/ARM/ARMInstrVFP.td
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,10 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;

def : Pat<(f32_to_f16 SPR:$a),
def : Pat<(fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;

def : Pat<(f16_to_f32 GPR:$a),
def : Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;

def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
Expand Down
Loading

0 comments on commit 3e61ccd

Please sign in to comment.