From 999821cddfeb8fd5115261c539c951f8733c943a Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Tue, 10 Apr 2012 13:22:49 +0000 Subject: [PATCH] Transform div to mul with reciprocal only when fp imm is legal. This fixes PR12516 and uncovers one weird problem in legalize (worked around) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 ++++++-- test/CodeGen/ARM/2012-04-10-DAGCombine.ll | 31 +++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/ARM/2012-04-10-DAGCombine.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cf7ce587f362..b5b20284c1d3 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5769,8 +5769,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is not too horrible (eg not NaN). - if (st == APFloat::opOK || st == APFloat::opInexact) + // Only do the transform if the reciprocal is not too horrible (eg not NaN) + // and the reciprocal is a legal fp imm. + if ((st == APFloat::opOK || st == APFloat::opInexact) && + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. 
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, DAG.getConstantFP(Recip, VT)); } diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll new file mode 100644 index 000000000000..6f50f279b5de --- /dev/null +++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math +;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +;target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 { +bb4: + %tmp = extractelement <2 x float> undef, i32 0 + br i1 undef, label %bb18, label %bb5 + +bb5: ; preds = %bb4 + %tmp6 = fadd float %tmp, -1.500000e+01 + %tmp7 = fdiv float %tmp6, 2.000000e+01 + %tmp8 = fadd float %tmp7, 1.000000e+00 + %tmp9 = fdiv float 1.000000e+00, %tmp8 + %tmp10 = fsub float 1.000000e+00, %tmp9 + %tmp11 = fmul float %tmp10, 1.000000e+01 + %tmp12 = fadd float %tmp11, 1.500000e+01 + %tmp13 = fdiv float %tmp12, %tmp + %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0 + %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer + %tmp16 = fmul <4 x float> zeroinitializer, %tmp15 + %tmp17 = fadd <4 x float> %tmp16, %arg + store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0 + br label %bb18 + +bb18: ; preds = %bb5, %bb4 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null}