Skip to content

Commit

Permalink
Transform div to mul with reciprocal only when fp imm is legal.
Browse files Browse the repository at this point in the history
This fixes PR12516 and uncovers one weird problem in legalize (worked around).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154394 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
asl committed Apr 10, 2012
1 parent bce0de4 commit 999821c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
11 changes: 9 additions & 2 deletions lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5769,8 +5769,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
APFloat N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is not too horrible (eg not NaN).
if (st == APFloat::opOK || st == APFloat::opInexact)
// Only do the transform if the reciprocal is not too horrible (eg not NaN)
// and the reciprocal is a legal fp imm.
if ((st == APFloat::opOK || st == APFloat::opInexact) &&
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
DAG.getConstantFP(Recip, VT));
}
Expand Down
31 changes: 31 additions & 0 deletions test/CodeGen/ARM/2012-04-10-DAGCombine.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
;target triple = "armv7-none-linux-gnueabi"

; Reduced reproducer: a scalar float computation is splatted into a
; <4 x float>, combined with %arg and stored. The RUN line only invokes llc
; (no FileCheck), so the test passes as long as llc exits successfully.
; NOTE(review): presumably this is the reproducer for PR12516 — confirm
; against the bug report.
define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
bb4:
%tmp = extractelement <2 x float> undef, i32 0
br i1 undef, label %bb18, label %bb5

bb5: ; preds = %bb4
%tmp6 = fadd float %tmp, -1.500000e+01
; The only fdiv here whose divisor is a floating-point constant.
; NOTE(review): presumably this is the instruction that exercises the
; fdiv -> fmul-by-reciprocal combine in DAGCombiner::visitFDIV — confirm.
%tmp7 = fdiv float %tmp6, 2.000000e+01
%tmp8 = fadd float %tmp7, 1.000000e+00
%tmp9 = fdiv float 1.000000e+00, %tmp8
%tmp10 = fsub float 1.000000e+00, %tmp9
%tmp11 = fmul float %tmp10, 1.000000e+01
%tmp12 = fadd float %tmp11, 1.500000e+01
%tmp13 = fdiv float %tmp12, %tmp
; Splat the scalar result across all four lanes (zeroinitializer mask).
%tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
%tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
%tmp16 = fmul <4 x float> zeroinitializer, %tmp15
%tmp17 = fadd <4 x float> %tmp16, %arg
store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
br label %bb18

bb18: ; preds = %bb5, %bb4
ret void
}

!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}

0 comments on commit 999821c

Please sign in to comment.