ARM: lower fpowi appropriately for Windows ARM

This handles the last of the builtin function calls for which we generated code that differed from Microsoft's ABI. Rather than generating a call to `__pow{d,s}i2`, we now convert the integer exponent parameter to a float or double and invoke `powf` or `pow` instead. Addresses PR30825. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286082 91177308-0d34-0410-b5e6-96231b3b80d8
larryv · Nov 6, 2016 · 10d3cd3 · 10d3cd3
1 parent 1340fda
commit 10d3cd3
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 0 deletions.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1017,6 +1017,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
   }
 
+  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
+    for (auto &VT : {MVT::f32, MVT::f64})
+      setOperationAction(ISD::FPOWI, VT, Custom);
+
   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -7525,6 +7529,58 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
   Results.push_back(SDValue(CmpSwap, 2));
 }
 
+/// Custom-lower ISD::FPOWI for MSVCRT-based targets by emitting a call to the
+/// C runtime's `powf` (f32) or `pow` (f64).
+///
+/// The integer exponent (operand 1) is converted with SINT_TO_FP to the
+/// floating-point type of the base (operand 0); both values are then passed
+/// to the runtime routine using the ARM_AAPCS_VFP calling convention.
+///
+/// \param Op        the FPOWI node being lowered.
+/// \param Subtarget the ARM subtarget; only used to assert an MSVCRT target.
+/// \param DAG       the SelectionDAG in which the replacement is built.
+/// \returns the value produced by the call, or the DAG root when the call was
+///          emitted as a tail call.
+static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
+                          SelectionDAG &DAG) {
+  const auto &TLI = DAG.getTargetLoweringInfo();
+
+  assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
+         "Custom lowering is MSVCRT specific!");
+
+  SDLoc dl(Op);
+  SDValue Val = Op.getOperand(0);
+  // Query the SDValue rather than its node so that the result number Val
+  // refers to is honoured; Val->getSimpleValueType(0) would always report the
+  // type of the producing node's first result.
+  MVT Ty = Val.getSimpleValueType();
+  SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
+  SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
+                                         TLI.getPointerTy(DAG.getDataLayout()));
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  // First argument: the base.
+  // NOTE(review): isZExt is set on floating-point arguments here; presumably
+  // it has no effect for FP types -- confirm before removing.
+  Entry.Node = Val;
+  Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
+  Entry.isZExt = true;
+  Args.push_back(Entry);
+
+  // Second argument: the exponent, already converted to floating point.
+  Entry.Node = Exponent;
+  Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
+  Entry.isZExt = true;
+  Args.push_back(Entry);
+
+  // The libcall returns the same floating-point type as the base.
+  Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
+
+  // The in-chain to the call is the entry node.  If we are emitting a
+  // tailcall, the chain will be mutated if the node has a non-entry input
+  // chain.
+  SDValue InChain = DAG.getEntryNode();
+  SDValue TCChain = InChain;
+
+  // Only emit a tail call when the node is in tail-call position and the
+  // enclosing function returns exactly the libcall's result type.
+  const auto *F = DAG.getMachineFunction().getFunction();
+  bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+              F->getReturnType() == LCRTy;
+  if (IsTC)
+    InChain = TCChain;
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl)
+      .setChain(InChain)
+      .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
+      .setTailCall(IsTC);
+  std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
+
+  // Return the chain (the DAG root) if it is a tail call
+  return !CI.second.getNode() ? DAG.getRoot() : CI.first;
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -7611,6 +7667,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     llvm_unreachable("Don't know how to custom lower this!");
   case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
   case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+  case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
   case ARMISD::WIN__DBZCHK: return SDValue();
   }
 }

diff --git a/test/CodeGen/ARM/Windows/powi.ll b/test/CodeGen/ARM/Windows/powi.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple thumbv7--windows-itanium -filetype asm -o - %s | FileCheck %s
+
+declare double @llvm.powi.f64(double, i32)
+declare float @llvm.powi.f32(float, i32)
+
+; f64 powi whose result is returned unchanged: the exponent is converted to
+; double and pow is reached with a tail-call branch.
+define arm_aapcs_vfpcc double @d(double %d, i32 %i) {
+entry:
+  %0 = tail call double @llvm.powi.f64(double %d, i32 %i)
+  ret double %0
+}
+
+; CHECK-LABEL: d:
+; CHECK: vmov s[[REGISTER:[0-9]+]], r0
+; CHECK-NEXT: vcvt.f64.s32 d1, s[[REGISTER]]
+; CHECK-NEXT: b pow
+; CHECK-NOT: __powidf2
+
+; f32 powi whose result is returned unchanged: the exponent is converted to
+; float and powf (not pow) is reached with a tail-call branch.
+define arm_aapcs_vfpcc float @f(float %f, i32 %i) {
+entry:
+  %0 = tail call float @llvm.powi.f32(float %f, i32 %i)
+  ret float %0
+}
+
+; CHECK-LABEL: f:
+; CHECK: vmov s[[REGISTER:[0-9]+]], r0
+; CHECK-NEXT: vcvt.f32.s32 s1, s[[REGISTER]]
+; CHECK-NEXT: b powf
+; CHECK-NOT: __powisf2
+
+; f64 powi whose result is truncated to float before returning: the return
+; type differs from the call's, so a regular call (bl) to pow is expected.
+define arm_aapcs_vfpcc float @g(double %d, i32 %i) {
+entry:
+  %pow = tail call double @llvm.powi.f64(double %d, i32 %i)
+  %trunc = fptrunc double %pow to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: g:
+; CHECK: vmov s[[REGISTER:[0-9]+]], r0
+; CHECK-NEXT: vcvt.f64.s32 d1, s[[REGISTER]]
+; CHECK-NEXT: bl pow
+; CHECK-NOT: bl __powidf2
+; CHECK-NEXT: vcvt.f32.f64 s0, d0
+
+; f32 powi whose result is extended to double before returning: the return
+; type differs from the call's, so a regular call (bl) to powf is expected.
+define arm_aapcs_vfpcc double @h(float %f, i32 %i) {
+entry:
+  %pow = tail call float @llvm.powi.f32(float %f, i32 %i)
+  %ext = fpext float %pow to double
+  ret double %ext
+}
+
+; CHECK-LABEL: h:
+; CHECK: vmov s[[REGISTER:[0-9]+]], r0
+; CHECK-NEXT: vcvt.f32.s32 s1, s[[REGISTER]]
+; CHECK-NEXT: bl powf
+; CHECK-NOT: bl __powisf2
+; CHECK-NEXT: vcvt.f64.f32 d0, s0
+