diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 47c89e8d24ad..68ac96f9b5a8 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -7353,6 +7353,42 @@ Semantics: This function returns the same values as the libm ``fabs`` functions would, and handles error conditions in the same way. +'``llvm.copysign.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.copysign`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.copysign.f32(float %Mag, float %Sgn) + declare double @llvm.copysign.f64(double %Mag, double %Sgn) + declare x86_fp80 @llvm.copysign.f80(x86_fp80 %Mag, x86_fp80 %Sgn) + declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn) + declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %Mag, ppc_fp128 %Sgn) + +Overview: +""""""""" + +The '``llvm.copysign.*``' intrinsics return a value with the magnitude of the +first operand and the sign of the second operand. + +Arguments: +"""""""""" + +The arguments and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``copysign`` +functions would, and handles error conditions in the same way. + '``llvm.floor.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 856e0a5824db..c7414e063fef 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -296,6 +296,8 @@ let Properties = [IntrReadMem] in { def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_copysign : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index ef532357c5c6..0883ab0ce7f1 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -443,6 +443,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 237a33a0cfff..b768f39e570d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bf65319295b1..fd5f97718f55 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -98,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -557,6 +558,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3b9f3581a4cd..df9293da261e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4925,6 +4925,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 55125bd23f9f..a2ea35e64a19 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -697,6 +697,11 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); + + // These operations default to expand for vector types. + if (VT >= MVT::FIRST_VECTOR_VALUETYPE && + VT <= MVT::LAST_VECTOR_VALUETYPE) + setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index d9f132363b15..4224ae2d273c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -253,6 +253,12 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::sin: case Intrinsic::cos: return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e452acdafac2..a7026f68a2c2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1767,6 +1767,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case Intrinsic::log10: case Intrinsic::log2: case Intrinsic::fabs: + case Intrinsic::copysign: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: @@ -1831,6 +1832,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case LibFunc::fabsf: case LibFunc::fabsl: return Intrinsic::fabs; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + return Intrinsic::copysign; case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 566dcc75cd81..216b937ad6d0 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -468,6 +468,59 @@ for.end: ; preds = %for.body, %entry declare double @llvm.fabs(double) nounwind readnone +;CHECK-LABEL: @copysign_f32( +;CHECK: llvm.copysign.v4f32 +;CHECK: ret void +define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx1, align 4 + %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone + %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.copysign.f32(float, float) nounwind readnone + +define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv + %1 = load double* %arrayidx, align 8 + %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone + %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.copysign(double, double) nounwind readnone + ;CHECK-LABEL: @floor_f32( ;CHECK: llvm.floor.v4f32 ;CHECK: ret void