diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 91b9d40f8ef7..c6f5906d661d 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -102,6 +102,8 @@ class X86TTI final : public ImmutablePass, public TargetTransformInfo { unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const override; + unsigned getIntImmCost(int64_t) const; + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, @@ -808,6 +810,19 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise); } +/// \brief Calculate the cost of materializing a 64-bit value. This helper +/// method might only calculate a fraction of a larger immediate. Therefore it +/// is valid to return a cost of ZERO. +unsigned X86TTI::getIntImmCost(int64_t Val) const { + if (Val == 0) + return TCC_Free; + + if (isInt<32>(Val)) + return TCC_Basic; + + return 2 * TCC_Basic; +} + unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -825,11 +840,21 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { if (Imm == 0) return TCC_Free; - if (Imm.getBitWidth() <= 64 && - (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue()))) - return TCC_Basic; - else - return 2 * TCC_Basic; + // Sign-extend all constants to a multiple of 64-bit. + APInt ImmVal = Imm; + if (BitSize & 0x3f) + ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); + + // Split the constant into 64-bit chunks and calculate the cost for each + // chunk. + unsigned Cost = 0; + for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { + APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); + int64_t Val = Tmp.getSExtValue(); + Cost += getIntImmCost(Val); + } + // We need at least one instruction to materialze the constant. + return std::max(1U, Cost); } unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, @@ -889,9 +914,11 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, break; } - if ((Idx == ImmIdx) && - Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) - return TCC_Free; + if (Idx == ImmIdx) { + unsigned NumConstants = (BitSize + 63) / 64; + unsigned Cost = X86TTI::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost; + } return X86TTI::getIntImmCost(Imm, Ty); } diff --git a/test/Transforms/ConstantHoisting/X86/large-immediate.ll b/test/Transforms/ConstantHoisting/X86/large-immediate.ll index e0af9c9be572..b8c04f38b12f 100644 --- a/test/Transforms/ConstantHoisting/X86/large-immediate.ll +++ b/test/Transforms/ConstantHoisting/X86/large-immediate.ll @@ -25,3 +25,12 @@ define i196 @test3(i196 %a) nounwind { %2 = mul i196 %1, 2 ret i196 %2 } + +; Check that we don't hoist immediates with small values. +define i96 @test4(i96 %a) nounwind { +; CHECK-LABEL: test4 +; CHECK-NOT: %const = bitcast i96 2 to i96 + %1 = mul i96 %a, 2 + %2 = add i96 %1, 2 + ret i96 %2 +} diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll index cef022ebc0bd..9df44177820e 100644 --- a/test/Transforms/ConstantHoisting/X86/stackmap.ll +++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll @@ -6,11 +6,11 @@ target triple = "x86_64-apple-macosx10.9.0" ; Test if the 3rd argument of a stackmap is hoisted. define i128 @test1(i128 %a) { ; CHECK-LABEL: @test1 -; CHECK: %const = bitcast i128 13464618275673403322 to i128 +; CHECK: %const = bitcast i128 134646182756734033220 to i128 ; CHECK: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const) entry: - %0 = add i128 %a, 13464618275673403322 - tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 13464618275673403322) + %0 = add i128 %a, 134646182756734033220 + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220) ret i128 %0 }