From 611d8de3bc5e6223b25aca1feaa28c1c2d0ca866 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 12 Dec 2016 21:59:30 +0000 Subject: [PATCH] [APFloat] Implement PPCDoubleDouble add and subtract. Summary: I looked at libgcc's implementation (which is based on the paper, Software for Doubled-Precision Floating-Point Computations", by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.) and made it generic to arbitrary IEEE floats. Differential Revision: https://reviews.llvm.org/D26817 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289472 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/APFloat.h | 71 +++++++++++- lib/Support/APFloat.cpp | 210 +++++++++++++++++++++++++++++++++- unittests/ADT/APFloatTest.cpp | 135 +++++++++++++++++++--- 3 files changed, 393 insertions(+), 23 deletions(-) diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 5f9c83d98690..98d4cc20560a 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -27,6 +27,7 @@ struct fltSemantics; class APSInt; class StringRef; class APFloat; +class raw_ostream; template class SmallVectorImpl; @@ -479,6 +480,8 @@ class IEEEFloat final : public APFloatBase { /// @} + cmpResult compareAbsoluteValue(const IEEEFloat &) const; + private: /// \name Simple Queries /// @{ @@ -527,7 +530,6 @@ class IEEEFloat final : public APFloatBase { bool convertFromStringSpecials(StringRef str); opStatus normalize(roundingMode, lostFraction); opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract); - cmpResult compareAbsoluteValue(const IEEEFloat &) const; opStatus handleOverflow(roundingMode); bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const; opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool, @@ -600,6 +602,12 @@ class DoubleAPFloat final : public APFloatBase { const fltSemantics *Semantics; std::unique_ptr Floats; + opStatus addImpl(const APFloat &a, const APFloat &aa, const APFloat &c, + const APFloat &cc, roundingMode RM); + + opStatus addWithSpecial(const DoubleAPFloat &LHS, const DoubleAPFloat &RHS, + DoubleAPFloat &Out, roundingMode RM); + public: DoubleAPFloat(const fltSemantics &S); DoubleAPFloat(const fltSemantics &S, uninitializedTag); @@ -623,6 +631,19 @@ class DoubleAPFloat final : public APFloatBase { APFloat &getFirst() { return Floats[0]; } const APFloat &getFirst() const { return Floats[0]; } + APFloat &getSecond() { return Floats[1]; } + const APFloat &getSecond() const { return Floats[1]; } + + opStatus add(const DoubleAPFloat &RHS, roundingMode RM); + opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM); + void changeSign(); + cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const; + + fltCategory getCategory() const; + bool isNegative() const; + + void makeInf(bool Neg); + void makeNaN(bool SNaN, bool Neg, const APInt *fill); }; } // End detail namespace @@ -747,7 +768,15 @@ class APFloat : public APFloatBase { void makeZero(bool Neg) { getIEEE().makeZero(Neg); } - void makeInf(bool Neg) { getIEEE().makeInf(Neg); } + void makeInf(bool Neg) { + if (usesLayout(*U.semantics)) { + return U.IEEE.makeInf(Neg); + } else if (usesLayout(*U.semantics)) { + return U.Double.makeInf(Neg); + } else { + llvm_unreachable("Unexpected semantics"); + } + } void makeNaN(bool SNaN, bool Neg, const APInt *fill) { getIEEE().makeNaN(SNaN, Neg, fill); @@ -772,6 +801,17 @@ class APFloat : public APFloatBase { explicit APFloat(DoubleAPFloat F, const fltSemantics &S) : U(std::move(F), S) {} + cmpResult compareAbsoluteValue(const APFloat &RHS) const { + assert(&getSemantics() == &RHS.getSemantics()); + if (usesLayout(getSemantics())) { + return U.IEEE.compareAbsoluteValue(RHS.U.IEEE); + } else if (usesLayout(getSemantics())) { + return U.Double.compareAbsoluteValue(RHS.U.Double); + } else { + llvm_unreachable("Unexpected semantics"); + } + } + public: APFloat(const fltSemantics &Semantics) : U(Semantics) {} APFloat(const fltSemantics &Semantics, StringRef S); @@ -885,10 +925,22 @@ class APFloat : public APFloatBase { void Profile(FoldingSetNodeID &NID) const { getIEEE().Profile(NID); } opStatus add(const APFloat &RHS, roundingMode RM) { - return getIEEE().add(RHS.getIEEE(), RM); + if (usesLayout(getSemantics())) { + return U.IEEE.add(RHS.U.IEEE, RM); + } else if (usesLayout(getSemantics())) { + return U.Double.add(RHS.U.Double, RM); + } else { + llvm_unreachable("Unexpected semantics"); + } } opStatus subtract(const APFloat &RHS, roundingMode RM) { - return getIEEE().subtract(RHS.getIEEE(), RM); + if (usesLayout(getSemantics())) { + return U.IEEE.subtract(RHS.U.IEEE, RM); + } else if (usesLayout(getSemantics())) { + return U.Double.subtract(RHS.U.Double, RM); + } else { + llvm_unreachable("Unexpected semantics"); + } } opStatus multiply(const APFloat &RHS, roundingMode RM) { return getIEEE().multiply(RHS.getIEEE(), RM); @@ -1011,14 +1063,25 @@ class APFloat : public APFloatBase { return getIEEE().toString(Str, FormatPrecision, FormatMaxPadding); } + void print(raw_ostream &) const; + void dump() const; + bool getExactInverse(APFloat *inv) const { return getIEEE().getExactInverse(inv ? &inv->getIEEE() : nullptr); } + // This is for internal test only. + // TODO: Remove it after the PPCDoubleDouble transition. + const APFloat &getSecondFloat() const { + assert(&getSemantics() == &PPCDoubleDouble); + return U.Double.getSecond(); + } + friend hash_code hash_value(const APFloat &Arg); friend int ilogb(const APFloat &Arg) { return ilogb(Arg.getIEEE()); } friend APFloat scalbn(APFloat X, int Exp, roundingMode RM); friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM); + friend IEEEFloat; friend DoubleAPFloat; }; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 20fd564a23af..3549aa303fdd 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -19,8 +19,10 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -3847,8 +3849,9 @@ DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) } DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) - : Semantics(&S), Floats(new APFloat[2]{APFloat(PPCDoubleDoubleImpl, I), - APFloat(IEEEdouble)}) { + : Semantics(&S), Floats(new APFloat[2]{ + APFloat(PPCDoubleDoubleImpl, I), + APFloat(IEEEdouble, APInt(64, I.getRawData()[1]))}) { assert(Semantics == &PPCDoubleDouble); } @@ -3858,7 +3861,8 @@ DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, Floats(new APFloat[2]{std::move(First), std::move(Second)}) { assert(Semantics == &PPCDoubleDouble); // TODO Check for First == &IEEEdouble once the transition is done. - assert(&Floats[0].getSemantics() == &PPCDoubleDoubleImpl); + assert(&Floats[0].getSemantics() == &PPCDoubleDoubleImpl || + &Floats[0].getSemantics() == &IEEEdouble); assert(&Floats[1].getSemantics() == &IEEEdouble); } @@ -3887,6 +3891,198 @@ DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { return *this; } +// "Software for Doubled-Precision Floating-Point Computations", +// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. +APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, + const APFloat &c, const APFloat &cc, + roundingMode RM) { + int Status = opOK; + APFloat z = a; + Status |= z.add(c, RM); + if (!z.isFinite()) { + if (!z.isInfinity()) { + Floats[0] = std::move(z); + Floats[1].makeZero(false); + return (opStatus)Status; + } + Status = opOK; + auto AComparedToC = a.compareAbsoluteValue(c); + z = cc; + Status |= z.add(aa, RM); + if (AComparedToC == APFloat::cmpGreaterThan) { + // z = cc + aa + c + a; + Status |= z.add(c, RM); + Status |= z.add(a, RM); + } else { + // z = cc + aa + a + c; + Status |= z.add(a, RM); + Status |= z.add(c, RM); + } + if (!z.isFinite()) { + Floats[0] = std::move(z); + Floats[1].makeZero(false); + return (opStatus)Status; + } + Floats[0] = z; + APFloat zz = aa; + Status |= zz.add(cc, RM); + if (AComparedToC == APFloat::cmpGreaterThan) { + // Floats[1] = a - z + c + zz; + Floats[1] = a; + Status |= Floats[1].subtract(z, RM); + Status |= Floats[1].add(c, RM); + Status |= Floats[1].add(zz, RM); + } else { + // Floats[1] = c - z + a + zz; + Floats[1] = c; + Status |= Floats[1].subtract(z, RM); + Status |= Floats[1].add(a, RM); + Status |= Floats[1].add(zz, RM); + } + } else { + // q = a - z; + APFloat q = a; + Status |= q.subtract(z, RM); + + // zz = q + c + (a - (q + z)) + aa + cc; + // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. + auto zz = q; + Status |= zz.add(c, RM); + Status |= q.add(z, RM); + Status |= q.subtract(a, RM); + q.changeSign(); + Status |= zz.add(q, RM); + Status |= zz.add(aa, RM); + Status |= zz.add(cc, RM); + if (zz.isZero() && !zz.isNegative()) { + Floats[0] = std::move(z); + Floats[1].makeZero(false); + return opOK; + } + Floats[0] = z; + Status |= Floats[0].add(zz, RM); + if (!Floats[0].isFinite()) { + Floats[1].makeZero(false); + return (opStatus)Status; + } + Floats[1] = std::move(z); + Status |= Floats[1].subtract(Floats[0], RM); + Status |= Floats[1].add(zz, RM); + } + return (opStatus)Status; +} + +APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, + const DoubleAPFloat &RHS, + DoubleAPFloat &Out, + roundingMode RM) { + if (LHS.getCategory() == fcNaN) { + Out = LHS; + return opOK; + } + if (RHS.getCategory() == fcNaN) { + Out = RHS; + return opOK; + } + if (LHS.getCategory() == fcZero) { + Out = RHS; + return opOK; + } + if (RHS.getCategory() == fcZero) { + Out = LHS; + return opOK; + } + if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && + LHS.isNegative() != RHS.isNegative()) { + Out.makeNaN(false, Out.isNegative(), nullptr); + return opInvalidOp; + } + if (LHS.getCategory() == fcInfinity) { + Out = LHS; + return opOK; + } + if (RHS.getCategory() == fcInfinity) { + Out = RHS; + return opOK; + } + assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); + + // These conversions will go away once PPCDoubleDoubleImpl goes away. + // (PPCDoubleDoubleImpl, IEEEDouble) -> (IEEEDouble, IEEEDouble) + APFloat A(IEEEdouble, + APInt(64, LHS.Floats[0].bitcastToAPInt().getRawData()[0])), + AA(LHS.Floats[1]), + C(IEEEdouble, APInt(64, RHS.Floats[0].bitcastToAPInt().getRawData()[0])), + CC(RHS.Floats[1]); + assert(&AA.getSemantics() == &IEEEdouble); + assert(&CC.getSemantics() == &IEEEdouble); + Out.Floats[0] = APFloat(IEEEdouble); + assert(&Out.Floats[1].getSemantics() == &IEEEdouble); + + auto Ret = Out.addImpl(A, AA, C, CC, RM); + + // (IEEEDouble, IEEEDouble) -> (PPCDoubleDoubleImpl, IEEEDouble) + uint64_t Buffer[] = {Out.Floats[0].bitcastToAPInt().getRawData()[0], + Out.Floats[1].bitcastToAPInt().getRawData()[0]}; + Out.Floats[0] = APFloat(PPCDoubleDoubleImpl, APInt(128, 2, Buffer)); + return Ret; +} + +APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, + roundingMode RM) { + return addWithSpecial(*this, RHS, *this, RM); +} + +APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, + roundingMode RM) { + changeSign(); + auto Ret = add(RHS, RM); + changeSign(); + return Ret; +} + +void DoubleAPFloat::changeSign() { + Floats[0].changeSign(); + Floats[1].changeSign(); +} + +APFloat::cmpResult +DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { + auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); + if (Result != cmpEqual) + return Result; + Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); + if (Result == cmpLessThan || Result == cmpGreaterThan) { + auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); + auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); + if (Against && !RHSAgainst) + return cmpLessThan; + if (!Against && RHSAgainst) + return cmpGreaterThan; + if (!Against && !RHSAgainst) + return Result; + if (Against && RHSAgainst) + return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); + } + return Result; +} + +APFloat::fltCategory DoubleAPFloat::getCategory() const { + return Floats[0].getCategory(); +} + +bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } + +void DoubleAPFloat::makeInf(bool Neg) { + Floats[0].makeInf(Neg); + Floats[1].makeZero(false); +} + +void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { + Floats[0].makeNaN(SNaN, Neg, fill); + Floats[1].makeZero(false); +} + } // End detail namespace APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { @@ -3959,4 +4155,12 @@ APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { } } +void APFloat::print(raw_ostream &OS) const { + SmallVector Buffer; + toString(Buffer); + OS << Buffer << "\n"; +} + +void APFloat::dump() const { print(dbgs()); } + } // End llvm namespace diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index eea3222c047e..6da57ce526ea 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -1512,22 +1512,6 @@ TEST(APFloatTest, PPCDoubleDouble) { EXPECT_EQ(0x0360000000000000ull, test.bitcastToAPInt().getRawData()[0]); EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); - test = APFloat(APFloat::PPCDoubleDouble, "1.0"); - test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-105"), APFloat::rmNearestTiesToEven); - EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); - EXPECT_EQ(0x3960000000000000ull, test.bitcastToAPInt().getRawData()[1]); - - test = APFloat(APFloat::PPCDoubleDouble, "1.0"); - test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-106"), APFloat::rmNearestTiesToEven); - EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); -#if 0 // XFAIL - // This is what we would expect with a true double-double implementation - EXPECT_EQ(0x3950000000000000ull, test.bitcastToAPInt().getRawData()[1]); -#else - // This is what we get with our 106-bit mantissa approximation - EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); -#endif - // PR30869 { auto Result = APFloat(APFloat::PPCDoubleDouble, "1.0") + @@ -3186,4 +3170,123 @@ TEST(APFloatTest, frexp) { EXPECT_EQ(52, Exp); EXPECT_TRUE(APFloat(APFloat::IEEEdouble, "0x1.c60f120d9f87cp-1").bitwiseIsEqual(Frac)); } + +TEST(APFloatTest, PPCDoubleDoubleAddSpecial) { + using DataType = std::tuple; + DataType Data[] = { + // (1 + 0) + (-1 + 0) = fcZero + {0x3ff0000000000000ull, 0, 0xbff0000000000000ull, 0, APFloat::fcZero, + APFloat::rmNearestTiesToEven}, + // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity + {0x7fefffffffffffffull, 0x7c8ffffffffffffeull, 0x7948000000000000ull, + 0ull, APFloat::fcInfinity, APFloat::rmNearestTiesToEven}, + // TODO: change the 4th 0x75effffffffffffe to 0x75efffffffffffff when + // PPCDoubleDoubleImpl is gone. + // LDBL_MAX + (1.011111... >> (1023 - 106) + (1.1111111...0 >> (1023 - + // 160))) = fcNormal + {0x7fefffffffffffffull, 0x7c8ffffffffffffeull, 0x7947ffffffffffffull, + 0x75effffffffffffeull, APFloat::fcNormal, APFloat::rmNearestTiesToEven}, + // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity + {0x7fefffffffffffffull, 0x7c8ffffffffffffeull, 0x7fefffffffffffffull, + 0x7c8ffffffffffffeull, APFloat::fcInfinity, + APFloat::rmNearestTiesToEven}, + // NaN + (1 + 0) = fcNaN + {0x7ff8000000000000ull, 0, 0x3ff0000000000000ull, 0, APFloat::fcNaN, + APFloat::rmNearestTiesToEven}, + }; + + for (auto Tp : Data) { + uint64_t Op1[2], Op2[2]; + APFloat::fltCategory Expected; + APFloat::roundingMode RM; + std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp; + + APFloat A1(APFloat::PPCDoubleDouble, APInt(128, 2, Op1)); + APFloat A2(APFloat::PPCDoubleDouble, APInt(128, 2, Op2)); + A1.add(A2, RM); + + EXPECT_EQ(Expected, A1.getCategory()); + } +} + +TEST(APFloatTest, PPCDoubleDoubleAdd) { + using DataType = std::tuple; + DataType Data[] = { + // (1 + 0) + (1e-105 + 0) = (1 + 1e-105) + {0x3ff0000000000000ull, 0, 0x3960000000000000ull, 0, + 0x3ff0000000000000ull, 0x3960000000000000ull, + APFloat::rmNearestTiesToEven}, + // (1 + 0) + (1e-106 + 0) = (1 + 1e-106) + {0x3ff0000000000000ull, 0, 0x3950000000000000ull, 0, + 0x3ff0000000000000ull, 0x3950000000000000ull, + APFloat::rmNearestTiesToEven}, + // (1 + 1e-106) + (1e-106 + 0) = (1 + 1e-105) + {0x3ff0000000000000ull, 0x3950000000000000ull, 0x3950000000000000ull, 0, + 0x3ff0000000000000ull, 0x3960000000000000ull, + APFloat::rmNearestTiesToEven}, + // (1 + 0) + (epsilon + 0) = (1 + epsilon) + {0x3ff0000000000000ull, 0, 0x0000000000000001ull, 0, + 0x3ff0000000000000ull, 0x0000000000000001ull, + APFloat::rmNearestTiesToEven}, + // TODO: change 0xf950000000000000 to 0xf940000000000000, when + // PPCDoubleDoubleImpl is gone. + // (DBL_MAX - 1 << (1023 - 105)) + (1 << (1023 - 53) + 0) = DBL_MAX + + // 1.11111... << (1023 - 52) + {0x7fefffffffffffffull, 0xf950000000000000ull, 0x7c90000000000000ull, 0, + 0x7fefffffffffffffull, 0x7c8ffffffffffffeull, + APFloat::rmNearestTiesToEven}, + // TODO: change 0xf950000000000000 to 0xf940000000000000, when + // PPCDoubleDoubleImpl is gone. + // (1 << (1023 - 53) + 0) + (DBL_MAX - 1 << (1023 - 105)) = DBL_MAX + + // 1.11111... << (1023 - 52) + {0x7c90000000000000ull, 0, 0x7fefffffffffffffull, 0xf950000000000000ull, + 0x7fefffffffffffffull, 0x7c8ffffffffffffeull, + APFloat::rmNearestTiesToEven}, + }; + + for (auto Tp : Data) { + uint64_t Op1[2], Op2[2], Expected[2]; + APFloat::roundingMode RM; + std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; + + APFloat A1(APFloat::PPCDoubleDouble, APInt(128, 2, Op1)); + APFloat A2(APFloat::PPCDoubleDouble, APInt(128, 2, Op2)); + A1.add(A2, RM); + + EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(Expected[1], + A1.getSecondFloat().bitcastToAPInt().getRawData()[0]); + } +} + +TEST(APFloatTest, PPCDoubleDoubleSubtract) { + using DataType = std::tuple; + DataType Data[] = { + // (1 + 0) - (-1e-105 + 0) = (1 + 1e-105) + {0x3ff0000000000000ull, 0, 0xb960000000000000ull, 0, + 0x3ff0000000000000ull, 0x3960000000000000ull, + APFloat::rmNearestTiesToEven}, + // (1 + 0) - (-1e-106 + 0) = (1 + 1e-106) + {0x3ff0000000000000ull, 0, 0xb950000000000000ull, 0, + 0x3ff0000000000000ull, 0x3950000000000000ull, + APFloat::rmNearestTiesToEven}, + }; + + for (auto Tp : Data) { + uint64_t Op1[2], Op2[2], Expected[2]; + APFloat::roundingMode RM; + std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; + + APFloat A1(APFloat::PPCDoubleDouble, APInt(128, 2, Op1)); + APFloat A2(APFloat::PPCDoubleDouble, APInt(128, 2, Op2)); + A1.subtract(A2, RM); + + EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(Expected[1], + A1.getSecondFloat().bitcastToAPInt().getRawData()[0]); + } +} }