Skip to content

Commit

Permalink
softfloat: Move floatN_log2 to softfloat-parts.c.inc
Browse files Browse the repository at this point in the history
Rename to parts$N_log2.  Though this is partly a ruse, since I do not
believe the code will succeed for float128 without work.  Which is ok
for now, because we do not need this for more than float32 and float64.

Since berkeley-testfloat-3 doesn't support log2, compare float64_log2
vs the system log2.  Fix the errors for inputs near 1.0:

test: 3ff00000000000b0  +0x1.00000000000b0p+0
  sf: 3d2fa00000000000  +0x1.fa00000000000p-45
libm: 3d2fbd422b1bd36f  +0x1.fbd422b1bd36fp-45
Error in fraction: 32170028290927 ulp

test: 3feec24f6770b100  +0x1.ec24f6770b100p-1
  sf: bfad3740d13c9ec0  -0x1.d3740d13c9ec0p-5
libm: bfad3740d13c9e98  -0x1.d3740d13c9e98p-5
Error in fraction: 40 ulp

Reviewed-by: Alex Bennée <[email protected]>
Signed-off-by: Richard Henderson <[email protected]>
  • Loading branch information
rth7680 committed Jun 3, 2021
1 parent 572c4d8 commit 2fa3546
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 99 deletions.
125 changes: 125 additions & 0 deletions fpu/softfloat-parts.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1331,3 +1331,128 @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
g_assert_not_reached();
}
}

/*
* Return log2(A)
*/
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
uint64_t a0, a1, r, t, ign;
FloatPartsN f;
int i, n, a_exp, f_exp;

if (unlikely(a->cls != float_class_normal)) {
switch (a->cls) {
case float_class_snan:
case float_class_qnan:
parts_return_nan(a, s);
return;
case float_class_zero:
/* log2(0) = -inf */
a->cls = float_class_inf;
a->sign = 1;
return;
case float_class_inf:
if (unlikely(a->sign)) {
goto d_nan;
}
return;
default:
break;
}
g_assert_not_reached();
}
if (unlikely(a->sign)) {
goto d_nan;
}

/* TODO: This algorithm looses bits too quickly for float128. */
g_assert(N == 64);

a_exp = a->exp;
f_exp = -1;

r = 0;
t = DECOMPOSED_IMPLICIT_BIT;
a0 = a->frac_hi;
a1 = 0;

n = fmt->frac_size + 2;
if (unlikely(a_exp == -1)) {
/*
* When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
* When the value is very close to 1.0, there are lots of 1's in
* the msb parts of the fraction. At the end, when we subtract
* this value from -1.0, we can see a catastrophic loss of precision,
* as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
* bits of y in the final result. To minimize this, compute as many
* digits as we can.
* ??? This case needs another algorithm to avoid this.
*/
n = fmt->frac_size * 2 + 2;
/* Don't compute a value overlapping the sticky bit */
n = MIN(n, 62);
}

for (i = 0; i < n; i++) {
if (a1) {
mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
} else if (a0 & 0xffffffffull) {
mul64To128(a0, a0, &a0, &a1);
} else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
a0 >>= 32;
a0 *= a0;
} else {
goto exact;
}

if (a0 & DECOMPOSED_IMPLICIT_BIT) {
if (unlikely(a_exp == 0 && r == 0)) {
/*
* When a_exp == 0, we're computing the log2 of a value
* [1.0,2.0). When the value is very close to 1.0, there
* are lots of 0's in the msb parts of the fraction.
* We need to compute more digits to produce a correct
* result -- restart at the top of the fraction.
* ??? This is likely to lose precision quickly, as for
* float128; we may need another method.
*/
f_exp -= i;
t = r = DECOMPOSED_IMPLICIT_BIT;
i = 0;
} else {
r |= t;
}
} else {
add128(a0, a1, a0, a1, &a0, &a1);
}
t >>= 1;
}

/* Set sticky for inexact. */
r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

exact:
parts_sint_to_float(a, a_exp, 0, s);
if (r == 0) {
return;
}

memset(&f, 0, sizeof(f));
f.cls = float_class_normal;
f.frac_hi = r;
f.exp = f_exp - frac_normalize(&f);

if (a_exp < 0) {
parts_sub_normal(a, &f);
} else if (a_exp > 0) {
parts_add_normal(a, &f);
} else {
*a = f;
}
return;

d_nan:
float_raise(float_flag_invalid, s);
parts_default_nan(a, s);
}
126 changes: 27 additions & 99 deletions fpu/softfloat.c
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,12 @@ static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
#define parts_scalbn(A, N, S) \
PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
Expand Down Expand Up @@ -4060,6 +4066,27 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *s)
return floatx80_round_pack_canonical(&p, s);
}

/*
* log2
*/
float32 float32_log2(float32 a, float_status *status)
{
FloatParts64 p;

float32_unpack_canonical(&p, a, status);
parts_log2(&p, status, &float32_params);
return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
FloatParts64 p;

float64_unpack_canonical(&p, a, status);
parts_log2(&p, status, &float64_params);
return float64_round_pack_canonical(&p, status);
}

/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -5246,56 +5273,6 @@ float32 float32_exp2(float32 a, float_status *status)
return float32_round_pack_canonical(&rp, status);
}

/*----------------------------------------------------------------------------
| Returns the binary log of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
bool aSign, zSign;
int aExp;
uint32_t aSig, zSig, i;

a = float32_squash_input_denormal(a, status);
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );

if ( aExp == 0 ) {
if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
}
if ( aSign ) {
float_raise(float_flag_invalid, status);
return float32_default_nan(status);
}
if ( aExp == 0xFF ) {
if (aSig) {
return propagateFloat32NaN(a, float32_zero, status);
}
return a;
}

aExp -= 0x7F;
aSig |= 0x00800000;
zSign = aExp < 0;
zSig = aExp << 23;

for (i = 1 << 22; i > 0; i >>= 1) {
aSig = ( (uint64_t)aSig * aSig ) >> 23;
if ( aSig & 0x01000000 ) {
aSig >>= 1;
zSig |= i;
}
}

if ( zSign )
zSig = -zSig;

return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
Expand Down Expand Up @@ -5384,55 +5361,6 @@ float64 float64_rem(float64 a, float64 b, float_status *status)

}

/*----------------------------------------------------------------------------
| Returns the binary log of the double-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
bool aSign, zSign;
int aExp;
uint64_t aSig, aSig0, aSig1, zSig, i;
a = float64_squash_input_denormal(a, status);

aSig = extractFloat64Frac( a );
aExp = extractFloat64Exp( a );
aSign = extractFloat64Sign( a );

if ( aExp == 0 ) {
if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
normalizeFloat64Subnormal( aSig, &aExp, &aSig );
}
if ( aSign ) {
float_raise(float_flag_invalid, status);
return float64_default_nan(status);
}
if ( aExp == 0x7FF ) {
if (aSig) {
return propagateFloat64NaN(a, float64_zero, status);
}
return a;
}

aExp -= 0x3FF;
aSig |= UINT64_C(0x0010000000000000);
zSign = aExp < 0;
zSig = (uint64_t)aExp << 52;
for (i = 1LL << 51; i > 0; i >>= 1) {
mul64To128( aSig, aSig, &aSig0, &aSig1 );
aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
if ( aSig & UINT64_C(0x0020000000000000) ) {
aSig >>= 1;
zSig |= i;
}
}

if ( zSign )
zSig = -zSig;
return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}

/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
Expand Down
Loading

0 comments on commit 2fa3546

Please sign in to comment.