-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add conversion from float64 to extended
- Loading branch information
Showing
3 changed files
with
112 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Package extended provides conversions to and from 80-bit "extended" | ||
// floating-point numbers. | ||
// | ||
// Note that while NaNs are handled by this package, the distinction between | ||
// quiet NaN and signaling NaN is not preserved. | ||
package extended | ||
|
||
import ( | ||
"math" | ||
"math/bits" | ||
) | ||
|
||
// Extended holds an 80-bit extended-precision floating-point number, split
// into a 16-bit sign/exponent word and a 64-bit significand.
type Extended struct {
	// SignExponent packs the sign into the high bit and the exponent, biased
	// by 16383, into the low 15 bits.
	SignExponent uint16

	// Fraction is the significand. Its high bit is the ones place, which is
	// stored explicitly and is not required to be set.
	Fraction uint64
}
|
||
// FromFloat64 converts a 64-bit floating-point number to an 80-bit extended | ||
// floating-point number. | ||
func FromFloat64(x float64) (e Extended) { | ||
xbits := math.Float64bits(x) | ||
sign := int(xbits>>(63-15)) & 0x8000 | ||
exponent := int(xbits>>52) & ((1 << 11) - 1) | ||
mantissa := xbits & ((1 << 52) - 1) | ||
switch exponent { | ||
case 0: | ||
// Zero or subnormal. | ||
// Number is (-1)^sign * 2^-1022 * 0.mantissa. | ||
if mantissa == 0 { | ||
return Extended{uint16(sign), 0} | ||
} | ||
// 2^-1022 * 0.mantissa = 2^(e-16383) * 2^lzero * 0.mantissa | ||
// -1022 = e - 16383 + lzero | ||
// e = -1022 + 16383 - lzero | ||
nzero := bits.LeadingZeros64(mantissa) | ||
exponent := 16383 - 1022 + 11 - nzero | ||
return Extended{uint16(sign | exponent), mantissa << nzero} | ||
|
||
case (1 << 11) - 1: | ||
// Infinity or NaN. | ||
if mantissa == 0 { | ||
return Extended{uint16(sign | 0x7fff), 0} | ||
} | ||
return Extended{uint16(sign | 0x7fff), ^uint64(0)} | ||
|
||
default: | ||
// 2^(e64 - 1023) * 1.fraction = 2^(e80 - 16383) * 1.fraction | ||
// e63 - 1023 = e80 - 16383 | ||
// e80 = e63 + 16383 - 1023 | ||
exponent := exponent + 16383 - 1023 | ||
return Extended{uint16(sign | exponent), 1<<63 | mantissa<<11} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package extended | ||
|
||
import ( | ||
"math" | ||
"strconv" | ||
"testing" | ||
) | ||
|
||
func TestFromF64(t *testing.T) { | ||
type testcase struct { | ||
name string | ||
exponent uint16 | ||
fraction uint64 | ||
input float64 | ||
} | ||
cases := []testcase{ | ||
{"basic_one", 16383, 1 << 63, 1.0}, | ||
{"basic_two", 16384, 1 << 63, 2.0}, | ||
{"basic_half", 16382, 1 << 63, 0.5}, | ||
{"small", 16383 - 10, 1 << 63, 0.0009765625}, | ||
{"smaller", 16383 - 100, 1 << 63, 7.888609052210118e-31}, | ||
{"after_one", 16383, (1 << 63) + (1 << 11), 1.0000000000000002}, | ||
{"infinity", 32767, 0, math.Inf(0)}, | ||
{"zero", 0, 0, 0.0}, | ||
{"nan", 32767, ^uint64(0), math.NaN()}, | ||
{"smallest_normal", 15361, 1 << 63, 2.2250738585072014e-308}, | ||
{"subnormal", 15360, 1 << 63, 1.1125369292536007e-308}, | ||
{"smallest_subnormal", 15309, 1 << 63, 5e-324}, | ||
} | ||
for _, c := range cases { | ||
t.Run(c.name, func(t *testing.T) { | ||
for sign := 0; sign < 2; sign++ { | ||
expect := Extended{ | ||
c.exponent | uint16(sign<<15), | ||
c.fraction, | ||
} | ||
fin := c.input | ||
if sign != 0 { | ||
fin = -fin | ||
} | ||
out := FromFloat64(fin) | ||
if out != expect { | ||
t.Errorf("FromFloat64(%s) = %04x:%016x, expect %04x:%016x", | ||
strconv.FormatFloat(fin, 'g', -1, 64), | ||
out.SignExponent, out.Fraction, expect.SignExponent, expect.Fraction) | ||
} | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module github.com/depp/extended | ||
|
||
go 1.18 |