Skip to content

Commit

Permalink
Add conversion from float64 to extended
Browse files Browse the repository at this point in the history
  • Loading branch information
depp committed May 27, 2022
1 parent bea69ab commit c2da1b1
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 0 deletions.
59 changes: 59 additions & 0 deletions extended.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Package extended provides conversions to and from 80-bit "extended"
// floating-point numbers.
//
// Note that while NaNs are handled by this package, the distinction between
// quiet NaN and signaling NaN is not preserved.
package extended

import (
"math"
"math/bits"
)

// An Extended is an 80-bit extended precision floating-point number, split
// into a 16-bit sign/exponent word and a 64-bit fraction word.
type Extended struct {
	// SignExponent holds the sign in its high bit (bit 15) and the exponent in
	// its low 15 bits. The exponent is stored with a bias of 16383.
	SignExponent uint16

	// Fraction holds the significand. Unlike float64, the ones place is stored
	// explicitly, as the high bit (bit 63); the remaining 63 bits are the
	// fractional part. The value in the ones place may be zero: for example,
	// FromFloat64 produces a zero Fraction for zeros and infinities.
	Fraction uint64
}

// FromFloat64 converts a 64-bit floating-point number to an 80-bit extended
// floating-point number.
//
// Zeros, subnormals, normal numbers and infinities keep their sign and are
// converted without loss: the 52 stored mantissa bits are only ever shifted
// left into the wider 64-bit fraction. Subnormal inputs are normalized so that
// the ones place of the result is set. Every NaN maps to the maximum exponent
// with all fraction bits set, so NaN payloads are discarded.
func FromFloat64(x float64) (e Extended) {
	const (
		mantBits64 = 52        // stored mantissa bits in a float64
		expMax64   = 1<<11 - 1 // all-ones float64 exponent: infinity or NaN
		bias64     = 1023      // float64 exponent bias
		expMax80   = 1<<15 - 1 // all-ones extended exponent: infinity or NaN
		bias80     = 16383     // extended exponent bias
	)

	raw := math.Float64bits(x)
	// Shifting by 48 moves the float64 sign (bit 63) into bit 15, which is
	// where SignExponent keeps it.
	signBit := uint16(raw>>48) & 0x8000
	exp64 := int(raw>>mantBits64) & expMax64
	frac := raw & (1<<mantBits64 - 1)

	if exp64 == expMax64 {
		// Infinity or NaN.
		if frac != 0 {
			return Extended{SignExponent: signBit | expMax80, Fraction: ^uint64(0)}
		}
		return Extended{SignExponent: signBit | expMax80}
	}

	if exp64 != 0 {
		// Normal number:
		//   2^(e64 - 1023) * 1.fraction = 2^(e80 - 16383) * 1.fraction
		//   e80 = e64 + 16383 - 1023
		// The implicit leading one becomes the explicit ones place (bit 63)
		// and the 52 mantissa bits sit directly below it.
		return Extended{
			SignExponent: signBit | uint16(exp64+bias80-bias64),
			Fraction:     1<<63 | frac<<11,
		}
	}

	if frac == 0 {
		// Signed zero.
		return Extended{SignExponent: signBit}
	}

	// Subnormal number: the value is frac * 2^-1074. Shifting frac left by
	// its leading-zero count puts its top set bit in the ones place, giving
	//   value = (Fraction * 2^-63) * 2^(e80 - 16383), Fraction = frac << shift
	//   e80 = 16383 - 1074 + 63 - shift = 16383 - 1022 + 11 - shift
	shift := bits.LeadingZeros64(frac)
	return Extended{
		SignExponent: signBit | uint16(bias80-(bias64-1)+11-shift),
		Fraction:     frac << shift,
	}
}
50 changes: 50 additions & 0 deletions extended_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package extended

import (
"math"
"strconv"
"testing"
)

// TestFromF64 checks FromFloat64 against a table of known encodings. Every
// entry is run as its own subtest and is exercised with both signs.
func TestFromF64(t *testing.T) {
	// Each entry gives the biased exponent and fraction expected for the
	// positive input; the negative variant differs only in the sign bit.
	tests := []struct {
		name     string
		exponent uint16
		fraction uint64
		input    float64
	}{
		{name: "basic_one", exponent: 16383, fraction: 1 << 63, input: 1.0},
		{name: "basic_two", exponent: 16384, fraction: 1 << 63, input: 2.0},
		{name: "basic_half", exponent: 16382, fraction: 1 << 63, input: 0.5},
		{name: "small", exponent: 16383 - 10, fraction: 1 << 63, input: 0.0009765625},
		{name: "smaller", exponent: 16383 - 100, fraction: 1 << 63, input: 7.888609052210118e-31},
		{name: "after_one", exponent: 16383, fraction: (1 << 63) + (1 << 11), input: 1.0000000000000002},
		{name: "infinity", exponent: 32767, fraction: 0, input: math.Inf(0)},
		{name: "zero", exponent: 0, fraction: 0, input: 0.0},
		{name: "nan", exponent: 32767, fraction: ^uint64(0), input: math.NaN()},
		{name: "smallest_normal", exponent: 15361, fraction: 1 << 63, input: 2.2250738585072014e-308},
		{name: "subnormal", exponent: 15360, fraction: 1 << 63, input: 1.1125369292536007e-308},
		{name: "smallest_subnormal", exponent: 15309, fraction: 1 << 63, input: 5e-324},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Positive first, then negative.
			for _, signBit := range []uint16{0, 1 << 15} {
				in := tc.input
				if signBit != 0 {
					in = -in
				}
				want := Extended{
					SignExponent: tc.exponent | signBit,
					Fraction:     tc.fraction,
				}
				got := FromFloat64(in)
				if got == want {
					continue
				}
				t.Errorf("FromFloat64(%s) = %04x:%016x, expect %04x:%016x",
					strconv.FormatFloat(in, 'g', -1, 64),
					got.SignExponent, got.Fraction, want.SignExponent, want.Fraction)
			}
		})
	}
}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/depp/extended

go 1.18

0 comments on commit c2da1b1

Please sign in to comment.