forked from angr/vex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfp_80_64.c
636 lines (538 loc) · 18.7 KB
/
fp_80_64.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
#ifndef USED_AS_INCLUDE
#include "../pub/libvex_basictypes.h"
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Test program for developing code for conversions between
x87 64-bit and 80-bit floats.
80-bit format exists only for x86/x86-64, and so the routines
hardwire it as little-endian. The 64-bit format (IEEE double)
could exist on any platform, little or big-endian and so we
have to take that into account. IOW, these routines have to
work correctly when compiled on both big- and little-endian
targets, but the 80-bit images only ever have to exist in
little-endian format.
*/
static void show_f80 ( UChar* );
static void show_f64 ( UChar* );
static inline
UInt read_bit_array ( UChar* arr, UInt n )
{
UChar c = arr[n >> 3];
c >>= (n&7);
return c & 1;
}
static inline
void write_bit_array ( UChar* arr, UInt n, UInt b )
{
UChar c = arr[n >> 3];
c &= ~(1 << (n&7));
c |= ((b&1) << (n&7));
arr[n >> 3] = c;
}
static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
{
asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
:
: "r" (&f80[0]), "r" (&f64[0])
: "memory" );
}
static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
{
asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
:
: "r" (&f64[0]), "r" (&f80[0])
: "memory" );
}
#endif /* ndef USED_AS_INCLUDE */
/* 80 and 64-bit floating point formats:
80-bit:
S 0 0-------0 zero
S 0 0X------X denormals
S 1-7FFE 1X------X normals (all normals have leading 1)
S 7FFF 10------0 infinity
S 7FFF 10X-----X snan
S 7FFF 11X-----X qnan
S is the sign bit. For runs X----X, at least one of the Xs must be
nonzero. Exponent is 15 bits, fractional part is 63 bits, and
there is an explicitly represented leading 1, and a sign bit,
giving 80 in total.
64-bit avoids the confusion of an explicitly represented leading 1
and so is simpler:
S 0 0------0 zero
S 0 X------X denormals
S 1-7FE any normals
S 7FF 0------0 infinity
S 7FF 0X-----X snan
S 7FF 1X-----X qnan
Exponent is 11 bits, fractional part is 52 bits, and there is a
sign bit, giving 64 in total.
*/
/* Convert a IEEE754 double (64-bit) into an x87 extended double
(80-bit), mimicing the hardware fairly closely. Both numbers are
stored little-endian. Limitations, all of which could be fixed,
given some level of hassle:
* Identity of NaNs is not preserved.
See comments in the code for more details.
*/
static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
{
Bool mantissaIsZero;
Int bexp, i, j, shift;
UChar sign;
sign = toUChar( (f64[7] >> 7) & 1 );
bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
bexp &= 0x7FF;
mantissaIsZero = False;
if (bexp == 0 || bexp == 0x7FF) {
/* We'll need to know whether or not the mantissa (bits 51:0) is
all zeroes in order to handle these cases. So figure it
out. */
mantissaIsZero
= toBool(
(f64[6] & 0x0F) == 0
&& f64[5] == 0 && f64[4] == 0 && f64[3] == 0
&& f64[2] == 0 && f64[1] == 0 && f64[0] == 0
);
}
/* If the exponent is zero, either we have a zero or a denormal.
Produce a zero. This is a hack in that it forces denormals to
zero. Could do better. */
if (bexp == 0) {
f80[9] = toUChar( sign << 7 );
f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
= f80[3] = f80[2] = f80[1] = f80[0] = 0;
if (mantissaIsZero)
/* It really is zero, so that's all we can do. */
return;
/* There is at least one 1-bit in the mantissa. So it's a
potentially denormalised double -- but we can produce a
normalised long double. Count the leading zeroes in the
mantissa so as to decide how much to bump the exponent down
by. Note, this is SLOW. */
shift = 0;
for (i = 51; i >= 0; i--) {
if (read_bit_array(f64, i))
break;
shift++;
}
/* and copy into place as many bits as we can get our hands on. */
j = 63;
for (i = 51 - shift; i >= 0; i--) {
write_bit_array( f80, j,
read_bit_array( f64, i ) );
j--;
}
/* Set the exponent appropriately, and we're done. */
bexp -= shift;
bexp += (16383 - 1023);
f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
f80[8] = toUChar( bexp & 0xFF );
return;
}
/* If the exponent is 7FF, this is either an Infinity, a SNaN or
QNaN, as determined by examining bits 51:0, thus:
0 ... 0 Inf
0X ... X SNaN
1X ... X QNaN
where at least one of the Xs is not zero.
*/
if (bexp == 0x7FF) {
if (mantissaIsZero) {
/* Produce an appropriately signed infinity:
S 1--1 (15) 1 0--0 (63)
*/
f80[9] = toUChar( (sign << 7) | 0x7F );
f80[8] = 0xFF;
f80[7] = 0x80;
f80[6] = f80[5] = f80[4] = f80[3]
= f80[2] = f80[1] = f80[0] = 0;
return;
}
/* So it's either a QNaN or SNaN. Distinguish by considering
bit 51. Note, this destroys all the trailing bits
(identity?) of the NaN. IEEE754 doesn't require preserving
these (it only requires that there be one QNaN value and one
SNaN value), but x87 does seem to have some ability to
preserve them. Anyway, here, the NaN's identity is
destroyed. Could be improved. */
if (f64[6] & 8) {
/* QNaN. Make a QNaN:
S 1--1 (15) 1 1--1 (63)
*/
f80[9] = toUChar( (sign << 7) | 0x7F );
f80[8] = 0xFF;
f80[7] = 0xFF;
f80[6] = f80[5] = f80[4] = f80[3]
= f80[2] = f80[1] = f80[0] = 0xFF;
} else {
/* SNaN. Make a SNaN:
S 1--1 (15) 0 1--1 (63)
*/
f80[9] = toUChar( (sign << 7) | 0x7F );
f80[8] = 0xFF;
f80[7] = 0x7F;
f80[6] = f80[5] = f80[4] = f80[3]
= f80[2] = f80[1] = f80[0] = 0xFF;
}
return;
}
/* It's not a zero, denormal, infinity or nan. So it must be a
normalised number. Rebias the exponent and build the new
number. */
bexp += (16383 - 1023);
f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
f80[8] = toUChar( bexp & 0xFF );
f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
| ((f64[5] >> 5) & 7) );
f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
f80[0] = toUChar( 0 );
}
/* Convert a x87 extended double (80-bit) into an IEEE 754 double
(64-bit), mimicking the hardware fairly closely. Both numbers are
stored little-endian. Limitations, both of which could be fixed,
given some level of hassle:
* Rounding following truncation could be a bit better.
* Identity of NaNs is not preserved.
See comments in the code for more details.
*/
static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
{
Bool isInf;
Int bexp, i, j;
UChar sign;
sign = toUChar((f80[9] >> 7) & 1);
bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
bexp &= 0x7FFF;
/* If the exponent is zero, either we have a zero or a denormal.
But an extended precision denormal becomes a double precision
zero, so in either case, just produce the appropriately signed
zero. */
if (bexp == 0) {
f64[7] = toUChar(sign << 7);
f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
return;
}
/* If the exponent is 7FFF, this is either an Infinity, a SNaN or
QNaN, as determined by examining bits 62:0, thus:
0 ... 0 Inf
0X ... X SNaN
1X ... X QNaN
where at least one of the Xs is not zero.
*/
if (bexp == 0x7FFF) {
isInf = toBool(
(f80[7] & 0x7F) == 0
&& f80[6] == 0 && f80[5] == 0 && f80[4] == 0
&& f80[3] == 0 && f80[2] == 0 && f80[1] == 0
&& f80[0] == 0
);
if (isInf) {
if (0 == (f80[7] & 0x80))
goto wierd_NaN;
/* Produce an appropriately signed infinity:
S 1--1 (11) 0--0 (52)
*/
f64[7] = toUChar((sign << 7) | 0x7F);
f64[6] = 0xF0;
f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
return;
}
/* So it's either a QNaN or SNaN. Distinguish by considering
bit 62. Note, this destroys all the trailing bits
(identity?) of the NaN. IEEE754 doesn't require preserving
these (it only requires that there be one QNaN value and one
SNaN value), but x87 does seem to have some ability to
preserve them. Anyway, here, the NaN's identity is
destroyed. Could be improved. */
if (f80[8] & 0x40) {
/* QNaN. Make a QNaN:
S 1--1 (11) 1 1--1 (51)
*/
f64[7] = toUChar((sign << 7) | 0x7F);
f64[6] = 0xFF;
f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
} else {
/* SNaN. Make a SNaN:
S 1--1 (11) 0 1--1 (51)
*/
f64[7] = toUChar((sign << 7) | 0x7F);
f64[6] = 0xF7;
f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
}
return;
}
/* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
zero, the x87 FPU appears to consider the number denormalised
and converts it to a QNaN. */
if (0 == (f80[7] & 0x80)) {
wierd_NaN:
/* Strange hardware QNaN:
S 1--1 (11) 1 0--0 (51)
*/
/* On a PIII, these QNaNs always appear with sign==1. I have
no idea why. */
f64[7] = (1 /*sign*/ << 7) | 0x7F;
f64[6] = 0xF8;
f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
return;
}
/* It's not a zero, denormal, infinity or nan. So it must be a
normalised number. Rebias the exponent and consider. */
bexp -= (16383 - 1023);
if (bexp >= 0x7FF) {
/* It's too big for a double. Construct an infinity. */
f64[7] = toUChar((sign << 7) | 0x7F);
f64[6] = 0xF0;
f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
return;
}
if (bexp <= 0) {
/* It's too small for a normalised double. First construct a
zero and then see if it can be improved into a denormal. */
f64[7] = toUChar(sign << 7);
f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
if (bexp < -52)
/* Too small even for a denormal. */
return;
/* Ok, let's make a denormal. Note, this is SLOW. */
/* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
/* bexp is in range -52 .. 0 inclusive */
for (i = 63; i >= 0; i--) {
j = i - 12 + bexp;
if (j < 0) break;
/* We shouldn't really call vassert from generated code. */
assert(j >= 0 && j < 52);
write_bit_array ( f64,
j,
read_bit_array ( f80, i ) );
}
/* and now we might have to round ... */
if (read_bit_array(f80, 10+1 - bexp) == 1)
goto do_rounding;
return;
}
/* Ok, it's a normalised number which is representable as a double.
Copy the exponent and mantissa into place. */
/*
for (i = 0; i < 52; i++)
write_bit_array ( f64,
i,
read_bit_array ( f80, i+11 ) );
*/
f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
/* Now consider any rounding that needs to happen as a result of
truncating the mantissa. */
if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
/* If the bottom bits of f80 are "100 0000 0000", then the
infinitely precise value is deemed to be mid-way between the
two closest representable values. Since we're doing
round-to-nearest (the default mode), in that case it is the
bit immediately above which indicates whether we should round
upwards or not -- if 0, we don't. All that is encapsulated
in the following simple test. */
if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
return;
do_rounding:
/* Round upwards. This is a kludge. Once in every 2^24
roundings (statistically) the bottom three bytes are all 0xFF
and so we don't round at all. Could be improved. */
if (f64[0] != 0xFF) {
f64[0]++;
}
else
if (f64[0] == 0xFF && f64[1] != 0xFF) {
f64[0] = 0;
f64[1]++;
}
else
if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
f64[0] = 0;
f64[1] = 0;
f64[2]++;
}
/* else we don't round, but we should. */
}
}
#ifndef USED_AS_INCLUDE
//////////////
static void show_f80 ( UChar* f80 )
{
Int i;
printf("%d ", read_bit_array(f80, 79));
for (i = 78; i >= 64; i--)
printf("%d", read_bit_array(f80, i));
printf(" %d ", read_bit_array(f80, 63));
for (i = 62; i >= 0; i--)
printf("%d", read_bit_array(f80, i));
}
static void show_f64le ( UChar* f64 )
{
Int i;
printf("%d ", read_bit_array(f64, 63));
for (i = 62; i >= 52; i--)
printf("%d", read_bit_array(f64, i));
printf(" ");
for (i = 51; i >= 0; i--)
printf("%d", read_bit_array(f64, i));
}
//////////////
/* Convert f80 to a 64-bit IEEE double using both the hardware and the
soft version, and compare the results. If they differ, print
details and return 1. If they are identical, return 0.
*/
int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
{
Char buf64s[100], buf64h[100];
Bool same;
Int k;
convert_f80le_to_f64le_HW(f80, f64h);
convert_f80le_to_f64le(f80, f64s);
same = True;
for (k = 0; k < 8; k++) {
if (f64s[k] != f64h[k]) {
same = False; break;
}
}
/* bitwise identical */
if (same)
return 0;
sprintf(buf64s, "%.16e", *(double*)f64s);
sprintf(buf64h, "%.16e", *(double*)f64h);
/* Not bitwise identical, but pretty darn close */
if (0 == strcmp(buf64s, buf64h))
return 0;
printf("\n");
printf("f80: "); show_f80(f80); printf("\n");
printf("f64h: "); show_f64le(f64h); printf("\n");
printf("f64s: "); show_f64le(f64s); printf("\n");
printf("[test %d] %.16Le -> (hw %s, sw %s)\n",
test_no, *(long double*)f80,
buf64h, buf64s );
return 1;
}
/* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
using both the hardware and the soft version, and compare the
results. If they differ, print details and return 1. If they are
identical, return 0.
*/
int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
{
Char buf80s[100], buf80h[100];
Bool same;
Int k;
convert_f64le_to_f80le_HW(f64, f80h);
convert_f64le_to_f80le(f64, f80s);
same = True;
for (k = 0; k < 10; k++) {
if (f80s[k] != f80h[k]) {
same = False; break;
}
}
/* bitwise identical */
if (same)
return 0;
sprintf(buf80s, "%.20Le", *(long double*)f80s);
sprintf(buf80h, "%.20Le", *(long double*)f80h);
/* Not bitwise identical, but pretty darn close */
if (0 == strcmp(buf80s, buf80h))
return 0;
printf("\n");
printf("f64: "); show_f64le(f64); printf("\n");
printf("f80h: "); show_f80(f80h); printf("\n");
printf("f80s: "); show_f80(f80s); printf("\n");
printf("[test %d] %.16e -> (hw %s, sw %s)\n",
test_no, *(double*)f64,
buf80h, buf80s );
return 1;
}
void do_80_to_64_tests ( void )
{
UInt b9,b8,b7,i, j;
Int fails=0, tests=0;
UChar* f64h = malloc(8);
UChar* f64s = malloc(8);
UChar* f80 = malloc(10);
int STEP = 1;
srandom(4343);
/* Ten million random bit patterns */
for (i = 0; i < 10000000; i++) {
tests++;
for (j = 0; j < 10; j++)
f80[j] = (random() >> 7) & 255;
fails += do_80_to_64_test(tests, f80, f64h, f64s);
}
/* 2^24 numbers in which the first 24 bits are tested exhaustively
-- this covers the sign, exponent and leading part of the
mantissa. */
for (b9 = 0; b9 < 256; b9 += STEP) {
for (b8 = 0; b8 < 256; b8 += STEP) {
for (b7 = 0; b7 < 256; b7 += STEP) {
tests++;
for (i = 0; i < 10; i++)
f80[i] = 0;
for (i = 0; i < 8; i++)
f64h[i] = f64s[i] = 0;
f80[9] = b9;
f80[8] = b8;
f80[7] = b7;
fails += do_80_to_64_test(tests, f80, f64h, f64s);
}}}
printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails);
}
void do_64_to_80_tests ( void )
{
UInt b7,b6,b5,i, j;
Int fails=0, tests=0;
UChar* f80h = malloc(10);
UChar* f80s = malloc(10);
UChar* f64 = malloc(8);
int STEP = 1;
srandom(2323);
/* Ten million random bit patterns */
for (i = 0; i < 10000000; i++) {
tests++;
for (j = 0; j < 8; j++)
f64[j] = (random() >> 13) & 255;
fails += do_64_to_80_test(tests, f64, f80h, f80s);
}
/* 2^24 numbers in which the first 24 bits are tested exhaustively
-- this covers the sign, exponent and leading part of the
mantissa. */
for (b7 = 0; b7 < 256; b7 += STEP) {
for (b6 = 0; b6 < 256; b6 += STEP) {
for (b5 = 0; b5 < 256; b5 += STEP) {
tests++;
for (i = 0; i < 8; i++)
f64[i] = 0;
for (i = 0; i < 10; i++)
f80h[i] = f80s[i] = 0;
f64[7] = b7;
f64[6] = b6;
f64[5] = b5;
fails += do_64_to_80_test(tests, f64, f80h, f80s);
}}}
printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails);
}
int main ( void )
{
do_80_to_64_tests();
do_64_to_80_tests();
return 0;
}
#endif /* ndef USED_AS_INCLUDE */