forked from gcc-mirror/gcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
decBasic.c
3909 lines (3664 loc) · 157 KB
/
decBasic.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Common base code for the decNumber C Library.
Copyright (C) 2007-2020 Free Software Foundation, Inc.
Contributed by IBM Corporation. Author Mike Cowlishaw.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* ------------------------------------------------------------------ */
/* decBasic.c -- common base code for Basic decimal types */
/* ------------------------------------------------------------------ */
/* This module comprises code that is shared between decDouble and */
/* decQuad (but not decSingle). The main arithmetic operations are */
/* here (Add, Subtract, Multiply, FMA, and Division operators). */
/* */
/* Unlike decNumber, parameterization takes place at compile time */
/* rather than at runtime. The parameters are set in the decDouble.c */
/* (etc.) files, which then include this one to produce the compiled */
/* code. The functions here, therefore, are code shared between */
/* multiple formats. */
/* */
/* This must be included after decCommon.c. */
/* ------------------------------------------------------------------ */
/* Names here refer to decFloat rather than to decDouble, etc., and */
/* the functions are in strict alphabetical order. */
/* The compile-time flags SINGLE, DOUBLE, and QUAD are set up in */
/* decCommon.c */
#if !defined(QUAD)
#error decBasic.c must be included after decCommon.c
#endif
#if SINGLE
#error Routines in decBasic.c are for decDouble and decQuad only
#endif
/* Private constants */
#define DIVIDE 0x80000000 /* Divide operations [as flags] */
#define REMAINDER 0x40000000 /* .. */
#define DIVIDEINT 0x20000000 /* .. */
#define REMNEAR 0x10000000 /* .. */
/* Private functions (local, used only by routines in this module) */
static decFloat *decDivide(decFloat *, const decFloat *,
const decFloat *, decContext *, uInt);
static decFloat *decCanonical(decFloat *, const decFloat *);
static void decFiniteMultiply(bcdnum *, uByte *, const decFloat *,
const decFloat *);
static decFloat *decInfinity(decFloat *, const decFloat *);
static decFloat *decInvalid(decFloat *, decContext *);
static decFloat *decNaNs(decFloat *, const decFloat *, const decFloat *,
decContext *);
static Int decNumCompare(const decFloat *, const decFloat *, Flag);
static decFloat *decToIntegral(decFloat *, const decFloat *, decContext *,
enum rounding, Flag);
static uInt decToInt32(const decFloat *, decContext *, enum rounding,
Flag, Flag);
/* ------------------------------------------------------------------ */
/* decCanonical -- copy a decFloat, making canonical */
/* */
/* result gets the canonicalized df */
/* df is the decFloat to copy and make canonical */
/* returns result */
/* */
/* This is exposed via decFloatCanonical for Double and Quad only. */
/* This works on specials, too; no error or exception is possible. */
/* ------------------------------------------------------------------ */
static decFloat * decCanonical(decFloat *result, const decFloat *df) {
uInt encode, precode, dpd; /* work */
uInt inword, uoff, canon; /* .. */
Int n; /* counter (down) */
if (df!=result) *result=*df; /* effect copy if needed */
if (DFISSPECIAL(result)) {
if (DFISINF(result)) return decInfinity(result, df); /* clean Infinity */
/* is a NaN */
DFWORD(result, 0)&=~ECONNANMASK; /* clear ECON except selector */
if (DFISCCZERO(df)) return result; /* coefficient continuation is 0 */
/* drop through to check payload */
}
/* return quickly if the coefficient continuation is canonical */
{ /* declare block */
#if DOUBLE
uInt sourhi=DFWORD(df, 0);
uInt sourlo=DFWORD(df, 1);
if (CANONDPDOFF(sourhi, 8)
&& CANONDPDTWO(sourhi, sourlo, 30)
&& CANONDPDOFF(sourlo, 20)
&& CANONDPDOFF(sourlo, 10)
&& CANONDPDOFF(sourlo, 0)) return result;
#elif QUAD
uInt sourhi=DFWORD(df, 0);
uInt sourmh=DFWORD(df, 1);
uInt sourml=DFWORD(df, 2);
uInt sourlo=DFWORD(df, 3);
if (CANONDPDOFF(sourhi, 4)
&& CANONDPDTWO(sourhi, sourmh, 26)
&& CANONDPDOFF(sourmh, 16)
&& CANONDPDOFF(sourmh, 6)
&& CANONDPDTWO(sourmh, sourml, 28)
&& CANONDPDOFF(sourml, 18)
&& CANONDPDOFF(sourml, 8)
&& CANONDPDTWO(sourml, sourlo, 30)
&& CANONDPDOFF(sourlo, 20)
&& CANONDPDOFF(sourlo, 10)
&& CANONDPDOFF(sourlo, 0)) return result;
#endif
} /* block */
/* Loop to repair a non-canonical coefficent, as needed */
inword=DECWORDS-1; /* current input word */
uoff=0; /* bit offset of declet */
encode=DFWORD(result, inword);
for (n=DECLETS-1; n>=0; n--) { /* count down declets of 10 bits */
dpd=encode>>uoff;
uoff+=10;
if (uoff>32) { /* crossed uInt boundary */
inword--;
encode=DFWORD(result, inword);
uoff-=32;
dpd|=encode<<(10-uoff); /* get pending bits */
}
dpd&=0x3ff; /* clear uninteresting bits */
if (dpd<0x16e) continue; /* must be canonical */
canon=BIN2DPD[DPD2BIN[dpd]]; /* determine canonical declet */
if (canon==dpd) continue; /* have canonical declet */
/* need to replace declet */
if (uoff>=10) { /* all within current word */
encode&=~(0x3ff<<(uoff-10)); /* clear the 10 bits ready for replace */
encode|=canon<<(uoff-10); /* insert the canonical form */
DFWORD(result, inword)=encode; /* .. and save */
continue;
}
/* straddled words */
precode=DFWORD(result, inword+1); /* get previous */
precode&=0xffffffff>>(10-uoff); /* clear top bits */
DFWORD(result, inword+1)=precode|(canon<<(32-(10-uoff)));
encode&=0xffffffff<<uoff; /* clear bottom bits */
encode|=canon>>(10-uoff); /* insert canonical */
DFWORD(result, inword)=encode; /* .. and save */
} /* n */
return result;
} /* decCanonical */
/* ------------------------------------------------------------------ */
/* decDivide -- divide operations */
/* */
/* result gets the result of dividing dfl by dfr: */
/* dfl is the first decFloat (lhs) */
/* dfr is the second decFloat (rhs) */
/* set is the context */
/* op is the operation selector */
/* returns result */
/* */
/* op is one of DIVIDE, REMAINDER, DIVIDEINT, or REMNEAR. */
/* ------------------------------------------------------------------ */
#define DIVCOUNT 0 /* 1 to instrument subtractions counter */
#define DIVBASE ((uInt)BILLION) /* the base used for divide */
#define DIVOPLEN DECPMAX9 /* operand length ('digits' base 10**9) */
#define DIVACCLEN (DIVOPLEN*3) /* accumulator length (ditto) */
static decFloat * decDivide(decFloat *result, const decFloat *dfl,
const decFloat *dfr, decContext *set, uInt op) {
decFloat quotient; /* for remainders */
bcdnum num; /* for final conversion */
uInt acc[DIVACCLEN]; /* coefficent in base-billion .. */
uInt div[DIVOPLEN]; /* divisor in base-billion .. */
uInt quo[DIVOPLEN+1]; /* quotient in base-billion .. */
uByte bcdacc[(DIVOPLEN+1)*9+2]; /* for quotient in BCD, +1, +1 */
uInt *msua, *msud, *msuq; /* -> msu of acc, div, and quo */
Int divunits, accunits; /* lengths */
Int quodigits; /* digits in quotient */
uInt *lsua, *lsuq; /* -> current acc and quo lsus */
Int length, multiplier; /* work */
uInt carry, sign; /* .. */
uInt *ua, *ud, *uq; /* .. */
uByte *ub; /* .. */
uInt uiwork; /* for macros */
uInt divtop; /* top unit of div adjusted for estimating */
#if DIVCOUNT
static uInt maxcount=0; /* worst-seen subtractions count */
uInt divcount=0; /* subtractions count [this divide] */
#endif
/* calculate sign */
num.sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr)) { /* either is special? */
/* NaNs are handled as usual */
if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
/* one or two infinities */
if (DFISINF(dfl)) {
if (DFISINF(dfr)) return decInvalid(result, set); /* Two infinities bad */
if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); /* as is rem */
/* Infinity/x is infinite and quiet, even if x=0 */
DFWORD(result, 0)=num.sign;
return decInfinity(result, result);
}
/* must be x/Infinity -- remainders are lhs */
if (op&(REMAINDER|REMNEAR)) return decCanonical(result, dfl);
/* divides: return zero with correct sign and exponent depending */
/* on op (Etiny for divide, 0 for divideInt) */
decFloatZero(result);
if (op==DIVIDEINT) DFWORD(result, 0)|=num.sign; /* add sign */
else DFWORD(result, 0)=num.sign; /* zeros the exponent, too */
return result;
}
/* next, handle zero operands (x/0 and 0/x) */
if (DFISZERO(dfr)) { /* x/0 */
if (DFISZERO(dfl)) { /* 0/0 is undefined */
decFloatZero(result);
DFWORD(result, 0)=DECFLOAT_qNaN;
set->status|=DEC_Division_undefined;
return result;
}
if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); /* bad rem */
set->status|=DEC_Division_by_zero;
DFWORD(result, 0)=num.sign;
return decInfinity(result, result); /* x/0 -> signed Infinity */
}
num.exponent=GETEXPUN(dfl)-GETEXPUN(dfr); /* ideal exponent */
if (DFISZERO(dfl)) { /* 0/x (x!=0) */
/* if divide, result is 0 with ideal exponent; divideInt has */
/* exponent=0, remainders give zero with lower exponent */
if (op&DIVIDEINT) {
decFloatZero(result);
DFWORD(result, 0)|=num.sign; /* add sign */
return result;
}
if (!(op&DIVIDE)) { /* a remainder */
/* exponent is the minimum of the operands */
num.exponent=MINI(GETEXPUN(dfl), GETEXPUN(dfr));
/* if the result is zero the sign shall be sign of dfl */
num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
}
bcdacc[0]=0;
num.msd=bcdacc; /* -> 0 */
num.lsd=bcdacc; /* .. */
return decFinalize(result, &num, set); /* [divide may clamp exponent] */
} /* 0/x */
/* [here, both operands are known to be finite and non-zero] */
/* extract the operand coefficents into 'units' which are */
/* base-billion; the lhs is high-aligned in acc and the msu of both */
/* acc and div is at the right-hand end of array (offset length-1); */
/* the quotient can need one more unit than the operands as digits */
/* in it are not necessarily aligned neatly; further, the quotient */
/* may not start accumulating until after the end of the initial */
/* operand in acc if that is small (e.g., 1) so the accumulator */
/* must have at least that number of units extra (at the ls end) */
GETCOEFFBILL(dfl, acc+DIVACCLEN-DIVOPLEN);
GETCOEFFBILL(dfr, div);
/* zero the low uInts of acc */
acc[0]=0;
acc[1]=0;
acc[2]=0;
acc[3]=0;
#if DOUBLE
#if DIVOPLEN!=2
#error Unexpected Double DIVOPLEN
#endif
#elif QUAD
acc[4]=0;
acc[5]=0;
acc[6]=0;
acc[7]=0;
#if DIVOPLEN!=4
#error Unexpected Quad DIVOPLEN
#endif
#endif
/* set msu and lsu pointers */
msua=acc+DIVACCLEN-1; /* [leading zeros removed below] */
msuq=quo+DIVOPLEN;
/*[loop for div will terminate because operands are non-zero] */
for (msud=div+DIVOPLEN-1; *msud==0;) msud--;
/* the initial least-significant unit of acc is set so acc appears */
/* to have the same length as div. */
/* This moves one position towards the least possible for each */
/* iteration */
divunits=(Int)(msud-div+1); /* precalculate */
lsua=msua-divunits+1; /* initial working lsu of acc */
lsuq=msuq; /* and of quo */
/* set up the estimator for the multiplier; this is the msu of div, */
/* plus two bits from the unit below (if any) rounded up by one if */
/* there are any non-zero bits or units below that [the extra two */
/* bits makes for a much better estimate when the top unit is small] */
divtop=*msud<<2;
if (divunits>1) {
uInt *um=msud-1;
uInt d=*um;
if (d>=750000000) {divtop+=3; d-=750000000;}
else if (d>=500000000) {divtop+=2; d-=500000000;}
else if (d>=250000000) {divtop++; d-=250000000;}
if (d) divtop++;
else for (um--; um>=div; um--) if (*um) {
divtop++;
break;
}
} /* >1 unit */
#if DECTRACE
{Int i;
printf("----- div=");
for (i=divunits-1; i>=0; i--) printf("%09ld ", (LI)div[i]);
printf("\n");}
#endif
/* now collect up to DECPMAX+1 digits in the quotient (this may */
/* need OPLEN+1 uInts if unaligned) */
quodigits=0; /* no digits yet */
for (;; lsua--) { /* outer loop -- each input position */
#if DECCHECK
if (lsua<acc) {
printf("Acc underrun...\n");
break;
}
#endif
#if DECTRACE
printf("Outer: quodigits=%ld acc=", (LI)quodigits);
for (ua=msua; ua>=lsua; ua--) printf("%09ld ", (LI)*ua);
printf("\n");
#endif
*lsuq=0; /* default unit result is 0 */
for (;;) { /* inner loop -- calculate quotient unit */
/* strip leading zero units from acc (either there initially or */
/* from subtraction below); this may strip all if exactly 0 */
for (; *msua==0 && msua>=lsua;) msua--;
accunits=(Int)(msua-lsua+1); /* [maybe 0] */
/* subtraction is only necessary and possible if there are as */
/* least as many units remaining in acc for this iteration as */
/* there are in div */
if (accunits<divunits) {
if (accunits==0) msua++; /* restore */
break;
}
/* If acc is longer than div then subtraction is definitely */
/* possible (as msu of both is non-zero), but if they are the */
/* same length a comparison is needed. */
/* If a subtraction is needed then a good estimate of the */
/* multiplier for the subtraction is also needed in order to */
/* minimise the iterations of this inner loop because the */
/* subtractions needed dominate division performance. */
if (accunits==divunits) {
/* compare the high divunits of acc and div: */
/* acc<div: this quotient unit is unchanged; subtraction */
/* will be possible on the next iteration */
/* acc==div: quotient gains 1, set acc=0 */
/* acc>div: subtraction necessary at this position */
for (ud=msud, ua=msua; ud>div; ud--, ua--) if (*ud!=*ua) break;
/* [now at first mismatch or lsu] */
if (*ud>*ua) break; /* next time... */
if (*ud==*ua) { /* all compared equal */
*lsuq+=1; /* increment result */
msua=lsua; /* collapse acc units */
*msua=0; /* .. to a zero */
break;
}
/* subtraction necessary; estimate multiplier [see above] */
/* if both *msud and *msua are small it is cost-effective to */
/* bring in part of the following units (if any) to get a */
/* better estimate (assume some other non-zero in div) */
#define DIVLO 1000000U
#define DIVHI (DIVBASE/DIVLO)
#if DECUSE64
if (divunits>1) {
/* there cannot be a *(msud-2) for DECDOUBLE so next is */
/* an exact calculation unless DECQUAD (which needs to */
/* assume bits out there if divunits>2) */
uLong mul=(uLong)*msua * DIVBASE + *(msua-1);
uLong div=(uLong)*msud * DIVBASE + *(msud-1);
#if QUAD
if (divunits>2) div++;
#endif
mul/=div;
multiplier=(Int)mul;
}
else multiplier=*msua/(*msud);
#else
if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
multiplier=(*msua*DIVHI + *(msua-1)/DIVLO)
/(*msud*DIVHI + *(msud-1)/DIVLO +1);
}
else multiplier=(*msua<<2)/divtop;
#endif
}
else { /* accunits>divunits */
/* msud is one unit 'lower' than msua, so estimate differently */
#if DECUSE64
uLong mul;
/* as before, bring in extra digits if possible */
if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
mul=((uLong)*msua * DIVHI * DIVBASE) + *(msua-1) * DIVHI
+ *(msua-2)/DIVLO;
mul/=(*msud*DIVHI + *(msud-1)/DIVLO +1);
}
else if (divunits==1) {
mul=(uLong)*msua * DIVBASE + *(msua-1);
mul/=*msud; /* no more to the right */
}
else {
mul=(uLong)(*msua) * (uInt)(DIVBASE<<2)
+ (*(msua-1)<<2);
mul/=divtop; /* [divtop already allows for sticky bits] */
}
multiplier=(Int)mul;
#else
multiplier=*msua * ((DIVBASE<<2)/divtop);
#endif
}
if (multiplier==0) multiplier=1; /* marginal case */
*lsuq+=multiplier;
#if DIVCOUNT
/* printf("Multiplier: %ld\n", (LI)multiplier); */
divcount++;
#endif
/* Carry out the subtraction acc-(div*multiplier); for each */
/* unit in div, do the multiply, split to units (see */
/* decFloatMultiply for the algorithm), and subtract from acc */
#define DIVMAGIC 2305843009U /* 2**61/10**9 */
#define DIVSHIFTA 29
#define DIVSHIFTB 32
carry=0;
for (ud=div, ua=lsua; ud<=msud; ud++, ua++) {
uInt lo, hop;
#if DECUSE64
uLong sub=(uLong)multiplier*(*ud)+carry;
if (sub<DIVBASE) {
carry=0;
lo=(uInt)sub;
}
else {
hop=(uInt)(sub>>DIVSHIFTA);
carry=(uInt)(((uLong)hop*DIVMAGIC)>>DIVSHIFTB);
/* the estimate is now in hi; now calculate sub-hi*10**9 */
/* to get the remainder (which will be <DIVBASE)) */
lo=(uInt)sub;
lo-=carry*DIVBASE; /* low word of result */
if (lo>=DIVBASE) {
lo-=DIVBASE; /* correct by +1 */
carry++;
}
}
#else /* 32-bit */
uInt hi;
/* calculate multiplier*(*ud) into hi and lo */
LONGMUL32HI(hi, *ud, multiplier); /* get the high word */
lo=multiplier*(*ud); /* .. and the low */
lo+=carry; /* add the old hi */
carry=hi+(lo<carry); /* .. with any carry */
if (carry || lo>=DIVBASE) { /* split is needed */
hop=(carry<<3)+(lo>>DIVSHIFTA); /* hi:lo/2**29 */
LONGMUL32HI(carry, hop, DIVMAGIC); /* only need the high word */
/* [DIVSHIFTB is 32, so carry can be used directly] */
/* the estimate is now in carry; now calculate hi:lo-est*10**9; */
/* happily the top word of the result is irrelevant because it */
/* will always be zero so this needs only one multiplication */
lo-=(carry*DIVBASE);
/* the correction here will be at most +1; do it */
if (lo>=DIVBASE) {
lo-=DIVBASE;
carry++;
}
}
#endif
if (lo>*ua) { /* borrow needed */
*ua+=DIVBASE;
carry++;
}
*ua-=lo;
} /* ud loop */
if (carry) *ua-=carry; /* accdigits>divdigits [cannot borrow] */
} /* inner loop */
/* the outer loop terminates when there is either an exact result */
/* or enough digits; first update the quotient digit count and */
/* pointer (if any significant digits) */
#if DECTRACE
if (*lsuq || quodigits) printf("*lsuq=%09ld\n", (LI)*lsuq);
#endif
if (quodigits) {
quodigits+=9; /* had leading unit earlier */
lsuq--;
if (quodigits>DECPMAX+1) break; /* have enough */
}
else if (*lsuq) { /* first quotient digits */
const uInt *pow;
for (pow=DECPOWERS; *lsuq>=*pow; pow++) quodigits++;
lsuq--;
/* [cannot have >DECPMAX+1 on first unit] */
}
if (*msua!=0) continue; /* not an exact result */
/* acc is zero iff used all of original units and zero down to lsua */
/* (must also continue to original lsu for correct quotient length) */
if (lsua>acc+DIVACCLEN-DIVOPLEN) continue;
for (; msua>lsua && *msua==0;) msua--;
if (*msua==0 && msua==lsua) break;
} /* outer loop */
/* all of the original operand in acc has been covered at this point */
/* quotient now has at least DECPMAX+2 digits */
/* *msua is now non-0 if inexact and sticky bits */
/* lsuq is one below the last uint of the quotient */
lsuq++; /* set -> true lsu of quo */
if (*msua) *lsuq|=1; /* apply sticky bit */
/* quo now holds the (unrounded) quotient in base-billion; one */
/* base-billion 'digit' per uInt. */
#if DECTRACE
printf("DivQuo:");
for (uq=msuq; uq>=lsuq; uq--) printf(" %09ld", (LI)*uq);
printf("\n");
#endif
/* Now convert to BCD for rounding and cleanup, starting from the */
/* most significant end [offset by one into bcdacc to leave room */
/* for a possible carry digit if rounding for REMNEAR is needed] */
for (uq=msuq, ub=bcdacc+1; uq>=lsuq; uq--, ub+=9) {
uInt top, mid, rem; /* work */
if (*uq==0) { /* no split needed */
UBFROMUI(ub, 0); /* clear 9 BCD8s */
UBFROMUI(ub+4, 0); /* .. */
*(ub+8)=0; /* .. */
continue;
}
/* *uq is non-zero -- split the base-billion digit into */
/* hi, mid, and low three-digits */
#define divsplit9 1000000 /* divisor */
#define divsplit6 1000 /* divisor */
/* The splitting is done by simple divides and remainders, */
/* assuming the compiler will optimize these [GCC does] */
top=*uq/divsplit9;
rem=*uq%divsplit9;
mid=rem/divsplit6;
rem=rem%divsplit6;
/* lay out the nine BCD digits (plus one unwanted byte) */
UBFROMUI(ub, UBTOUI(&BIN2BCD8[top*4]));
UBFROMUI(ub+3, UBTOUI(&BIN2BCD8[mid*4]));
UBFROMUI(ub+6, UBTOUI(&BIN2BCD8[rem*4]));
} /* BCD conversion loop */
ub--; /* -> lsu */
/* complete the bcdnum; quodigits is correct, so the position of */
/* the first non-zero is known */
num.msd=bcdacc+1+(msuq-lsuq+1)*9-quodigits;
num.lsd=ub;
/* make exponent adjustments, etc */
if (lsua<acc+DIVACCLEN-DIVOPLEN) { /* used extra digits */
num.exponent-=(Int)((acc+DIVACCLEN-DIVOPLEN-lsua)*9);
/* if the result was exact then there may be up to 8 extra */
/* trailing zeros in the overflowed quotient final unit */
if (*msua==0) {
for (; *ub==0;) ub--; /* drop zeros */
num.exponent+=(Int)(num.lsd-ub); /* and adjust exponent */
num.lsd=ub;
}
} /* adjustment needed */
#if DIVCOUNT
if (divcount>maxcount) { /* new high-water nark */
maxcount=divcount;
printf("DivNewMaxCount: %ld\n", (LI)maxcount);
}
#endif
if (op&DIVIDE) return decFinalize(result, &num, set); /* all done */
/* Is DIVIDEINT or a remainder; there is more to do -- first form */
/* the integer (this is done 'after the fact', unlike as in */
/* decNumber, so as not to tax DIVIDE) */
/* The first non-zero digit will be in the first 9 digits, known */
/* from quodigits and num.msd, so there is always space for DECPMAX */
/* digits */
length=(Int)(num.lsd-num.msd+1);
/*printf("Length exp: %ld %ld\n", (LI)length, (LI)num.exponent); */
if (length+num.exponent>DECPMAX) { /* cannot fit */
decFloatZero(result);
DFWORD(result, 0)=DECFLOAT_qNaN;
set->status|=DEC_Division_impossible;
return result;
}
if (num.exponent>=0) { /* already an int, or need pad zeros */
for (ub=num.lsd+1; ub<=num.lsd+num.exponent; ub++) *ub=0;
num.lsd+=num.exponent;
}
else { /* too long: round or truncate needed */
Int drop=-num.exponent;
if (!(op&REMNEAR)) { /* simple truncate */
num.lsd-=drop;
if (num.lsd<num.msd) { /* truncated all */
num.lsd=num.msd; /* make 0 */
*num.lsd=0; /* .. [sign still relevant] */
}
}
else { /* round to nearest even [sigh] */
/* round-to-nearest, in-place; msd is at or to right of bcdacc+1 */
/* (this is a special case of Quantize -- q.v. for commentary) */
uByte *roundat; /* -> re-round digit */
uByte reround; /* reround value */
*(num.msd-1)=0; /* in case of left carry, or make 0 */
if (drop<length) roundat=num.lsd-drop+1;
else if (drop==length) roundat=num.msd;
else roundat=num.msd-1; /* [-> 0] */
reround=*roundat;
for (ub=roundat+1; ub<=num.lsd; ub++) {
if (*ub!=0) {
reround=DECSTICKYTAB[reround];
break;
}
} /* check stickies */
if (roundat>num.msd) num.lsd=roundat-1;
else {
num.msd--; /* use the 0 .. */
num.lsd=num.msd; /* .. at the new MSD place */
}
if (reround!=0) { /* discarding non-zero */
uInt bump=0;
/* rounding is DEC_ROUND_HALF_EVEN always */
if (reround>5) bump=1; /* >0.5 goes up */
else if (reround==5) /* exactly 0.5000 .. */
bump=*(num.lsd) & 0x01; /* .. up iff [new] lsd is odd */
if (bump!=0) { /* need increment */
/* increment the coefficient; this might end up with 1000... */
ub=num.lsd;
for (; UBTOUI(ub-3)==0x09090909; ub-=4) UBFROMUI(ub-3, 0);
for (; *ub==9; ub--) *ub=0; /* at most 3 more */
*ub+=1;
if (ub<num.msd) num.msd--; /* carried */
} /* bump needed */
} /* reround!=0 */
} /* remnear */
} /* round or truncate needed */
num.exponent=0; /* all paths */
/*decShowNum(&num, "int"); */
if (op&DIVIDEINT) return decFinalize(result, &num, set); /* all done */
/* Have a remainder to calculate */
decFinalize("ient, &num, set); /* lay out the integer so far */
DFWORD("ient, 0)^=DECFLOAT_Sign; /* negate it */
sign=DFWORD(dfl, 0); /* save sign of dfl */
decFloatFMA(result, "ient, dfr, dfl, set);
if (!DFISZERO(result)) return result;
/* if the result is zero the sign shall be sign of dfl */
DFWORD("ient, 0)=sign; /* construct decFloat of sign */
return decFloatCopySign(result, result, "ient);
} /* decDivide */
/* ------------------------------------------------------------------ */
/* decFiniteMultiply -- multiply two finite decFloats */
/* */
/* num gets the result of multiplying dfl and dfr */
/* bcdacc .. with the coefficient in this array */
/* dfl is the first decFloat (lhs) */
/* dfr is the second decFloat (rhs) */
/* */
/* This effects the multiplication of two decFloats, both known to be */
/* finite, leaving the result in a bcdnum ready for decFinalize (for */
/* use in Multiply) or in a following addition (FMA). */
/* */
/* bcdacc must have space for at least DECPMAX9*18+1 bytes. */
/* No error is possible and no status is set. */
/* ------------------------------------------------------------------ */
/* This routine has two separate implementations of the core */
/* multiplication; both using base-billion. One uses only 32-bit */
/* variables (Ints and uInts) or smaller; the other uses uLongs (for */
/* multiplication and addition only). Both implementations cover */
/* both arithmetic sizes (DOUBLE and QUAD) in order to allow timing */
/* comparisons. In any one compilation only one implementation for */
/* each size can be used, and if DECUSE64 is 0 then use of the 32-bit */
/* version is forced. */
/* */
/* Historical note: an earlier version of this code also supported the */
/* 256-bit format and has been preserved. That is somewhat trickier */
/* during lazy carry splitting because the initial quotient estimate */
/* (est) can exceed 32 bits. */
#define MULTBASE ((uInt)BILLION) /* the base used for multiply */
#define MULOPLEN DECPMAX9 /* operand length ('digits' base 10**9) */
#define MULACCLEN (MULOPLEN*2) /* accumulator length (ditto) */
#define LEADZEROS (MULACCLEN*9 - DECPMAX*2) /* leading zeros always */
/* Assertions: exponent not too large and MULACCLEN is a multiple of 4 */
#if DECEMAXD>9
#error Exponent may overflow when doubled for Multiply
#endif
#if MULACCLEN!=(MULACCLEN/4)*4
/* This assumption is used below only for initialization */
#error MULACCLEN is not a multiple of 4
#endif
static void decFiniteMultiply(bcdnum *num, uByte *bcdacc,
const decFloat *dfl, const decFloat *dfr) {
uInt bufl[MULOPLEN]; /* left coefficient (base-billion) */
uInt bufr[MULOPLEN]; /* right coefficient (base-billion) */
uInt *ui, *uj; /* work */
uByte *ub; /* .. */
uInt uiwork; /* for macros */
#if DECUSE64
uLong accl[MULACCLEN]; /* lazy accumulator (base-billion+) */
uLong *pl; /* work -> lazy accumulator */
uInt acc[MULACCLEN]; /* coefficent in base-billion .. */
#else
uInt acc[MULACCLEN*2]; /* accumulator in base-billion .. */
#endif
uInt *pa; /* work -> accumulator */
/*printf("Base10**9: OpLen=%d MulAcclen=%d\n", OPLEN, MULACCLEN); */
/* Calculate sign and exponent */
num->sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
num->exponent=GETEXPUN(dfl)+GETEXPUN(dfr); /* [see assertion above] */
/* Extract the coefficients and prepare the accumulator */
/* the coefficients of the operands are decoded into base-billion */
/* numbers in uInt arrays (bufl and bufr, LSD at offset 0) of the */
/* appropriate size. */
GETCOEFFBILL(dfl, bufl);
GETCOEFFBILL(dfr, bufr);
#if DECTRACE && 0
printf("CoeffbL:");
for (ui=bufl+MULOPLEN-1; ui>=bufl; ui--) printf(" %08lx", (LI)*ui);
printf("\n");
printf("CoeffbR:");
for (uj=bufr+MULOPLEN-1; uj>=bufr; uj--) printf(" %08lx", (LI)*uj);
printf("\n");
#endif
/* start the 64-bit/32-bit differing paths... */
#if DECUSE64
/* zero the accumulator */
#if MULACCLEN==4
accl[0]=0; accl[1]=0; accl[2]=0; accl[3]=0;
#else /* use a loop */
/* MULACCLEN is a multiple of four, asserted above */
for (pl=accl; pl<accl+MULACCLEN; pl+=4) {
*pl=0; *(pl+1)=0; *(pl+2)=0; *(pl+3)=0;/* [reduce overhead] */
} /* pl */
#endif
/* Effect the multiplication */
/* The multiplcation proceeds using MFC's lazy-carry resolution */
/* algorithm from decNumber. First, the multiplication is */
/* effected, allowing accumulation of the partial products (which */
/* are in base-billion at each column position) into 64 bits */
/* without resolving back to base=billion after each addition. */
/* These 64-bit numbers (which may contain up to 19 decimal digits) */
/* are then split using the Clark & Cowlishaw algorithm (see below). */
/* [Testing for 0 in the inner loop is not really a 'win'] */
for (ui=bufr; ui<bufr+MULOPLEN; ui++) { /* over each item in rhs */
if (*ui==0) continue; /* product cannot affect result */
pl=accl+(ui-bufr); /* where to add the lhs */
for (uj=bufl; uj<bufl+MULOPLEN; uj++, pl++) { /* over each item in lhs */
/* if (*uj==0) continue; // product cannot affect result */
*pl+=((uLong)*ui)*(*uj);
} /* uj */
} /* ui */
/* The 64-bit carries must now be resolved; this means that a */
/* quotient/remainder has to be calculated for base-billion (1E+9). */
/* For this, Clark & Cowlishaw's quotient estimation approach (also */
/* used in decNumber) is needed, because 64-bit divide is generally */
/* extremely slow on 32-bit machines, and may be slower than this */
/* approach even on 64-bit machines. This algorithm splits X */
/* using: */
/* */
/* magic=2**(A+B)/1E+9; // 'magic number' */
/* hop=X/2**A; // high order part of X (by shift) */
/* est=magic*hop/2**B // quotient estimate (may be low by 1) */
/* */
/* A and B are quite constrained; hop and magic must fit in 32 bits, */
/* and 2**(A+B) must be as large as possible (which is 2**61 if */
/* magic is to fit). Further, maxX increases with the length of */
/* the operands (and hence the number of partial products */
/* accumulated); maxX is OPLEN*(10**18), which is up to 19 digits. */
/* */
/* It can be shown that when OPLEN is 2 then the maximum error in */
/* the estimated quotient is <1, but for larger maximum x the */
/* maximum error is above 1 so a correction that is >1 may be */
/* needed. Values of A and B are chosen to satisfy the constraints */
/* just mentioned while minimizing the maximum error (and hence the */
/* maximum correction), as shown in the following table: */
/* */
/* Type OPLEN A B maxX maxError maxCorrection */
/* --------------------------------------------------------- */
/* DOUBLE 2 29 32 <2*10**18 0.63 1 */
/* QUAD 4 30 31 <4*10**18 1.17 2 */
/* */
/* In the OPLEN==2 case there is most choice, but the value for B */
/* of 32 has a big advantage as then the calculation of the */
/* estimate requires no shifting; the compiler can extract the high */
/* word directly after multiplying magic*hop. */
#define MULMAGIC 2305843009U /* 2**61/10**9 [both cases] */
#if DOUBLE
#define MULSHIFTA 29
#define MULSHIFTB 32
#elif QUAD
#define MULSHIFTA 30
#define MULSHIFTB 31
#else
#error Unexpected type
#endif
#if DECTRACE
printf("MulAccl:");
for (pl=accl+MULACCLEN-1; pl>=accl; pl--)
printf(" %08lx:%08lx", (LI)(*pl>>32), (LI)(*pl&0xffffffff));
printf("\n");
#endif
for (pl=accl, pa=acc; pl<accl+MULACCLEN; pl++, pa++) { /* each column position */
uInt lo, hop; /* work */
uInt est; /* cannot exceed 4E+9 */
if (*pl>=MULTBASE) {
/* *pl holds a binary number which needs to be split */
hop=(uInt)(*pl>>MULSHIFTA);
est=(uInt)(((uLong)hop*MULMAGIC)>>MULSHIFTB);
/* the estimate is now in est; now calculate hi:lo-est*10**9; */
/* happily the top word of the result is irrelevant because it */
/* will always be zero so this needs only one multiplication */
lo=(uInt)(*pl-((uLong)est*MULTBASE)); /* low word of result */
/* If QUAD, the correction here could be +2 */
if (lo>=MULTBASE) {
lo-=MULTBASE; /* correct by +1 */
est++;
#if QUAD
/* may need to correct by +2 */
if (lo>=MULTBASE) {
lo-=MULTBASE;
est++;
}
#endif
}
/* finally place lo as the new coefficient 'digit' and add est to */
/* the next place up [this is safe because this path is never */
/* taken on the final iteration as *pl will fit] */
*pa=lo;
*(pl+1)+=est;
} /* *pl needed split */
else { /* *pl<MULTBASE */
*pa=(uInt)*pl; /* just copy across */
}
} /* pl loop */
#else /* 32-bit */
for (pa=acc;; pa+=4) { /* zero the accumulator */
*pa=0; *(pa+1)=0; *(pa+2)=0; *(pa+3)=0; /* [reduce overhead] */
if (pa==acc+MULACCLEN*2-4) break; /* multiple of 4 asserted */
} /* pa */
/* Effect the multiplication */
/* uLongs are not available (and in particular, there is no uLong */
/* divide) but it is still possible to use MFC's lazy-carry */
/* resolution algorithm from decNumber. First, the multiplication */
/* is effected, allowing accumulation of the partial products */
/* (which are in base-billion at each column position) into 64 bits */
/* [with the high-order 32 bits in each position being held at */
/* offset +ACCLEN from the low-order 32 bits in the accumulator]. */
/* These 64-bit numbers (which may contain up to 19 decimal digits) */
/* are then split using the Clark & Cowlishaw algorithm (see */
/* below). */
for (ui=bufr;; ui++) { /* over each item in rhs */
uInt hi, lo; /* words of exact multiply result */
pa=acc+(ui-bufr); /* where to add the lhs */
for (uj=bufl;; uj++, pa++) { /* over each item in lhs */
LONGMUL32HI(hi, *ui, *uj); /* calculate product of digits */
lo=(*ui)*(*uj); /* .. */
*pa+=lo; /* accumulate low bits and .. */
*(pa+MULACCLEN)+=hi+(*pa<lo); /* .. high bits with any carry */
if (uj==bufl+MULOPLEN-1) break;
}
if (ui==bufr+MULOPLEN-1) break;
}
/* The 64-bit carries must now be resolved; this means that a */
/* quotient/remainder has to be calculated for base-billion (1E+9). */
/* For this, Clark & Cowlishaw's quotient estimation approach (also */
/* used in decNumber) is needed, because 64-bit divide is generally */
/* extremely slow on 32-bit machines. This algorithm splits X */
/* using: */
/* */
/* magic=2**(A+B)/1E+9; // 'magic number' */
/* hop=X/2**A; // high order part of X (by shift) */
/* est=magic*hop/2**B // quotient estimate (may be low by 1) */
/* */
/* A and B are quite constrained; hop and magic must fit in 32 bits, */
/* and 2**(A+B) must be as large as possible (which is 2**61 if */
/* magic is to fit). Further, maxX increases with the length of */
/* the operands (and hence the number of partial products */
/* accumulated); maxX is OPLEN*(10**18), which is up to 19 digits. */
/* */
/* It can be shown that when OPLEN is 2 then the maximum error in */
/* the estimated quotient is <1, but for larger maximum x the */
/* maximum error is above 1 so a correction that is >1 may be */
/* needed. Values of A and B are chosen to satisfy the constraints */
/* just mentioned while minimizing the maximum error (and hence the */
/* maximum correction), as shown in the following table: */
/* */
/* Type OPLEN A B maxX maxError maxCorrection */
/* --------------------------------------------------------- */
/* DOUBLE 2 29 32 <2*10**18 0.63 1 */
/* QUAD 4 30 31 <4*10**18 1.17 2 */
/* */
/* In the OPLEN==2 case there is most choice, but the value for B */
/* of 32 has a big advantage as then the calculation of the */
/* estimate requires no shifting; the high word is simply */
/* calculated from multiplying magic*hop. */
#define MULMAGIC 2305843009U /* 2**61/10**9 [both cases] */
#if DOUBLE
#define MULSHIFTA 29
#define MULSHIFTB 32
#elif QUAD
#define MULSHIFTA 30
#define MULSHIFTB 31
#else
#error Unexpected type
#endif
#if DECTRACE
printf("MulHiLo:");
for (pa=acc+MULACCLEN-1; pa>=acc; pa--)
printf(" %08lx:%08lx", (LI)*(pa+MULACCLEN), (LI)*pa);
printf("\n");
#endif
for (pa=acc;; pa++) { /* each low uInt */
uInt hi, lo; /* words of exact multiply result */
uInt hop, estlo; /* work */
#if QUAD
uInt esthi; /* .. */
#endif
lo=*pa;
hi=*(pa+MULACCLEN); /* top 32 bits */
/* hi and lo now hold a binary number which needs to be split */
#if DOUBLE
hop=(hi<<3)+(lo>>MULSHIFTA); /* hi:lo/2**29 */
LONGMUL32HI(estlo, hop, MULMAGIC);/* only need the high word */
/* [MULSHIFTB is 32, so estlo can be used directly] */
/* the estimate is now in estlo; now calculate hi:lo-est*10**9; */
/* happily the top word of the result is irrelevant because it */
/* will always be zero so this needs only one multiplication */
lo-=(estlo*MULTBASE);
/* esthi=0; // high word is ignored below */
/* the correction here will be at most +1; do it */
if (lo>=MULTBASE) {
lo-=MULTBASE;
estlo++;
}
#elif QUAD
hop=(hi<<2)+(lo>>MULSHIFTA); /* hi:lo/2**30 */
LONGMUL32HI(esthi, hop, MULMAGIC);/* shift will be 31 .. */
estlo=hop*MULMAGIC; /* .. so low word needed */
estlo=(esthi<<1)+(estlo>>MULSHIFTB); /* [just the top bit] */
/* esthi=0; // high word is ignored below */
lo-=(estlo*MULTBASE); /* as above */
/* the correction here could be +1 or +2 */
if (lo>=MULTBASE) {
lo-=MULTBASE;
estlo++;
}
if (lo>=MULTBASE) {
lo-=MULTBASE;
estlo++;
}
#else
#error Unexpected type
#endif
/* finally place lo as the new accumulator digit and add est to */
/* the next place up; this latter add could cause a carry of 1 */
/* to the high word of the next place */