1/*
2 * IBM Accurate Mathematical Library
3 * written by International Business Machines Corp.
4 * Copyright (C) 2001-2018 Free Software Foundation, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19/************************************************************************/
20/* MODULE_NAME: mpa.c */
21/* */
22/* FUNCTIONS: */
23/* mcr */
24/* acr */
25/* cpy */
26/* norm */
27/* denorm */
28/* mp_dbl */
29/* dbl_mp */
30/* add_magnitudes */
31/* sub_magnitudes */
32/* add */
33/* sub */
34/* mul */
35/* inv */
36/* dvd */
37/* */
38/* Arithmetic functions for multiple precision numbers. */
39/* Relative errors are bounded */
40/************************************************************************/
41
42
43#include "endian.h"
44#include "mpa.h"
45#include <sys/param.h>
46#include <alloca.h>
47
48#ifndef SECTION
49# define SECTION
50#endif
51
52#ifndef NO__CONST
53const mp_no __mpone = { 1, { 1.0, 1.0 } };
54const mp_no __mptwo = { 1, { 1.0, 2.0 } };
55#endif
56
57#ifndef NO___ACR
58/* Compare mantissa of two multiple precision numbers regardless of the sign
59 and exponent of the numbers. */
60static int
61mcr (const mp_no *x, const mp_no *y, int p)
62{
63 long i;
64 long p2 = p;
65 for (i = 1; i <= p2; i++)
66 {
67 if (X[i] == Y[i])
68 continue;
69 else if (X[i] > Y[i])
70 return 1;
71 else
72 return -1;
73 }
74 return 0;
75}
76
77/* Compare the absolute values of two multiple precision numbers. */
78int
79__acr (const mp_no *x, const mp_no *y, int p)
80{
81 long i;
82
83 if (X[0] == 0)
84 {
85 if (Y[0] == 0)
86 i = 0;
87 else
88 i = -1;
89 }
90 else if (Y[0] == 0)
91 i = 1;
92 else
93 {
94 if (EX > EY)
95 i = 1;
96 else if (EX < EY)
97 i = -1;
98 else
99 i = mcr (x, y, p);
100 }
101
102 return i;
103}
104#endif
105
106#ifndef NO___CPY
107/* Copy multiple precision number X into Y. They could be the same
108 number. */
109void
110__cpy (const mp_no *x, mp_no *y, int p)
111{
112 long i;
113
114 EY = EX;
115 for (i = 0; i <= p; i++)
116 Y[i] = X[i];
117}
118#endif
119
120#ifndef NO___MP_DBL
121/* Convert a multiple precision number *X into a double precision
122 number *Y, normalized case (|x| >= 2**(-1022))). X has precision
123 P, which is positive. */
124static void
125norm (const mp_no *x, double *y, int p)
126{
127# define R RADIXI
128 long i;
129 double c;
130 mantissa_t a, u, v, z[5];
131 if (p < 5)
132 {
133 if (p == 1)
134 c = X[1];
135 else if (p == 2)
136 c = X[1] + R * X[2];
137 else if (p == 3)
138 c = X[1] + R * (X[2] + R * X[3]);
139 else /* p == 4. */
140 c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
141 }
142 else
143 {
144 for (a = 1, z[1] = X[1]; z[1] < TWO23; )
145 {
146 a *= 2;
147 z[1] *= 2;
148 }
149
150 for (i = 2; i < 5; i++)
151 {
152 mantissa_store_t d, r;
153 d = X[i] * (mantissa_store_t) a;
154 DIV_RADIX (d, r);
155 z[i] = r;
156 z[i - 1] += d;
157 }
158
159 u = ALIGN_DOWN_TO (z[3], TWO19);
160 v = z[3] - u;
161
162 if (v == TWO18)
163 {
164 if (z[4] == 0)
165 {
166 for (i = 5; i <= p; i++)
167 {
168 if (X[i] == 0)
169 continue;
170 else
171 {
172 z[3] += 1;
173 break;
174 }
175 }
176 }
177 else
178 z[3] += 1;
179 }
180
181 c = (z[1] + R * (z[2] + R * z[3])) / a;
182 }
183
184 c *= X[0];
185
186 for (i = 1; i < EX; i++)
187 c *= RADIX;
188 for (i = 1; i > EX; i--)
189 c *= RADIXI;
190
191 *y = c;
192# undef R
193}
194
195/* Convert a multiple precision number *X into a double precision
196 number *Y, Denormal case (|x| < 2**(-1022))). */
197static void
198denorm (const mp_no *x, double *y, int p)
199{
200 long i, k;
201 long p2 = p;
202 double c;
203 mantissa_t u, z[5];
204
205# define R RADIXI
206 if (EX < -44 || (EX == -44 && X[1] < TWO5))
207 {
208 *y = 0;
209 return;
210 }
211
212 if (p2 == 1)
213 {
214 if (EX == -42)
215 {
216 z[1] = X[1] + TWO10;
217 z[2] = 0;
218 z[3] = 0;
219 k = 3;
220 }
221 else if (EX == -43)
222 {
223 z[1] = TWO10;
224 z[2] = X[1];
225 z[3] = 0;
226 k = 2;
227 }
228 else
229 {
230 z[1] = TWO10;
231 z[2] = 0;
232 z[3] = X[1];
233 k = 1;
234 }
235 }
236 else if (p2 == 2)
237 {
238 if (EX == -42)
239 {
240 z[1] = X[1] + TWO10;
241 z[2] = X[2];
242 z[3] = 0;
243 k = 3;
244 }
245 else if (EX == -43)
246 {
247 z[1] = TWO10;
248 z[2] = X[1];
249 z[3] = X[2];
250 k = 2;
251 }
252 else
253 {
254 z[1] = TWO10;
255 z[2] = 0;
256 z[3] = X[1];
257 k = 1;
258 }
259 }
260 else
261 {
262 if (EX == -42)
263 {
264 z[1] = X[1] + TWO10;
265 z[2] = X[2];
266 k = 3;
267 }
268 else if (EX == -43)
269 {
270 z[1] = TWO10;
271 z[2] = X[1];
272 k = 2;
273 }
274 else
275 {
276 z[1] = TWO10;
277 z[2] = 0;
278 k = 1;
279 }
280 z[3] = X[k];
281 }
282
283 u = ALIGN_DOWN_TO (z[3], TWO5);
284
285 if (u == z[3])
286 {
287 for (i = k + 1; i <= p2; i++)
288 {
289 if (X[i] == 0)
290 continue;
291 else
292 {
293 z[3] += 1;
294 break;
295 }
296 }
297 }
298
299 c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
300
301 *y = c * TWOM1032;
302# undef R
303}
304
305/* Convert multiple precision number *X into double precision number *Y. The
306 result is correctly rounded to the nearest/even. */
307void
308__mp_dbl (const mp_no *x, double *y, int p)
309{
310 if (X[0] == 0)
311 {
312 *y = 0;
313 return;
314 }
315
316 if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
317 norm (x, y, p);
318 else
319 denorm (x, y, p);
320}
321#endif
322
323/* Get the multiple precision equivalent of X into *Y. If the precision is too
324 small, the result is truncated. */
325void
326SECTION
327__dbl_mp (double x, mp_no *y, int p)
328{
329 long i, n;
330 long p2 = p;
331
332 /* Sign. */
333 if (x == 0)
334 {
335 Y[0] = 0;
336 return;
337 }
338 else if (x > 0)
339 Y[0] = 1;
340 else
341 {
342 Y[0] = -1;
343 x = -x;
344 }
345
346 /* Exponent. */
347 for (EY = 1; x >= RADIX; EY += 1)
348 x *= RADIXI;
349 for (; x < 1; EY -= 1)
350 x *= RADIX;
351
352 /* Digits. */
353 n = MIN (p2, 4);
354 for (i = 1; i <= n; i++)
355 {
356 INTEGER_OF (x, Y[i]);
357 x *= RADIX;
358 }
359 for (; i <= p2; i++)
360 Y[i] = 0;
361}
362
363/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
364 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
365 Y and Z. No guard digit is used. The result equals the exact sum,
366 truncated. */
367static void
368SECTION
369add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
370{
371 long i, j, k;
372 long p2 = p;
373 mantissa_t zk;
374
375 EZ = EX;
376
377 i = p2;
378 j = p2 + EY - EX;
379 k = p2 + 1;
380
381 if (__glibc_unlikely (j < 1))
382 {
383 __cpy (x, z, p);
384 return;
385 }
386
387 zk = 0;
388
389 for (; j > 0; i--, j--)
390 {
391 zk += X[i] + Y[j];
392 if (zk >= RADIX)
393 {
394 Z[k--] = zk - RADIX;
395 zk = 1;
396 }
397 else
398 {
399 Z[k--] = zk;
400 zk = 0;
401 }
402 }
403
404 for (; i > 0; i--)
405 {
406 zk += X[i];
407 if (zk >= RADIX)
408 {
409 Z[k--] = zk - RADIX;
410 zk = 1;
411 }
412 else
413 {
414 Z[k--] = zk;
415 zk = 0;
416 }
417 }
418
419 if (zk == 0)
420 {
421 for (i = 1; i <= p2; i++)
422 Z[i] = Z[i + 1];
423 }
424 else
425 {
426 Z[1] = zk;
427 EZ += 1;
428 }
429}
430
431/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
432 The sign of the difference *Z is not changed. X and Y may overlap but not X
433 and Z or Y and Z. One guard digit is used. The error is less than one
434 ULP. */
435static void
436SECTION
437sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
438{
439 long i, j, k;
440 long p2 = p;
441 mantissa_t zk;
442
443 EZ = EX;
444 i = p2;
445 j = p2 + EY - EX;
446 k = p2;
447
448 /* Y is too small compared to X, copy X over to the result. */
449 if (__glibc_unlikely (j < 1))
450 {
451 __cpy (x, z, p);
452 return;
453 }
454
455 /* The relevant least significant digit in Y is non-zero, so we factor it in
456 to enhance accuracy. */
457 if (j < p2 && Y[j + 1] > 0)
458 {
459 Z[k + 1] = RADIX - Y[j + 1];
460 zk = -1;
461 }
462 else
463 zk = Z[k + 1] = 0;
464
465 /* Subtract and borrow. */
466 for (; j > 0; i--, j--)
467 {
468 zk += (X[i] - Y[j]);
469 if (zk < 0)
470 {
471 Z[k--] = zk + RADIX;
472 zk = -1;
473 }
474 else
475 {
476 Z[k--] = zk;
477 zk = 0;
478 }
479 }
480
481 /* We're done with digits from Y, so it's just digits in X. */
482 for (; i > 0; i--)
483 {
484 zk += X[i];
485 if (zk < 0)
486 {
487 Z[k--] = zk + RADIX;
488 zk = -1;
489 }
490 else
491 {
492 Z[k--] = zk;
493 zk = 0;
494 }
495 }
496
497 /* Normalize. */
498 for (i = 1; Z[i] == 0; i++)
499 ;
500 EZ = EZ - i + 1;
501 for (k = 1; i <= p2 + 1; )
502 Z[k++] = Z[i++];
503 for (; k <= p2; )
504 Z[k++] = 0;
505}
506
507/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
508 and Z or Y and Z. One guard digit is used. The error is less than one
509 ULP. */
510void
511SECTION
512__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
513{
514 int n;
515
516 if (X[0] == 0)
517 {
518 __cpy (y, z, p);
519 return;
520 }
521 else if (Y[0] == 0)
522 {
523 __cpy (x, z, p);
524 return;
525 }
526
527 if (X[0] == Y[0])
528 {
529 if (__acr (x, y, p) > 0)
530 {
531 add_magnitudes (x, y, z, p);
532 Z[0] = X[0];
533 }
534 else
535 {
536 add_magnitudes (y, x, z, p);
537 Z[0] = Y[0];
538 }
539 }
540 else
541 {
542 if ((n = __acr (x, y, p)) == 1)
543 {
544 sub_magnitudes (x, y, z, p);
545 Z[0] = X[0];
546 }
547 else if (n == -1)
548 {
549 sub_magnitudes (y, x, z, p);
550 Z[0] = Y[0];
551 }
552 else
553 Z[0] = 0;
554 }
555}
556
557/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
558 not X and Z or Y and Z. One guard digit is used. The error is less than
559 one ULP. */
560void
561SECTION
562__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
563{
564 int n;
565
566 if (X[0] == 0)
567 {
568 __cpy (y, z, p);
569 Z[0] = -Z[0];
570 return;
571 }
572 else if (Y[0] == 0)
573 {
574 __cpy (x, z, p);
575 return;
576 }
577
578 if (X[0] != Y[0])
579 {
580 if (__acr (x, y, p) > 0)
581 {
582 add_magnitudes (x, y, z, p);
583 Z[0] = X[0];
584 }
585 else
586 {
587 add_magnitudes (y, x, z, p);
588 Z[0] = -Y[0];
589 }
590 }
591 else
592 {
593 if ((n = __acr (x, y, p)) == 1)
594 {
595 sub_magnitudes (x, y, z, p);
596 Z[0] = X[0];
597 }
598 else if (n == -1)
599 {
600 sub_magnitudes (y, x, z, p);
601 Z[0] = -Y[0];
602 }
603 else
604 Z[0] = 0;
605 }
606}
607
608#ifndef NO__MUL
609/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
610 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
611 digits. In case P > 3 the error is bounded by 1.001 ULP. */
612void
613SECTION
614__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
615{
616 long i, j, k, ip, ip2;
617 long p2 = p;
618 mantissa_store_t zk;
619 const mp_no *a;
620 mantissa_store_t *diag;
621
622 /* Is z=0? */
623 if (__glibc_unlikely (X[0] * Y[0] == 0))
624 {
625 Z[0] = 0;
626 return;
627 }
628
629 /* We need not iterate through all X's and Y's since it's pointless to
630 multiply zeroes. Here, both are zero... */
631 for (ip2 = p2; ip2 > 0; ip2--)
632 if (X[ip2] != 0 || Y[ip2] != 0)
633 break;
634
635 a = X[ip2] != 0 ? y : x;
636
637 /* ... and here, at least one of them is still zero. */
638 for (ip = ip2; ip > 0; ip--)
639 if (a->d[ip] != 0)
640 break;
641
642 /* The product looks like this for p = 3 (as an example):
643
644
645 a1 a2 a3
646 x b1 b2 b3
647 -----------------------------
648 a1*b3 a2*b3 a3*b3
649 a1*b2 a2*b2 a3*b2
650 a1*b1 a2*b1 a3*b1
651
652 So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
653 for P >= 3. We compute the above digits in two parts; the last P-1
654 digits and then the first P digits. The last P-1 digits are a sum of
655 products of the input digits from P to P-k where K is 0 for the least
656 significant digit and increases as we go towards the left. The product
657 term is of the form X[k]*X[P-k] as can be seen in the above example.
658
659 The first P digits are also a sum of products with the same product term,
660 except that the sum is from 1 to k. This is also evident from the above
661 example.
662
663 Another thing that becomes evident is that only the most significant
664 ip+ip2 digits of the result are non-zero, where ip and ip2 are the
665 'internal precision' of the input numbers, i.e. digits after ip and ip2
666 are all 0. */
667
668 k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
669
670 while (k > ip + ip2 + 1)
671 Z[k--] = 0;
672
673 zk = 0;
674
675 /* Precompute sums of diagonal elements so that we can directly use them
676 later. See the next comment to know we why need them. */
677 diag = alloca (k * sizeof (mantissa_store_t));
678 mantissa_store_t d = 0;
679 for (i = 1; i <= ip; i++)
680 {
681 d += X[i] * (mantissa_store_t) Y[i];
682 diag[i] = d;
683 }
684 while (i < k)
685 diag[i++] = d;
686
687 while (k > p2)
688 {
689 long lim = k / 2;
690
691 if (k % 2 == 0)
692 /* We want to add this only once, but since we subtract it in the sum
693 of products above, we add twice. */
694 zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
695
696 for (i = k - p2, j = p2; i < j; i++, j--)
697 zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
698
699 zk -= diag[k - 1];
700
701 DIV_RADIX (zk, Z[k]);
702 k--;
703 }
704
705 /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
706 goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
707 number of multiplications, we halve the range and if k is an even number,
708 add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
709 X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
710
711 This reduction tells us that we're summing two things, the first term
712 through the half range and the negative of the sum of the product of all
713 terms of X and Y in the full range. i.e.
714
715 SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
716 a single loop so that it completes in O(n) time and can hence be directly
717 used in the loop below. */
718 while (k > 1)
719 {
720 long lim = k / 2;
721
722 if (k % 2 == 0)
723 /* We want to add this only once, but since we subtract it in the sum
724 of products above, we add twice. */
725 zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
726
727 for (i = 1, j = k - 1; i < j; i++, j--)
728 zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
729
730 zk -= diag[k - 1];
731
732 DIV_RADIX (zk, Z[k]);
733 k--;
734 }
735 Z[k] = zk;
736
737 /* Get the exponent sum into an intermediate variable. This is a subtle
738 optimization, where given enough registers, all operations on the exponent
739 happen in registers and the result is written out only once into EZ. */
740 int e = EX + EY;
741
742 /* Is there a carry beyond the most significant digit? */
743 if (__glibc_unlikely (Z[1] == 0))
744 {
745 for (i = 1; i <= p2; i++)
746 Z[i] = Z[i + 1];
747 e--;
748 }
749
750 EZ = e;
751 Z[0] = X[0] * Y[0];
752}
753#endif
754
755#ifndef NO__SQR
756/* Square *X and store result in *Y. X and Y may not overlap. For P in
757 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
758 error is bounded by 1.001 ULP. This is a faster special case of
759 multiplication. */
760void
761SECTION
762__sqr (const mp_no *x, mp_no *y, int p)
763{
764 long i, j, k, ip;
765 mantissa_store_t yk;
766
767 /* Is z=0? */
768 if (__glibc_unlikely (X[0] == 0))
769 {
770 Y[0] = 0;
771 return;
772 }
773
774 /* We need not iterate through all X's since it's pointless to
775 multiply zeroes. */
776 for (ip = p; ip > 0; ip--)
777 if (X[ip] != 0)
778 break;
779
780 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
781
782 while (k > 2 * ip + 1)
783 Y[k--] = 0;
784
785 yk = 0;
786
787 while (k > p)
788 {
789 mantissa_store_t yk2 = 0;
790 long lim = k / 2;
791
792 if (k % 2 == 0)
793 yk += X[lim] * (mantissa_store_t) X[lim];
794
795 /* In __mul, this loop (and the one within the next while loop) run
796 between a range to calculate the mantissa as follows:
797
798 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
799 + X[n] * Y[k]
800
801 For X == Y, we can get away with summing halfway and doubling the
802 result. For cases where the range size is even, the mid-point needs
803 to be added separately (above). */
804 for (i = k - p, j = p; i < j; i++, j--)
805 yk2 += X[i] * (mantissa_store_t) X[j];
806
807 yk += 2 * yk2;
808
809 DIV_RADIX (yk, Y[k]);
810 k--;
811 }
812
813 while (k > 1)
814 {
815 mantissa_store_t yk2 = 0;
816 long lim = k / 2;
817
818 if (k % 2 == 0)
819 yk += X[lim] * (mantissa_store_t) X[lim];
820
821 /* Likewise for this loop. */
822 for (i = 1, j = k - 1; i < j; i++, j--)
823 yk2 += X[i] * (mantissa_store_t) X[j];
824
825 yk += 2 * yk2;
826
827 DIV_RADIX (yk, Y[k]);
828 k--;
829 }
830 Y[k] = yk;
831
832 /* Squares are always positive. */
833 Y[0] = 1;
834
835 /* Get the exponent sum into an intermediate variable. This is a subtle
836 optimization, where given enough registers, all operations on the exponent
837 happen in registers and the result is written out only once into EZ. */
838 int e = EX * 2;
839
840 /* Is there a carry beyond the most significant digit? */
841 if (__glibc_unlikely (Y[1] == 0))
842 {
843 for (i = 1; i <= p; i++)
844 Y[i] = Y[i + 1];
845 e--;
846 }
847
848 EY = e;
849}
850#endif
851
852/* Invert *X and store in *Y. Relative error bound:
853 - For P = 2: 1.001 * R ^ (1 - P)
854 - For P = 3: 1.063 * R ^ (1 - P)
855 - For P > 3: 2.001 * R ^ (1 - P)
856
857 *X = 0 is not permissible. */
858static void
859SECTION
860__inv (const mp_no *x, mp_no *y, int p)
861{
862 long i;
863 double t;
864 mp_no z, w;
865 static const int np1[] =
866 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
867 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
868 };
869
870 __cpy (x, &z, p);
871 z.e = 0;
872 __mp_dbl (&z, &t, p);
873 t = 1 / t;
874 __dbl_mp (t, y, p);
875 EY -= EX;
876
877 for (i = 0; i < np1[p]; i++)
878 {
879 __cpy (y, &w, p);
880 __mul (x, &w, y, p);
881 __sub (&__mptwo, y, &z, p);
882 __mul (&w, &z, y, p);
883 }
884}
885
886/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
887 or Y and Z. Relative error bound:
888 - For P = 2: 2.001 * R ^ (1 - P)
889 - For P = 3: 2.063 * R ^ (1 - P)
890 - For P > 3: 3.001 * R ^ (1 - P)
891
892 *X = 0 is not permissible. */
893void
894SECTION
895__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
896{
897 mp_no w;
898
899 if (X[0] == 0)
900 Z[0] = 0;
901 else
902 {
903 __inv (y, &w, p);
904 __mul (x, &w, z, p);
905 }
906}
907