1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
32
33/* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67#include <math.h>
68#include <math_private.h>
69
70static const _Float128 bp[] = {
71 1,
72 L(1.5),
73};
74
75/* log_2(1.5) */
76static const _Float128 dp_h[] = {
77 0.0,
78 L(5.8496250072115607565592654282227158546448E-1)
79};
80
81/* Low part of log_2(1.5) */
82static const _Float128 dp_l[] = {
83 0.0,
84 L(1.0579781240112554492329533686862998106046E-16)
85};
86
87static const _Float128 zero = 0,
88 one = 1,
89 two = 2,
90 two113 = L(1.0384593717069655257060992658440192E34),
91 huge = L(1.0e3000),
92 tiny = L(1.0e-3000);
93
94/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98static const _Float128 LN[] =
99{
100 L(-3.0779177200290054398792536829702930623200E1),
101 L(6.5135778082209159921251824580292116201640E1),
102 L(-4.6312921812152436921591152809994014413540E1),
103 L(1.2510208195629420304615674658258363295208E1),
104 L(-9.9266909031921425609179910128531667336670E-1)
105};
106static const _Float128 LD[] =
107{
108 L(-5.129862866715009066465422805058933131960E1),
109 L(1.452015077564081884387441590064272782044E2),
110 L(-1.524043275549860505277434040464085593165E2),
111 L(7.236063513651544224319663428634139768808E1),
112 L(-1.494198912340228235853027849917095580053E1)
113 /* 1.0E0 */
114};
115
116/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119static const _Float128 PN[] =
120{
121 L(5.081801691915377692446852383385968225675E8),
122 L(9.360895299872484512023336636427675327355E6),
123 L(4.213701282274196030811629773097579432957E4),
124 L(5.201006511142748908655720086041570288182E1),
125 L(9.088368420359444263703202925095675982530E-3),
126};
127static const _Float128 PD[] =
128{
129 L(3.049081015149226615468111430031590411682E9),
130 L(1.069833887183886839966085436512368982758E8),
131 L(8.259257717868875207333991924545445705394E5),
132 L(1.872583833284143212651746812884298360922E3),
133 /* 1.0E0 */
134};
135
136static const _Float128
137 /* ln 2 */
138 lg2 = L(6.9314718055994530941723212145817656807550E-1),
139 lg2_h = L(6.9314718055994528622676398299518041312695E-1),
140 lg2_l = L(2.3190468138462996154948554638754786504121E-17),
141 ovt = L(8.0085662595372944372e-0017),
142 /* 2/(3*log(2)) */
143 cp = L(9.6179669392597560490661645400126142495110E-1),
144 cp_h = L(9.6179669392597555432899980587535537779331E-1),
145 cp_l = L(5.0577616648125906047157785230014751039424E-17);
146
147_Float128
148__ieee754_powl (_Float128 x, _Float128 y)
149{
150 _Float128 z, ax, z_h, z_l, p_h, p_l;
151 _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
152 _Float128 s2, s_h, s_l, t_h, t_l, ay;
153 int32_t i, j, k, yisint, n;
154 uint32_t ix, iy;
155 int32_t hx, hy;
156 ieee854_long_double_shape_type o, p, q;
157
158 p.value = x;
159 hx = p.parts32.w0;
160 ix = hx & 0x7fffffff;
161
162 q.value = y;
163 hy = q.parts32.w0;
164 iy = hy & 0x7fffffff;
165
166
167 /* y==zero: x**0 = 1 */
168 if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
169 && !issignaling (x))
170 return one;
171
172 /* 1.0**y = 1; -1.0**+-Inf = 1 */
173 if (x == one && !issignaling (y))
174 return one;
175 if (x == -1 && iy == 0x7fff0000
176 && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
177 return one;
178
179 /* +-NaN return x+y */
180 if ((ix > 0x7fff0000)
181 || ((ix == 0x7fff0000)
182 && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
183 || (iy > 0x7fff0000)
184 || ((iy == 0x7fff0000)
185 && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
186 return x + y;
187
188 /* determine if y is an odd int when x < 0
189 * yisint = 0 ... y is not an integer
190 * yisint = 1 ... y is an odd int
191 * yisint = 2 ... y is an even int
192 */
193 yisint = 0;
194 if (hx < 0)
195 {
196 if (iy >= 0x40700000) /* 2^113 */
197 yisint = 2; /* even integer y */
198 else if (iy >= 0x3fff0000) /* 1.0 */
199 {
200 if (__floorl (y) == y)
201 {
202 z = 0.5 * y;
203 if (__floorl (z) == z)
204 yisint = 2;
205 else
206 yisint = 1;
207 }
208 }
209 }
210
211 /* special value of y */
212 if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
213 {
214 if (iy == 0x7fff0000) /* y is +-inf */
215 {
216 if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
217 == 0)
218 return y - y; /* +-1**inf is NaN */
219 else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
220 return (hy >= 0) ? y : zero;
221 else /* (|x|<1)**-,+inf = inf,0 */
222 return (hy < 0) ? -y : zero;
223 }
224 if (iy == 0x3fff0000)
225 { /* y is +-1 */
226 if (hy < 0)
227 return one / x;
228 else
229 return x;
230 }
231 if (hy == 0x40000000)
232 return x * x; /* y is 2 */
233 if (hy == 0x3ffe0000)
234 { /* y is 0.5 */
235 if (hx >= 0) /* x >= +0 */
236 return __ieee754_sqrtl (x);
237 }
238 }
239
240 ax = fabsl (x);
241 /* special value of x */
242 if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
243 {
244 if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
245 {
246 z = ax; /*x is +-0,+-inf,+-1 */
247 if (hy < 0)
248 z = one / z; /* z = (1/|x|) */
249 if (hx < 0)
250 {
251 if (((ix - 0x3fff0000) | yisint) == 0)
252 {
253 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
254 }
255 else if (yisint == 1)
256 z = -z; /* (x<0)**odd = -(|x|**odd) */
257 }
258 return z;
259 }
260 }
261
262 /* (x<0)**(non-int) is NaN */
263 if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
264 return (x - x) / (x - x);
265
266 /* sgn (sign of result -ve**odd) = -1 else = 1 */
267 sgn = one;
268 if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
269 sgn = -one; /* (-ve)**(odd int) */
270
271 /* |y| is huge.
272 2^-16495 = 1/2 of smallest representable value.
273 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
274 if (iy > 0x401d654b)
275 {
276 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
277 if (iy > 0x407d654b)
278 {
279 if (ix <= 0x3ffeffff)
280 return (hy < 0) ? huge * huge : tiny * tiny;
281 if (ix >= 0x3fff0000)
282 return (hy > 0) ? huge * huge : tiny * tiny;
283 }
284 /* over/underflow if x is not close to one */
285 if (ix < 0x3ffeffff)
286 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
287 if (ix > 0x3fff0000)
288 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
289 }
290
291 ay = y > 0 ? y : -y;
292 if (ay < 0x1p-128)
293 y = y < 0 ? -0x1p-128 : 0x1p-128;
294
295 n = 0;
296 /* take care subnormal number */
297 if (ix < 0x00010000)
298 {
299 ax *= two113;
300 n -= 113;
301 o.value = ax;
302 ix = o.parts32.w0;
303 }
304 n += ((ix) >> 16) - 0x3fff;
305 j = ix & 0x0000ffff;
306 /* determine interval */
307 ix = j | 0x3fff0000; /* normalize ix */
308 if (j <= 0x3988)
309 k = 0; /* |x|<sqrt(3/2) */
310 else if (j < 0xbb67)
311 k = 1; /* |x|<sqrt(3) */
312 else
313 {
314 k = 0;
315 n += 1;
316 ix -= 0x00010000;
317 }
318
319 o.value = ax;
320 o.parts32.w0 = ix;
321 ax = o.value;
322
323 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
324 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
325 v = one / (ax + bp[k]);
326 s = u * v;
327 s_h = s;
328
329 o.value = s_h;
330 o.parts32.w3 = 0;
331 o.parts32.w2 &= 0xf8000000;
332 s_h = o.value;
333 /* t_h=ax+bp[k] High */
334 t_h = ax + bp[k];
335 o.value = t_h;
336 o.parts32.w3 = 0;
337 o.parts32.w2 &= 0xf8000000;
338 t_h = o.value;
339 t_l = ax - (t_h - bp[k]);
340 s_l = v * ((u - s_h * t_h) - s_h * t_l);
341 /* compute log(ax) */
342 s2 = s * s;
343 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
344 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
345 r = s2 * s2 * u / v;
346 r += s_l * (s_h + s);
347 s2 = s_h * s_h;
348 t_h = 3.0 + s2 + r;
349 o.value = t_h;
350 o.parts32.w3 = 0;
351 o.parts32.w2 &= 0xf8000000;
352 t_h = o.value;
353 t_l = r - ((t_h - 3.0) - s2);
354 /* u+v = s*(1+...) */
355 u = s_h * t_h;
356 v = s_l * t_h + t_l * s;
357 /* 2/(3log2)*(s+...) */
358 p_h = u + v;
359 o.value = p_h;
360 o.parts32.w3 = 0;
361 o.parts32.w2 &= 0xf8000000;
362 p_h = o.value;
363 p_l = v - (p_h - u);
364 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
365 z_l = cp_l * p_h + p_l * cp + dp_l[k];
366 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
367 t = (_Float128) n;
368 t1 = (((z_h + z_l) + dp_h[k]) + t);
369 o.value = t1;
370 o.parts32.w3 = 0;
371 o.parts32.w2 &= 0xf8000000;
372 t1 = o.value;
373 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
374
375 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
376 y1 = y;
377 o.value = y1;
378 o.parts32.w3 = 0;
379 o.parts32.w2 &= 0xf8000000;
380 y1 = o.value;
381 p_l = (y - y1) * t1 + y * t2;
382 p_h = y1 * t1;
383 z = p_l + p_h;
384 o.value = z;
385 j = o.parts32.w0;
386 if (j >= 0x400d0000) /* z >= 16384 */
387 {
388 /* if z > 16384 */
389 if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
390 return sgn * huge * huge; /* overflow */
391 else
392 {
393 if (p_l + ovt > z - p_h)
394 return sgn * huge * huge; /* overflow */
395 }
396 }
397 else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
398 {
399 /* z < -16495 */
400 if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
401 != 0)
402 return sgn * tiny * tiny; /* underflow */
403 else
404 {
405 if (p_l <= z - p_h)
406 return sgn * tiny * tiny; /* underflow */
407 }
408 }
409 /* compute 2**(p_h+p_l) */
410 i = j & 0x7fffffff;
411 k = (i >> 16) - 0x3fff;
412 n = 0;
413 if (i > 0x3ffe0000)
414 { /* if |z| > 0.5, set n = [z+0.5] */
415 n = __floorl (z + L(0.5));
416 t = n;
417 p_h -= t;
418 }
419 t = p_l + p_h;
420 o.value = t;
421 o.parts32.w3 = 0;
422 o.parts32.w2 &= 0xf8000000;
423 t = o.value;
424 u = t * lg2_h;
425 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
426 z = u + v;
427 w = v - (z - u);
428 /* exp(z) */
429 t = z * z;
430 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
431 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
432 t1 = z - t * u / v;
433 r = (z * t1) / (t1 - two) - (w + z * w);
434 z = one - (r - z);
435 o.value = z;
436 j = o.parts32.w0;
437 j += (n << 16);
438 if ((j >> 16) <= 0)
439 {
440 z = __scalbnl (z, n); /* subnormal output */
441 _Float128 force_underflow = z * z;
442 math_force_eval (force_underflow);
443 }
444 else
445 {
446 o.parts32.w0 = j;
447 z = o.value;
448 }
449 return sgn * z;
450}
451strong_alias (__ieee754_powl, __powl_finite)
452