1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
32
33/* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67#include <math.h>
68#include <math-barriers.h>
69#include <math_private.h>
70
71static const _Float128 bp[] = {
72 1,
73 L(1.5),
74};
75
76/* log_2(1.5) */
77static const _Float128 dp_h[] = {
78 0.0,
79 L(5.8496250072115607565592654282227158546448E-1)
80};
81
82/* Low part of log_2(1.5) */
83static const _Float128 dp_l[] = {
84 0.0,
85 L(1.0579781240112554492329533686862998106046E-16)
86};
87
88static const _Float128 zero = 0,
89 one = 1,
90 two = 2,
91 two113 = L(1.0384593717069655257060992658440192E34),
92 huge = L(1.0e3000),
93 tiny = L(1.0e-3000);
94
95/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
96 z = (x-1)/(x+1)
97 1 <= x <= 1.25
98 Peak relative error 2.3e-37 */
99static const _Float128 LN[] =
100{
101 L(-3.0779177200290054398792536829702930623200E1),
102 L(6.5135778082209159921251824580292116201640E1),
103 L(-4.6312921812152436921591152809994014413540E1),
104 L(1.2510208195629420304615674658258363295208E1),
105 L(-9.9266909031921425609179910128531667336670E-1)
106};
107static const _Float128 LD[] =
108{
109 L(-5.129862866715009066465422805058933131960E1),
110 L(1.452015077564081884387441590064272782044E2),
111 L(-1.524043275549860505277434040464085593165E2),
112 L(7.236063513651544224319663428634139768808E1),
113 L(-1.494198912340228235853027849917095580053E1)
114 /* 1.0E0 */
115};
116
117/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
118 0 <= x <= 0.5
119 Peak relative error 5.7e-38 */
120static const _Float128 PN[] =
121{
122 L(5.081801691915377692446852383385968225675E8),
123 L(9.360895299872484512023336636427675327355E6),
124 L(4.213701282274196030811629773097579432957E4),
125 L(5.201006511142748908655720086041570288182E1),
126 L(9.088368420359444263703202925095675982530E-3),
127};
128static const _Float128 PD[] =
129{
130 L(3.049081015149226615468111430031590411682E9),
131 L(1.069833887183886839966085436512368982758E8),
132 L(8.259257717868875207333991924545445705394E5),
133 L(1.872583833284143212651746812884298360922E3),
134 /* 1.0E0 */
135};
136
137static const _Float128
138 /* ln 2 */
139 lg2 = L(6.9314718055994530941723212145817656807550E-1),
140 lg2_h = L(6.9314718055994528622676398299518041312695E-1),
141 lg2_l = L(2.3190468138462996154948554638754786504121E-17),
142 ovt = L(8.0085662595372944372e-0017),
143 /* 2/(3*log(2)) */
144 cp = L(9.6179669392597560490661645400126142495110E-1),
145 cp_h = L(9.6179669392597555432899980587535537779331E-1),
146 cp_l = L(5.0577616648125906047157785230014751039424E-17);
147
148_Float128
149__ieee754_powl (_Float128 x, _Float128 y)
150{
151 _Float128 z, ax, z_h, z_l, p_h, p_l;
152 _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
153 _Float128 s2, s_h, s_l, t_h, t_l, ay;
154 int32_t i, j, k, yisint, n;
155 uint32_t ix, iy;
156 int32_t hx, hy;
157 ieee854_long_double_shape_type o, p, q;
158
159 p.value = x;
160 hx = p.parts32.w0;
161 ix = hx & 0x7fffffff;
162
163 q.value = y;
164 hy = q.parts32.w0;
165 iy = hy & 0x7fffffff;
166
167
168 /* y==zero: x**0 = 1 */
169 if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
170 && !issignaling (x))
171 return one;
172
173 /* 1.0**y = 1; -1.0**+-Inf = 1 */
174 if (x == one && !issignaling (y))
175 return one;
176 if (x == -1 && iy == 0x7fff0000
177 && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
178 return one;
179
180 /* +-NaN return x+y */
181 if ((ix > 0x7fff0000)
182 || ((ix == 0x7fff0000)
183 && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
184 || (iy > 0x7fff0000)
185 || ((iy == 0x7fff0000)
186 && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
187 return x + y;
188
189 /* determine if y is an odd int when x < 0
190 * yisint = 0 ... y is not an integer
191 * yisint = 1 ... y is an odd int
192 * yisint = 2 ... y is an even int
193 */
194 yisint = 0;
195 if (hx < 0)
196 {
197 if (iy >= 0x40700000) /* 2^113 */
198 yisint = 2; /* even integer y */
199 else if (iy >= 0x3fff0000) /* 1.0 */
200 {
201 if (floorl (y) == y)
202 {
203 z = 0.5 * y;
204 if (floorl (z) == z)
205 yisint = 2;
206 else
207 yisint = 1;
208 }
209 }
210 }
211
212 /* special value of y */
213 if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
214 {
215 if (iy == 0x7fff0000) /* y is +-inf */
216 {
217 if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
218 == 0)
219 return y - y; /* +-1**inf is NaN */
220 else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
221 return (hy >= 0) ? y : zero;
222 else /* (|x|<1)**-,+inf = inf,0 */
223 return (hy < 0) ? -y : zero;
224 }
225 if (iy == 0x3fff0000)
226 { /* y is +-1 */
227 if (hy < 0)
228 return one / x;
229 else
230 return x;
231 }
232 if (hy == 0x40000000)
233 return x * x; /* y is 2 */
234 if (hy == 0x3ffe0000)
235 { /* y is 0.5 */
236 if (hx >= 0) /* x >= +0 */
237 return sqrtl (x);
238 }
239 }
240
241 ax = fabsl (x);
242 /* special value of x */
243 if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
244 {
245 if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
246 {
247 z = ax; /*x is +-0,+-inf,+-1 */
248 if (hy < 0)
249 z = one / z; /* z = (1/|x|) */
250 if (hx < 0)
251 {
252 if (((ix - 0x3fff0000) | yisint) == 0)
253 {
254 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
255 }
256 else if (yisint == 1)
257 z = -z; /* (x<0)**odd = -(|x|**odd) */
258 }
259 return z;
260 }
261 }
262
263 /* (x<0)**(non-int) is NaN */
264 if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
265 return (x - x) / (x - x);
266
267 /* sgn (sign of result -ve**odd) = -1 else = 1 */
268 sgn = one;
269 if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
270 sgn = -one; /* (-ve)**(odd int) */
271
272 /* |y| is huge.
273 2^-16495 = 1/2 of smallest representable value.
274 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
275 if (iy > 0x401d654b)
276 {
277 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
278 if (iy > 0x407d654b)
279 {
280 if (ix <= 0x3ffeffff)
281 return (hy < 0) ? huge * huge : tiny * tiny;
282 if (ix >= 0x3fff0000)
283 return (hy > 0) ? huge * huge : tiny * tiny;
284 }
285 /* over/underflow if x is not close to one */
286 if (ix < 0x3ffeffff)
287 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
288 if (ix > 0x3fff0000)
289 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
290 }
291
292 ay = y > 0 ? y : -y;
293 if (ay < 0x1p-128)
294 y = y < 0 ? -0x1p-128 : 0x1p-128;
295
296 n = 0;
297 /* take care subnormal number */
298 if (ix < 0x00010000)
299 {
300 ax *= two113;
301 n -= 113;
302 o.value = ax;
303 ix = o.parts32.w0;
304 }
305 n += ((ix) >> 16) - 0x3fff;
306 j = ix & 0x0000ffff;
307 /* determine interval */
308 ix = j | 0x3fff0000; /* normalize ix */
309 if (j <= 0x3988)
310 k = 0; /* |x|<sqrt(3/2) */
311 else if (j < 0xbb67)
312 k = 1; /* |x|<sqrt(3) */
313 else
314 {
315 k = 0;
316 n += 1;
317 ix -= 0x00010000;
318 }
319
320 o.value = ax;
321 o.parts32.w0 = ix;
322 ax = o.value;
323
324 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
325 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
326 v = one / (ax + bp[k]);
327 s = u * v;
328 s_h = s;
329
330 o.value = s_h;
331 o.parts32.w3 = 0;
332 o.parts32.w2 &= 0xf8000000;
333 s_h = o.value;
334 /* t_h=ax+bp[k] High */
335 t_h = ax + bp[k];
336 o.value = t_h;
337 o.parts32.w3 = 0;
338 o.parts32.w2 &= 0xf8000000;
339 t_h = o.value;
340 t_l = ax - (t_h - bp[k]);
341 s_l = v * ((u - s_h * t_h) - s_h * t_l);
342 /* compute log(ax) */
343 s2 = s * s;
344 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
345 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
346 r = s2 * s2 * u / v;
347 r += s_l * (s_h + s);
348 s2 = s_h * s_h;
349 t_h = 3.0 + s2 + r;
350 o.value = t_h;
351 o.parts32.w3 = 0;
352 o.parts32.w2 &= 0xf8000000;
353 t_h = o.value;
354 t_l = r - ((t_h - 3.0) - s2);
355 /* u+v = s*(1+...) */
356 u = s_h * t_h;
357 v = s_l * t_h + t_l * s;
358 /* 2/(3log2)*(s+...) */
359 p_h = u + v;
360 o.value = p_h;
361 o.parts32.w3 = 0;
362 o.parts32.w2 &= 0xf8000000;
363 p_h = o.value;
364 p_l = v - (p_h - u);
365 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
366 z_l = cp_l * p_h + p_l * cp + dp_l[k];
367 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
368 t = (_Float128) n;
369 t1 = (((z_h + z_l) + dp_h[k]) + t);
370 o.value = t1;
371 o.parts32.w3 = 0;
372 o.parts32.w2 &= 0xf8000000;
373 t1 = o.value;
374 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
375
376 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
377 y1 = y;
378 o.value = y1;
379 o.parts32.w3 = 0;
380 o.parts32.w2 &= 0xf8000000;
381 y1 = o.value;
382 p_l = (y - y1) * t1 + y * t2;
383 p_h = y1 * t1;
384 z = p_l + p_h;
385 o.value = z;
386 j = o.parts32.w0;
387 if (j >= 0x400d0000) /* z >= 16384 */
388 {
389 /* if z > 16384 */
390 if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
391 return sgn * huge * huge; /* overflow */
392 else
393 {
394 if (p_l + ovt > z - p_h)
395 return sgn * huge * huge; /* overflow */
396 }
397 }
398 else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
399 {
400 /* z < -16495 */
401 if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
402 != 0)
403 return sgn * tiny * tiny; /* underflow */
404 else
405 {
406 if (p_l <= z - p_h)
407 return sgn * tiny * tiny; /* underflow */
408 }
409 }
410 /* compute 2**(p_h+p_l) */
411 i = j & 0x7fffffff;
412 k = (i >> 16) - 0x3fff;
413 n = 0;
414 if (i > 0x3ffe0000)
415 { /* if |z| > 0.5, set n = [z+0.5] */
416 n = floorl (z + L(0.5));
417 t = n;
418 p_h -= t;
419 }
420 t = p_l + p_h;
421 o.value = t;
422 o.parts32.w3 = 0;
423 o.parts32.w2 &= 0xf8000000;
424 t = o.value;
425 u = t * lg2_h;
426 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
427 z = u + v;
428 w = v - (z - u);
429 /* exp(z) */
430 t = z * z;
431 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
432 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
433 t1 = z - t * u / v;
434 r = (z * t1) / (t1 - two) - (w + z * w);
435 z = one - (r - z);
436 o.value = z;
437 j = o.parts32.w0;
438 j += (n << 16);
439 if ((j >> 16) <= 0)
440 {
441 z = __scalbnl (z, n); /* subnormal output */
442 _Float128 force_underflow = z * z;
443 math_force_eval (force_underflow);
444 }
445 else
446 {
447 o.parts32.w0 = j;
448 z = o.value;
449 }
450 return sgn * z;
451}
452strong_alias (__ieee754_powl, __powl_finite)
453