1 | /* |
2 | * ==================================================== |
3 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
4 | * |
5 | * Developed at SunPro, a Sun Microsystems, Inc. business. |
6 | * Permission to use, copy, modify, and distribute this |
7 | * software is freely granted, provided that this notice |
8 | * is preserved. |
9 | * ==================================================== |
10 | */ |
11 | |
12 | /* |
13 | * from: @(#)fdlibm.h 5.1 93/09/24 |
14 | */ |
15 | |
16 | #ifndef _MATH_PRIVATE_H_ |
17 | #define _MATH_PRIVATE_H_ |
18 | |
19 | #include <endian.h> |
20 | #include <stdint.h> |
21 | #include <sys/types.h> |
22 | #include <fenv.h> |
23 | #include <float.h> |
24 | #include <get-rounding-mode.h> |
25 | |
26 | /* The original fdlibm code used statements like: |
27 | n0 = ((*(int*)&one)>>29)^1; * index of high word * |
28 | ix0 = *(n0+(int*)&x); * high word of x * |
29 | ix1 = *((1-n0)+(int*)&x); * low word of x * |
30 | to dig two 32 bit words out of the 64 bit IEEE floating point |
31 | value. That is non-ANSI, and, moreover, the gcc instruction |
32 | scheduler gets it wrong. We instead use the following macros. |
33 | Unlike the original code, we determine the endianness at compile |
34 | time, not at run time; I don't see much benefit to selecting |
35 | endianness at run time. */ |
36 | |
37 | /* A union which permits us to convert between a double and two 32 bit |
38 | ints. */ |
39 | |
40 | #if __FLOAT_WORD_ORDER == BIG_ENDIAN |
41 | |
42 | typedef union |
43 | { |
44 | double value; |
45 | struct |
46 | { |
47 | u_int32_t msw; |
48 | u_int32_t lsw; |
49 | } parts; |
50 | uint64_t word; |
51 | } ieee_double_shape_type; |
52 | |
53 | #endif |
54 | |
55 | #if __FLOAT_WORD_ORDER == LITTLE_ENDIAN |
56 | |
57 | typedef union |
58 | { |
59 | double value; |
60 | struct |
61 | { |
62 | u_int32_t lsw; |
63 | u_int32_t msw; |
64 | } parts; |
65 | uint64_t word; |
66 | } ieee_double_shape_type; |
67 | |
68 | #endif |
69 | |
70 | /* Get two 32 bit ints from a double. */ |
71 | |
72 | #define (ix0,ix1,d) \ |
73 | do { \ |
74 | ieee_double_shape_type ew_u; \ |
75 | ew_u.value = (d); \ |
76 | (ix0) = ew_u.parts.msw; \ |
77 | (ix1) = ew_u.parts.lsw; \ |
78 | } while (0) |
79 | |
80 | /* Get the more significant 32 bit int from a double. */ |
81 | |
82 | #ifndef GET_HIGH_WORD |
83 | # define GET_HIGH_WORD(i,d) \ |
84 | do { \ |
85 | ieee_double_shape_type gh_u; \ |
86 | gh_u.value = (d); \ |
87 | (i) = gh_u.parts.msw; \ |
88 | } while (0) |
89 | #endif |
90 | |
91 | /* Get the less significant 32 bit int from a double. */ |
92 | |
93 | #ifndef GET_LOW_WORD |
94 | # define GET_LOW_WORD(i,d) \ |
95 | do { \ |
96 | ieee_double_shape_type gl_u; \ |
97 | gl_u.value = (d); \ |
98 | (i) = gl_u.parts.lsw; \ |
99 | } while (0) |
100 | #endif |
101 | |
102 | /* Get all in one, efficient on 64-bit machines. */ |
103 | #ifndef EXTRACT_WORDS64 |
104 | # define EXTRACT_WORDS64(i,d) \ |
105 | do { \ |
106 | ieee_double_shape_type gh_u; \ |
107 | gh_u.value = (d); \ |
108 | (i) = gh_u.word; \ |
109 | } while (0) |
110 | #endif |
111 | |
112 | /* Set a double from two 32 bit ints. */ |
113 | #ifndef INSERT_WORDS |
114 | # define INSERT_WORDS(d,ix0,ix1) \ |
115 | do { \ |
116 | ieee_double_shape_type iw_u; \ |
117 | iw_u.parts.msw = (ix0); \ |
118 | iw_u.parts.lsw = (ix1); \ |
119 | (d) = iw_u.value; \ |
120 | } while (0) |
121 | #endif |
122 | |
123 | /* Get all in one, efficient on 64-bit machines. */ |
124 | #ifndef INSERT_WORDS64 |
125 | # define INSERT_WORDS64(d,i) \ |
126 | do { \ |
127 | ieee_double_shape_type iw_u; \ |
128 | iw_u.word = (i); \ |
129 | (d) = iw_u.value; \ |
130 | } while (0) |
131 | #endif |
132 | |
133 | /* Set the more significant 32 bits of a double from an int. */ |
134 | #ifndef SET_HIGH_WORD |
135 | #define SET_HIGH_WORD(d,v) \ |
136 | do { \ |
137 | ieee_double_shape_type sh_u; \ |
138 | sh_u.value = (d); \ |
139 | sh_u.parts.msw = (v); \ |
140 | (d) = sh_u.value; \ |
141 | } while (0) |
142 | #endif |
143 | |
144 | /* Set the less significant 32 bits of a double from an int. */ |
145 | #ifndef SET_LOW_WORD |
146 | # define SET_LOW_WORD(d,v) \ |
147 | do { \ |
148 | ieee_double_shape_type sl_u; \ |
149 | sl_u.value = (d); \ |
150 | sl_u.parts.lsw = (v); \ |
151 | (d) = sl_u.value; \ |
152 | } while (0) |
153 | #endif |
154 | |
155 | /* A union which permits us to convert between a float and a 32 bit |
156 | int. */ |
157 | |
158 | typedef union |
159 | { |
160 | float value; |
161 | u_int32_t word; |
162 | } ieee_float_shape_type; |
163 | |
164 | /* Get a 32 bit int from a float. */ |
165 | #ifndef GET_FLOAT_WORD |
166 | # define GET_FLOAT_WORD(i,d) \ |
167 | do { \ |
168 | ieee_float_shape_type gf_u; \ |
169 | gf_u.value = (d); \ |
170 | (i) = gf_u.word; \ |
171 | } while (0) |
172 | #endif |
173 | |
174 | /* Set a float from a 32 bit int. */ |
175 | #ifndef SET_FLOAT_WORD |
176 | # define SET_FLOAT_WORD(d,i) \ |
177 | do { \ |
178 | ieee_float_shape_type sf_u; \ |
179 | sf_u.word = (i); \ |
180 | (d) = sf_u.value; \ |
181 | } while (0) |
182 | #endif |
183 | |
184 | /* Get long double macros from a separate header. */ |
185 | #include <math_ldbl.h> |
186 | |
187 | /* ieee style elementary functions */ |
188 | extern double __ieee754_sqrt (double); |
189 | extern double __ieee754_acos (double); |
190 | extern double __ieee754_acosh (double); |
191 | extern double __ieee754_log (double); |
192 | extern double __ieee754_atanh (double); |
193 | extern double __ieee754_asin (double); |
194 | extern double __ieee754_atan2 (double,double); |
195 | extern double __ieee754_exp (double); |
196 | extern double __ieee754_exp2 (double); |
197 | extern double __ieee754_exp10 (double); |
198 | extern double __ieee754_cosh (double); |
199 | extern double __ieee754_fmod (double,double); |
200 | extern double __ieee754_pow (double,double); |
201 | extern double __ieee754_lgamma_r (double,int *); |
202 | extern double __ieee754_gamma_r (double,int *); |
203 | extern double __ieee754_lgamma (double); |
204 | extern double __ieee754_gamma (double); |
205 | extern double __ieee754_log10 (double); |
206 | extern double __ieee754_log2 (double); |
207 | extern double __ieee754_sinh (double); |
208 | extern double __ieee754_hypot (double,double); |
209 | extern double __ieee754_j0 (double); |
210 | extern double __ieee754_j1 (double); |
211 | extern double __ieee754_y0 (double); |
212 | extern double __ieee754_y1 (double); |
213 | extern double __ieee754_jn (int,double); |
214 | extern double __ieee754_yn (int,double); |
215 | extern double __ieee754_remainder (double,double); |
216 | extern int32_t __ieee754_rem_pio2 (double,double*); |
217 | extern double __ieee754_scalb (double,double); |
218 | extern int __ieee754_ilogb (double); |
219 | |
220 | /* fdlibm kernel function */ |
221 | extern double __kernel_standard (double,double,int); |
222 | extern float __kernel_standard_f (float,float,int); |
223 | extern long double __kernel_standard_l (long double,long double,int); |
224 | extern double __kernel_sin (double,double,int); |
225 | extern double __kernel_cos (double,double); |
226 | extern double __kernel_tan (double,double,int); |
227 | extern int __kernel_rem_pio2 (double*,double*,int,int,int, const int32_t*); |
228 | |
229 | /* internal functions. */ |
230 | extern double __copysign (double x, double __y); |
231 | |
232 | extern inline double __copysign (double x, double y) |
233 | { return __builtin_copysign (x, y); } |
234 | |
235 | /* ieee style elementary float functions */ |
236 | extern float __ieee754_sqrtf (float); |
237 | extern float __ieee754_acosf (float); |
238 | extern float __ieee754_acoshf (float); |
239 | extern float __ieee754_logf (float); |
240 | extern float __ieee754_atanhf (float); |
241 | extern float __ieee754_asinf (float); |
242 | extern float __ieee754_atan2f (float,float); |
243 | extern float __ieee754_expf (float); |
244 | extern float __ieee754_exp2f (float); |
245 | extern float __ieee754_exp10f (float); |
246 | extern float __ieee754_coshf (float); |
247 | extern float __ieee754_fmodf (float,float); |
248 | extern float __ieee754_powf (float,float); |
249 | extern float __ieee754_lgammaf_r (float,int *); |
250 | extern float __ieee754_gammaf_r (float,int *); |
251 | extern float __ieee754_lgammaf (float); |
252 | extern float __ieee754_gammaf (float); |
253 | extern float __ieee754_log10f (float); |
254 | extern float __ieee754_log2f (float); |
255 | extern float __ieee754_sinhf (float); |
256 | extern float __ieee754_hypotf (float,float); |
257 | extern float __ieee754_j0f (float); |
258 | extern float __ieee754_j1f (float); |
259 | extern float __ieee754_y0f (float); |
260 | extern float __ieee754_y1f (float); |
261 | extern float __ieee754_jnf (int,float); |
262 | extern float __ieee754_ynf (int,float); |
263 | extern float __ieee754_remainderf (float,float); |
264 | extern int32_t __ieee754_rem_pio2f (float,float*); |
265 | extern float __ieee754_scalbf (float,float); |
266 | extern int __ieee754_ilogbf (float); |
267 | |
268 | |
269 | /* float versions of fdlibm kernel functions */ |
270 | extern float __kernel_sinf (float,float,int); |
271 | extern float __kernel_cosf (float,float); |
272 | extern float __kernel_tanf (float,float,int); |
273 | extern int __kernel_rem_pio2f (float*,float*,int,int,int, const int32_t*); |
274 | |
275 | /* internal functions. */ |
276 | extern float __copysignf (float x, float __y); |
277 | |
278 | extern inline float __copysignf (float x, float y) |
279 | { return __builtin_copysignf (x, y); } |
280 | |
281 | /* ieee style elementary long double functions */ |
282 | extern long double __ieee754_sqrtl (long double); |
283 | extern long double __ieee754_acosl (long double); |
284 | extern long double __ieee754_acoshl (long double); |
285 | extern long double __ieee754_logl (long double); |
286 | extern long double __ieee754_atanhl (long double); |
287 | extern long double __ieee754_asinl (long double); |
288 | extern long double __ieee754_atan2l (long double,long double); |
289 | extern long double __ieee754_expl (long double); |
290 | extern long double __ieee754_exp2l (long double); |
291 | extern long double __ieee754_exp10l (long double); |
292 | extern long double __ieee754_coshl (long double); |
293 | extern long double __ieee754_fmodl (long double,long double); |
294 | extern long double __ieee754_powl (long double,long double); |
295 | extern long double __ieee754_lgammal_r (long double,int *); |
296 | extern long double __ieee754_gammal_r (long double,int *); |
297 | extern long double __ieee754_lgammal (long double); |
298 | extern long double __ieee754_gammal (long double); |
299 | extern long double __ieee754_log10l (long double); |
300 | extern long double __ieee754_log2l (long double); |
301 | extern long double __ieee754_sinhl (long double); |
302 | extern long double __ieee754_hypotl (long double,long double); |
303 | extern long double __ieee754_j0l (long double); |
304 | extern long double __ieee754_j1l (long double); |
305 | extern long double __ieee754_y0l (long double); |
306 | extern long double __ieee754_y1l (long double); |
307 | extern long double __ieee754_jnl (int,long double); |
308 | extern long double __ieee754_ynl (int,long double); |
309 | extern long double __ieee754_remainderl (long double,long double); |
310 | extern int __ieee754_rem_pio2l (long double,long double*); |
311 | extern long double __ieee754_scalbl (long double,long double); |
312 | extern int __ieee754_ilogbl (long double); |
313 | |
314 | /* long double versions of fdlibm kernel functions */ |
315 | extern long double __kernel_sinl (long double,long double,int); |
316 | extern long double __kernel_cosl (long double,long double); |
317 | extern long double __kernel_tanl (long double,long double,int); |
318 | extern void __kernel_sincosl (long double,long double, |
319 | long double *,long double *, int); |
320 | |
321 | #ifndef NO_LONG_DOUBLE |
322 | /* prototypes required to compile the ldbl-96 support without warnings */ |
323 | extern int __finitel (long double); |
324 | extern int __ilogbl (long double); |
325 | extern int __isinfl (long double); |
326 | extern int __isnanl (long double); |
327 | extern long double __atanl (long double); |
328 | extern long double __copysignl (long double, long double); |
329 | extern long double __expm1l (long double); |
330 | extern long double __floorl (long double); |
331 | extern long double __frexpl (long double, int *); |
332 | extern long double __ldexpl (long double, int); |
333 | extern long double __log1pl (long double); |
334 | extern long double __nanl (const char *); |
335 | extern long double __rintl (long double); |
336 | extern long double __scalbnl (long double, int); |
337 | extern long double __sqrtl (long double x); |
338 | extern long double fabsl (long double x); |
339 | extern void __sincosl (long double, long double *, long double *); |
340 | extern long double __logbl (long double x); |
341 | extern long double __significandl (long double x); |
342 | |
343 | extern inline long double __copysignl (long double x, long double y) |
344 | { return __builtin_copysignl (x, y); } |
345 | |
346 | #endif |
347 | |
348 | /* Prototypes for functions of the IBM Accurate Mathematical Library. */ |
349 | extern double __exp1 (double __x, double __xx, double __error); |
350 | extern double __sin (double __x); |
351 | extern double __cos (double __x); |
352 | extern int __branred (double __x, double *__a, double *__aa); |
353 | extern void __doasin (double __x, double __dx, double __v[]); |
354 | extern void __dubsin (double __x, double __dx, double __v[]); |
355 | extern void __dubcos (double __x, double __dx, double __v[]); |
356 | extern double __halfulp (double __x, double __y); |
357 | extern double __sin32 (double __x, double __res, double __res1); |
358 | extern double __cos32 (double __x, double __res, double __res1); |
359 | extern double __mpsin (double __x, double __dx, bool __range_reduce); |
360 | extern double __mpcos (double __x, double __dx, bool __range_reduce); |
361 | extern double __slowexp (double __x); |
362 | extern double __slowpow (double __x, double __y, double __z); |
363 | extern void __docos (double __x, double __dx, double __v[]); |
364 | |
365 | /* Return X^2 + Y^2 - 1, computed without large cancellation error. |
366 | It is given that 1 > X >= Y >= epsilon / 2, and that X^2 + Y^2 >= |
367 | 0.5. */ |
368 | extern float __x2y2m1f (float x, float y); |
369 | extern double __x2y2m1 (double x, double y); |
370 | extern long double __x2y2m1l (long double x, long double y); |
371 | |
372 | /* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N |
373 | - 1, in the form R * (1 + *EPS) where the return value R is an |
374 | approximation to the product and *EPS is set to indicate the |
375 | approximate error in the return value. X is such that all the |
376 | values X + 1, ..., X + N - 1 are exactly representable, and X_EPS / |
377 | X is small enough that factors quadratic in it can be |
378 | neglected. */ |
379 | extern float __gamma_productf (float x, float x_eps, int n, float *eps); |
380 | extern double __gamma_product (double x, double x_eps, int n, double *eps); |
381 | extern long double __gamma_productl (long double x, long double x_eps, |
382 | int n, long double *eps); |
383 | |
384 | /* Compute lgamma of a negative argument X, if it is in a range |
385 | (depending on the floating-point format) for which expansion around |
386 | zeros is used, setting *SIGNGAMP accordingly. */ |
387 | extern float __lgamma_negf (float x, int *signgamp); |
388 | extern double __lgamma_neg (double x, int *signgamp); |
389 | extern long double __lgamma_negl (long double x, int *signgamp); |
390 | |
391 | /* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + |
392 | 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that |
393 | all the values X + 1, ..., X + N - 1 are exactly representable, and |
394 | X_EPS / X is small enough that factors quadratic in it can be |
395 | neglected. */ |
396 | extern double __lgamma_product (double t, double x, double x_eps, int n); |
397 | extern long double __lgamma_productl (long double t, long double x, |
398 | long double x_eps, int n); |
399 | |
400 | #ifndef math_opt_barrier |
401 | # define math_opt_barrier(x) \ |
402 | ({ __typeof (x) __x = (x); __asm ("" : "+m" (__x)); __x; }) |
403 | # define math_force_eval(x) \ |
404 | ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); }) |
405 | #endif |
406 | |
407 | /* math_narrow_eval reduces its floating-point argument to the range |
408 | and precision of its semantic type. (The original evaluation may |
409 | still occur with excess range and precision, so the result may be |
410 | affected by double rounding.) */ |
411 | #if FLT_EVAL_METHOD == 0 |
412 | # define math_narrow_eval(x) (x) |
413 | #else |
414 | # if FLT_EVAL_METHOD == 1 |
415 | # define excess_precision(type) __builtin_types_compatible_p (type, float) |
416 | # else |
417 | # define excess_precision(type) (__builtin_types_compatible_p (type, float) \ |
418 | || __builtin_types_compatible_p (type, \ |
419 | double)) |
420 | # endif |
421 | # define math_narrow_eval(x) \ |
422 | ({ \ |
423 | __typeof (x) math_narrow_eval_tmp = (x); \ |
424 | if (excess_precision (__typeof (math_narrow_eval_tmp))) \ |
425 | __asm__ ("" : "+m" (math_narrow_eval_tmp)); \ |
426 | math_narrow_eval_tmp; \ |
427 | }) |
428 | #endif |
429 | |
430 | #define fabs_tg(x) __MATH_TG ((x), (__typeof (x)) __builtin_fabs, (x)) |
431 | #define min_of_type(type) __builtin_choose_expr \ |
432 | (__builtin_types_compatible_p (type, float), \ |
433 | FLT_MIN, \ |
434 | __builtin_choose_expr \ |
435 | (__builtin_types_compatible_p (type, double), \ |
436 | DBL_MIN, LDBL_MIN)) |
437 | |
438 | /* If X (which is not a NaN) is subnormal, force an underflow |
439 | exception. */ |
440 | #define math_check_force_underflow(x) \ |
441 | do \ |
442 | { \ |
443 | __typeof (x) force_underflow_tmp = (x); \ |
444 | if (fabs_tg (force_underflow_tmp) \ |
445 | < min_of_type (__typeof (force_underflow_tmp))) \ |
446 | { \ |
447 | __typeof (force_underflow_tmp) force_underflow_tmp2 \ |
448 | = force_underflow_tmp * force_underflow_tmp; \ |
449 | math_force_eval (force_underflow_tmp2); \ |
450 | } \ |
451 | } \ |
452 | while (0) |
453 | /* Likewise, but X is also known to be nonnegative. */ |
454 | #define math_check_force_underflow_nonneg(x) \ |
455 | do \ |
456 | { \ |
457 | __typeof (x) force_underflow_tmp = (x); \ |
458 | if (force_underflow_tmp \ |
459 | < min_of_type (__typeof (force_underflow_tmp))) \ |
460 | { \ |
461 | __typeof (force_underflow_tmp) force_underflow_tmp2 \ |
462 | = force_underflow_tmp * force_underflow_tmp; \ |
463 | math_force_eval (force_underflow_tmp2); \ |
464 | } \ |
465 | } \ |
466 | while (0) |
467 | /* Likewise, for both real and imaginary parts of a complex |
468 | result. */ |
469 | #define math_check_force_underflow_complex(x) \ |
470 | do \ |
471 | { \ |
472 | __typeof (x) force_underflow_complex_tmp = (x); \ |
473 | math_check_force_underflow (__real__ force_underflow_complex_tmp); \ |
474 | math_check_force_underflow (__imag__ force_underflow_complex_tmp); \ |
475 | } \ |
476 | while (0) |
477 | |
478 | /* The standards only specify one variant of the fenv.h interfaces. |
479 | But at least for some architectures we can be more efficient if we |
480 | know what operations are going to be performed. Therefore we |
481 | define additional interfaces. By default they refer to the normal |
482 | interfaces. */ |
483 | |
484 | static __always_inline void |
485 | default_libc_feholdexcept (fenv_t *e) |
486 | { |
487 | (void) __feholdexcept (e); |
488 | } |
489 | |
490 | #ifndef libc_feholdexcept |
491 | # define libc_feholdexcept default_libc_feholdexcept |
492 | #endif |
493 | #ifndef libc_feholdexceptf |
494 | # define libc_feholdexceptf default_libc_feholdexcept |
495 | #endif |
496 | #ifndef libc_feholdexceptl |
497 | # define libc_feholdexceptl default_libc_feholdexcept |
498 | #endif |
499 | |
500 | static __always_inline void |
501 | default_libc_fesetround (int r) |
502 | { |
503 | (void) __fesetround (r); |
504 | } |
505 | |
506 | #ifndef libc_fesetround |
507 | # define libc_fesetround default_libc_fesetround |
508 | #endif |
509 | #ifndef libc_fesetroundf |
510 | # define libc_fesetroundf default_libc_fesetround |
511 | #endif |
512 | #ifndef libc_fesetroundl |
513 | # define libc_fesetroundl default_libc_fesetround |
514 | #endif |
515 | |
516 | static __always_inline void |
517 | default_libc_feholdexcept_setround (fenv_t *e, int r) |
518 | { |
519 | __feholdexcept (e); |
520 | __fesetround (r); |
521 | } |
522 | |
523 | #ifndef libc_feholdexcept_setround |
524 | # define libc_feholdexcept_setround default_libc_feholdexcept_setround |
525 | #endif |
526 | #ifndef libc_feholdexcept_setroundf |
527 | # define libc_feholdexcept_setroundf default_libc_feholdexcept_setround |
528 | #endif |
529 | #ifndef libc_feholdexcept_setroundl |
530 | # define libc_feholdexcept_setroundl default_libc_feholdexcept_setround |
531 | #endif |
532 | |
533 | #ifndef libc_feholdsetround_53bit |
534 | # define libc_feholdsetround_53bit libc_feholdsetround |
535 | #endif |
536 | |
537 | #ifndef libc_fetestexcept |
538 | # define libc_fetestexcept fetestexcept |
539 | #endif |
540 | #ifndef libc_fetestexceptf |
541 | # define libc_fetestexceptf fetestexcept |
542 | #endif |
543 | #ifndef libc_fetestexceptl |
544 | # define libc_fetestexceptl fetestexcept |
545 | #endif |
546 | |
547 | static __always_inline void |
548 | default_libc_fesetenv (fenv_t *e) |
549 | { |
550 | (void) __fesetenv (e); |
551 | } |
552 | |
553 | #ifndef libc_fesetenv |
554 | # define libc_fesetenv default_libc_fesetenv |
555 | #endif |
556 | #ifndef libc_fesetenvf |
557 | # define libc_fesetenvf default_libc_fesetenv |
558 | #endif |
559 | #ifndef libc_fesetenvl |
560 | # define libc_fesetenvl default_libc_fesetenv |
561 | #endif |
562 | |
563 | static __always_inline void |
564 | default_libc_feupdateenv (fenv_t *e) |
565 | { |
566 | (void) __feupdateenv (e); |
567 | } |
568 | |
569 | #ifndef libc_feupdateenv |
570 | # define libc_feupdateenv default_libc_feupdateenv |
571 | #endif |
572 | #ifndef libc_feupdateenvf |
573 | # define libc_feupdateenvf default_libc_feupdateenv |
574 | #endif |
575 | #ifndef libc_feupdateenvl |
576 | # define libc_feupdateenvl default_libc_feupdateenv |
577 | #endif |
578 | |
579 | #ifndef libc_feresetround_53bit |
580 | # define libc_feresetround_53bit libc_feresetround |
581 | #endif |
582 | |
583 | static __always_inline int |
584 | default_libc_feupdateenv_test (fenv_t *e, int ex) |
585 | { |
586 | int ret = fetestexcept (ex); |
587 | __feupdateenv (e); |
588 | return ret; |
589 | } |
590 | |
591 | #ifndef libc_feupdateenv_test |
592 | # define libc_feupdateenv_test default_libc_feupdateenv_test |
593 | #endif |
594 | #ifndef libc_feupdateenv_testf |
595 | # define libc_feupdateenv_testf default_libc_feupdateenv_test |
596 | #endif |
597 | #ifndef libc_feupdateenv_testl |
598 | # define libc_feupdateenv_testl default_libc_feupdateenv_test |
599 | #endif |
600 | |
601 | /* Save and set the rounding mode. The use of fenv_t to store the old mode |
602 | allows a target-specific version of this function to avoid converting the |
603 | rounding mode from the fpu format. By default we have no choice but to |
604 | manipulate the entire env. */ |
605 | |
606 | #ifndef libc_feholdsetround |
607 | # define libc_feholdsetround libc_feholdexcept_setround |
608 | #endif |
609 | #ifndef libc_feholdsetroundf |
610 | # define libc_feholdsetroundf libc_feholdexcept_setroundf |
611 | #endif |
612 | #ifndef libc_feholdsetroundl |
613 | # define libc_feholdsetroundl libc_feholdexcept_setroundl |
614 | #endif |
615 | |
616 | /* ... and the reverse. */ |
617 | |
618 | #ifndef libc_feresetround |
619 | # define libc_feresetround libc_feupdateenv |
620 | #endif |
621 | #ifndef libc_feresetroundf |
622 | # define libc_feresetroundf libc_feupdateenvf |
623 | #endif |
624 | #ifndef libc_feresetroundl |
625 | # define libc_feresetroundl libc_feupdateenvl |
626 | #endif |
627 | |
628 | /* ... and a version that may also discard exceptions. */ |
629 | |
630 | #ifndef libc_feresetround_noex |
631 | # define libc_feresetround_noex libc_fesetenv |
632 | #endif |
633 | #ifndef libc_feresetround_noexf |
634 | # define libc_feresetround_noexf libc_fesetenvf |
635 | #endif |
636 | #ifndef libc_feresetround_noexl |
637 | # define libc_feresetround_noexl libc_fesetenvl |
638 | #endif |
639 | |
640 | #ifndef HAVE_RM_CTX |
641 | # define HAVE_RM_CTX 0 |
642 | #endif |
643 | |
644 | #if HAVE_RM_CTX |
645 | /* Set/Restore Rounding Modes only when necessary. If defined, these functions |
646 | set/restore floating point state only if the state needed within the lexical |
647 | block is different from the current state. This saves a lot of time when |
648 | the floating point unit is much slower than the fixed point units. */ |
649 | |
650 | # ifndef libc_feholdsetround_noex_ctx |
651 | # define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx |
652 | # endif |
653 | # ifndef libc_feholdsetround_noexf_ctx |
654 | # define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx |
655 | # endif |
656 | # ifndef libc_feholdsetround_noexl_ctx |
657 | # define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx |
658 | # endif |
659 | |
660 | # ifndef libc_feresetround_noex_ctx |
661 | # define libc_feresetround_noex_ctx libc_fesetenv_ctx |
662 | # endif |
663 | # ifndef libc_feresetround_noexf_ctx |
664 | # define libc_feresetround_noexf_ctx libc_fesetenvf_ctx |
665 | # endif |
666 | # ifndef libc_feresetround_noexl_ctx |
667 | # define libc_feresetround_noexl_ctx libc_fesetenvl_ctx |
668 | # endif |
669 | |
670 | #else |
671 | |
672 | /* Default implementation using standard fenv functions. |
673 | Avoid unnecessary rounding mode changes by first checking the |
674 | current rounding mode. Note the use of __glibc_unlikely is |
675 | important for performance. */ |
676 | |
677 | static __always_inline void |
678 | libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) |
679 | { |
680 | ctx->updated_status = false; |
681 | |
682 | /* Update rounding mode only if different. */ |
683 | if (__glibc_unlikely (round != get_rounding_mode ())) |
684 | { |
685 | ctx->updated_status = true; |
686 | __fegetenv (&ctx->env); |
687 | __fesetround (round); |
688 | } |
689 | } |
690 | |
691 | static __always_inline void |
692 | libc_feresetround_ctx (struct rm_ctx *ctx) |
693 | { |
694 | /* Restore the rounding mode if updated. */ |
695 | if (__glibc_unlikely (ctx->updated_status)) |
696 | __feupdateenv (&ctx->env); |
697 | } |
698 | |
699 | static __always_inline void |
700 | libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) |
701 | { |
702 | /* Save exception flags and rounding mode. */ |
703 | __fegetenv (&ctx->env); |
704 | |
705 | /* Update rounding mode only if different. */ |
706 | if (__glibc_unlikely (round != get_rounding_mode ())) |
707 | __fesetround (round); |
708 | } |
709 | |
710 | static __always_inline void |
711 | libc_feresetround_noex_ctx (struct rm_ctx *ctx) |
712 | { |
713 | /* Restore exception flags and rounding mode. */ |
714 | __fesetenv (&ctx->env); |
715 | } |
716 | |
717 | # define libc_feholdsetroundf_ctx libc_feholdsetround_ctx |
718 | # define libc_feholdsetroundl_ctx libc_feholdsetround_ctx |
719 | # define libc_feresetroundf_ctx libc_feresetround_ctx |
720 | # define libc_feresetroundl_ctx libc_feresetround_ctx |
721 | |
722 | # define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx |
723 | # define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx |
724 | # define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx |
725 | # define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx |
726 | |
727 | #endif |
728 | |
729 | #ifndef libc_feholdsetround_53bit_ctx |
730 | # define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx |
731 | #endif |
732 | #ifndef libc_feresetround_53bit_ctx |
733 | # define libc_feresetround_53bit_ctx libc_feresetround_ctx |
734 | #endif |
735 | |
736 | #define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ |
737 | struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \ |
738 | ROUNDFUNC ## _ctx (&ctx, (RM)) |
739 | |
740 | /* Set the rounding mode within a lexical block. Restore the rounding mode to |
741 | the value at the start of the block. The exception mode must be preserved. |
742 | Exceptions raised within the block must be set in the exception flags. |
743 | Non-stop mode may be enabled inside the block. */ |
744 | |
745 | #define SET_RESTORE_ROUND(RM) \ |
746 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround) |
747 | #define SET_RESTORE_ROUNDF(RM) \ |
748 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetroundf) |
749 | #define SET_RESTORE_ROUNDL(RM) \ |
750 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl) |
751 | |
752 | /* Set the rounding mode within a lexical block. Restore the rounding mode to |
753 | the value at the start of the block. The exception mode must be preserved. |
754 | Exceptions raised within the block must be discarded, and exception flags |
755 | are restored to the value at the start of the block. |
756 | Non-stop mode may be enabled inside the block. */ |
757 | |
758 | #define SET_RESTORE_ROUND_NOEX(RM) \ |
759 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noex, \ |
760 | libc_feresetround_noex) |
761 | #define SET_RESTORE_ROUND_NOEXF(RM) \ |
762 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexf, \ |
763 | libc_feresetround_noexf) |
764 | #define SET_RESTORE_ROUND_NOEXL(RM) \ |
765 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexl, \ |
766 | libc_feresetround_noexl) |
767 | |
768 | /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ |
769 | #define SET_RESTORE_ROUND_53BIT(RM) \ |
770 | SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_53bit, \ |
771 | libc_feresetround_53bit) |
772 | |
773 | #endif /* _MATH_PRIVATE_H_ */ |
774 | |