1 | #ifndef FENV_PRIVATE_H |
2 | #define FENV_PRIVATE_H 1 |
3 | |
4 | #include <bits/floatn.h> |
5 | #include <fenv.h> |
6 | #include <fpu_control.h> |
7 | |
8 | #ifdef __SSE2_MATH__ |
/* Optimization barrier for SSE2 builds: yields the value of X after
   routing it through an empty asm, so the compiler cannot see through
   it.  Types wider than double (other than _Float128) go through the
   top of the x87 stack ("t"); everything else uses an SSE register
   ("x").  */
# define math_opt_barrier(x) \
  ({ __typeof (x) __barrier_val; \
     if (sizeof (x) > sizeof (double) \
         && !__builtin_types_compatible_p (__typeof (x), _Float128)) \
       __asm__ ("" : "=t" (__barrier_val) : "0" (x)); \
     else \
       __asm__ ("" : "=x" (__barrier_val) : "0" (x)); \
     __barrier_val; })
/* Force X to be evaluated (SSE2 builds) by feeding it to an empty
   volatile asm as an input.  Types wider than double (other than
   _Float128) are passed in an x87 register ("f"); everything else in
   an SSE register ("x").  */
# define math_force_eval(x) \
  do { \
    if (sizeof (x) > sizeof (double) \
        && !__builtin_types_compatible_p (__typeof (x), _Float128)) \
      __asm__ __volatile__ ("" : : "f" (x)); \
    else \
      __asm__ __volatile__ ("" : : "x" (x)); \
  } while (0)
25 | #else |
/* Optimization barrier for builds without SSE2 math: yields the value
   of X after routing it through an empty asm.  _Float128 values are
   copied to a temporary that the asm claims to read and write in
   memory ("+m"); every other type goes through the top of the x87
   stack ("t").  */
# define math_opt_barrier(x) \
  ({ __typeof (x) __barrier_val; \
     if (!__builtin_types_compatible_p (__typeof (x), _Float128)) \
       __asm__ ("" : "=t" (__barrier_val) : "0" (x)); \
     else \
       { \
         __barrier_val = (x); \
         __asm__ ("" : "+m" (__barrier_val)); \
       } \
     __barrier_val; })
/* Force X to be evaluated (builds without SSE2 math).  X is first
   copied to a temporary, which is then handed to an empty volatile
   asm: as a memory operand ("m") for types no wider than double and
   for _Float128, and in an x87 register ("f") otherwise.  */
# define math_force_eval(x) \
  do { \
    __typeof (x) __forced = (x); \
    if (sizeof (x) > sizeof (double) \
        && !__builtin_types_compatible_p (__typeof (x), _Float128)) \
      __asm__ __volatile__ ("" : : "f" (__forced)); \
    else \
      __asm__ __volatile__ ("" : : "m" (__forced)); \
  } while (0)
45 | #endif |
46 | |
/* The 32-bit and 64-bit ports share this header.  Only the 64-bit
   fenv_t has a member for the mxcsr, so on 32-bit targets the name
   __mxcsr is redirected to __eip, the only 32-bit member that struct
   offers.  */
#if !defined __x86_64__
# define __mxcsr __eip
#endif
53 | |
54 | |
55 | /* All of these functions are private to libm, and are all used in pairs |
56 | to save+change the fp state and restore the original state. Thus we |
57 | need not care for both the 387 and the sse unit, only the one we're |
58 | actually using. */ |
59 | |
/* Mnemonics used to store and load the MXCSR register: the plain
   forms by default, the v-prefixed forms when __AVX__ or SSE2AVX is
   defined.  */
#if !defined __AVX__ && !defined SSE2AVX
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#else
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#endif
67 | |
68 | static __always_inline void |
69 | libc_feholdexcept_sse (fenv_t *e) |
70 | { |
71 | unsigned int mxcsr; |
72 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
73 | e->__mxcsr = mxcsr; |
74 | mxcsr = (mxcsr | 0x1f80) & ~0x3f; |
75 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
76 | } |
77 | |
/* Store the x87 environment in E and clear the pending x87 exception
   flags.  */
static __always_inline void
libc_feholdexcept_387 (fenv_t *e)
{
  /* fnstenv masks all exceptions as a side effect.  Every fp register
     is listed as clobbered so that the saved TOS field is 0.  */
  __asm__ __volatile__ ("fnstenv %[env]; fnclex"
                        : [env] "=m" (*e)
                        :
                        : "st", "st(1)", "st(2)", "st(3)",
                          "st(4)", "st(5)", "st(6)", "st(7)");
}
88 | |
89 | static __always_inline void |
90 | libc_fesetround_sse (int r) |
91 | { |
92 | unsigned int mxcsr; |
93 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
94 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
95 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
96 | } |
97 | |
/* Replace the rounding-control field of the x87 control word (mask
   0xc00) with R; all other control-word bits are kept.  */
static __always_inline void
libc_fesetround_387 (int r)
{
  fpu_control_t ctrl;

  _FPU_GETCW (ctrl);
  ctrl &= ~0xc00;
  ctrl |= r;
  _FPU_SETCW (ctrl);
}
106 | |
107 | static __always_inline void |
108 | libc_feholdexcept_setround_sse (fenv_t *e, int r) |
109 | { |
110 | unsigned int mxcsr; |
111 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
112 | e->__mxcsr = mxcsr; |
113 | mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
114 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
115 | } |
116 | |
/* Hold x87 exceptions (saving the environment in E) and install R,
   which carries both the rounding mode and the precision.  Shared
   helper for libc_feholdexcept_setround and
   libc_feholdexcept_setround_53bit.  */
static __always_inline void
libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
{
  libc_feholdexcept_387 (e);

  /* Start from the control word just saved in E: drop its rounding and
     precision fields, then add R together with the 0x3f bits.  */
  fpu_control_t ctrl = e->__control_word;
  ctrl &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  ctrl |= 0x3f;
  ctrl |= r;
  _FPU_SETCW (ctrl);
}
129 | |
/* Hold x87 exceptions, saving the environment in E, and select
   rounding mode R with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387 (fenv_t *e, int r)
{
  const int mode_and_prec = r | _FPU_EXTENDED;

  libc_feholdexcept_setround_387_prec (e, mode_and_prec);
}
135 | |
/* Hold x87 exceptions, saving the environment in E, and select
   rounding mode R with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
{
  const int mode_and_prec = r | _FPU_DOUBLE;

  libc_feholdexcept_setround_387_prec (e, mode_and_prec);
}
141 | |
142 | static __always_inline int |
143 | libc_fetestexcept_sse (int e) |
144 | { |
145 | unsigned int mxcsr; |
146 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
147 | return mxcsr & e & FE_ALL_EXCEPT; |
148 | } |
149 | |
/* Return the subset of the exceptions in EX (restricted to
   FE_ALL_EXCEPT) whose bits are currently set in the x87 status
   word.  */
static __always_inline int
libc_fetestexcept_387 (int ex)
{
  fexcept_t status;

  /* fnstsw delivers the status word in %ax.  */
  __asm__ __volatile__ ("fnstsw %[sw]" : [sw] "=a" (status));
  return (ex & FE_ALL_EXCEPT) & status;
}
157 | |
158 | static __always_inline void |
159 | libc_fesetenv_sse (fenv_t *e) |
160 | { |
161 | asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr)); |
162 | } |
163 | |
/* Load the x87 environment from E.  */
static __always_inline void
libc_fesetenv_387 (fenv_t *e)
{
  /* All fp registers are declared clobbered so that the TOS value
     saved earlier matches what the compiler believes the register
     stack looks like now.  */
  __asm__ __volatile__ ("fldenv %[env]"
                        :
                        : [env] "m" (*e)
                        : "st", "st(1)", "st(2)", "st(3)",
                          "st(4)", "st(5)", "st(6)", "st(7)");
}
174 | |
175 | static __always_inline int |
176 | libc_feupdateenv_test_sse (fenv_t *e, int ex) |
177 | { |
178 | unsigned int mxcsr, old_mxcsr, cur_ex; |
179 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
180 | cur_ex = mxcsr & FE_ALL_EXCEPT; |
181 | |
182 | /* Merge current exceptions with the old environment. */ |
183 | old_mxcsr = e->__mxcsr; |
184 | mxcsr = old_mxcsr | cur_ex; |
185 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
186 | |
187 | /* Raise SIGFPE for any new exceptions since the hold. Expect that |
188 | the normal environment has all exceptions masked. */ |
189 | if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex)) |
190 | __feraiseexcept (cur_ex); |
191 | |
192 | /* Test for exceptions raised since the hold. */ |
193 | return cur_ex & ex; |
194 | } |
195 | |
/* Reinstall the x87 environment saved in E, re-raise the exceptions
   that were pending beforehand, and return which of the exceptions in
   EX were among them.  */
static __always_inline int
libc_feupdateenv_test_387 (fenv_t *e, int ex)
{
  fexcept_t raised;

  /* Capture the exception flags before they are overwritten.  */
  __asm__ __volatile__ ("fnstsw %[sw]" : [sw] "=a" (raised));
  raised &= FE_ALL_EXCEPT;

  /* Bring back the original environment ...  */
  libc_fesetenv_387 (e);

  /* ... and merge the captured exceptions into it.  */
  __feraiseexcept (raised);

  /* Report the requested exceptions raised since the hold.  */
  return raised & ex;
}
214 | |
/* Same as libc_feupdateenv_test_sse, with no exceptions tested and
   the result discarded.  */
static __always_inline void
libc_feupdateenv_sse (fenv_t *e)
{
  (void) libc_feupdateenv_test_sse (e, 0);
}
220 | |
/* Same as libc_feupdateenv_test_387, with no exceptions tested and
   the result discarded.  */
static __always_inline void
libc_feupdateenv_387 (fenv_t *e)
{
  (void) libc_feupdateenv_test_387 (e, 0);
}
226 | |
227 | static __always_inline void |
228 | libc_feholdsetround_sse (fenv_t *e, int r) |
229 | { |
230 | unsigned int mxcsr; |
231 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
232 | e->__mxcsr = mxcsr; |
233 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
234 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
235 | } |
236 | |
/* Save the current x87 control word in E, then replace its rounding
   and precision fields with the bits given in R.  */
static __always_inline void
libc_feholdsetround_387_prec (fenv_t *e, int r)
{
  fpu_control_t ctrl;

  _FPU_GETCW (ctrl);
  e->__control_word = ctrl;

  const int kept = ctrl & ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  ctrl = kept | r;
  _FPU_SETCW (ctrl);
}
248 | |
/* Save the x87 control word in E and select rounding mode R with the
   _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387 (fenv_t *e, int r)
{
  const int mode_and_prec = r | _FPU_EXTENDED;

  libc_feholdsetround_387_prec (e, mode_and_prec);
}
254 | |
/* Save the x87 control word in E and select rounding mode R with the
   _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit (fenv_t *e, int r)
{
  const int mode_and_prec = r | _FPU_DOUBLE;

  libc_feholdsetround_387_prec (e, mode_and_prec);
}
260 | |
261 | static __always_inline void |
262 | libc_feresetround_sse (fenv_t *e) |
263 | { |
264 | unsigned int mxcsr; |
265 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
266 | mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); |
267 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
268 | } |
269 | |
/* Load the x87 control word from the copy saved in E.  The whole
   control word is written, not just its rounding field.  */
static __always_inline void
libc_feresetround_387 (fenv_t *e)
{
  _FPU_SETCW (e->__control_word);
}
275 | |
/* float entry points: the x87 helpers unless __SSE_MATH__ is defined,
   in which case the SSE helpers are used.  */
#ifndef __SSE_MATH__
# define libc_feholdexceptf            libc_feholdexcept_387
# define libc_fesetroundf              libc_fesetround_387
# define libc_feholdexcept_setroundf   libc_feholdexcept_setround_387
# define libc_fetestexceptf            libc_fetestexcept_387
# define libc_fesetenvf                libc_fesetenv_387
# define libc_feupdateenv_testf        libc_feupdateenv_test_387
# define libc_feupdateenvf             libc_feupdateenv_387
# define libc_feholdsetroundf          libc_feholdsetround_387
# define libc_feresetroundf            libc_feresetround_387
#else
# define libc_feholdexceptf            libc_feholdexcept_sse
# define libc_fesetroundf              libc_fesetround_sse
# define libc_feholdexcept_setroundf   libc_feholdexcept_setround_sse
# define libc_fetestexceptf            libc_fetestexcept_sse
# define libc_fesetenvf                libc_fesetenv_sse
# define libc_feupdateenv_testf        libc_feupdateenv_test_sse
# define libc_feupdateenvf             libc_feupdateenv_sse
# define libc_feholdsetroundf          libc_feholdsetround_sse
# define libc_feresetroundf            libc_feresetround_sse
#endif /* !__SSE_MATH__ */
297 | |
/* double entry points: the x87 helpers unless __SSE2_MATH__ is
   defined, in which case the SSE helpers are used.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept             libc_feholdexcept_387
# define libc_fesetround               libc_fesetround_387
# define libc_feholdexcept_setround    libc_feholdexcept_setround_387
# define libc_fetestexcept             libc_fetestexcept_387
# define libc_fesetenv                 libc_fesetenv_387
# define libc_feupdateenv_test         libc_feupdateenv_test_387
# define libc_feupdateenv              libc_feupdateenv_387
# define libc_feholdsetround           libc_feholdsetround_387
# define libc_feresetround             libc_feresetround_387
#else
# define libc_feholdexcept             libc_feholdexcept_sse
# define libc_fesetround               libc_fesetround_sse
# define libc_feholdexcept_setround    libc_feholdexcept_setround_sse
# define libc_fetestexcept             libc_fetestexcept_sse
# define libc_fesetenv                 libc_fesetenv_sse
# define libc_feupdateenv_test         libc_feupdateenv_test_sse
# define libc_feupdateenv              libc_feupdateenv_sse
# define libc_feholdsetround           libc_feholdsetround_sse
# define libc_feresetround             libc_feresetround_sse
#endif /* !__SSE2_MATH__ */
319 | |
/* long double entry points: always the x87 helpers.  */
#define libc_feholdexceptl             libc_feholdexcept_387
#define libc_fesetroundl               libc_fesetround_387
#define libc_feholdexcept_setroundl    libc_feholdexcept_setround_387
#define libc_fetestexceptl             libc_fetestexcept_387
#define libc_fesetenvl                 libc_fesetenv_387
#define libc_feupdateenv_testl         libc_feupdateenv_test_387
#define libc_feupdateenvl              libc_feupdateenv_387
#define libc_feholdsetroundl           libc_feholdsetround_387
#define libc_feresetroundl             libc_feresetround_387
329 | |
/* The 53-bit precision variants are only provided when double math
   does not use SSE2.  */
#if !defined __SSE2_MATH__
# define libc_feholdexcept_setround_53bit \
  libc_feholdexcept_setround_387_53bit
# define libc_feholdsetround_53bit \
  libc_feholdsetround_387_53bit
#endif
334 | |
#if defined __x86_64__
/* On x86_64, soft-fp (in both libgcc and glibc) follows the SSE
   rounding mode, so that is the one float128 computations have to
   set.  */
# define SET_RESTORE_ROUNDF128(RM) \
  SET_RESTORE_ROUND_GENERIC (RM, \
                             libc_feholdsetround_sse, \
                             libc_feresetround_sse)
#endif
341 | |
/* Rounding-mode contexts (the *_ctx helpers below) are supported.  */
#define HAVE_RM_CTX 1
344 | |
345 | static __always_inline void |
346 | libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r) |
347 | { |
348 | unsigned int mxcsr, new_mxcsr; |
349 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
350 | new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
351 | |
352 | ctx->env.__mxcsr = mxcsr; |
353 | if (__glibc_unlikely (mxcsr != new_mxcsr)) |
354 | { |
355 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
356 | ctx->updated_status = true; |
357 | } |
358 | else |
359 | ctx->updated_status = false; |
360 | } |
361 | |
362 | /* Unconditional since we want to overwrite any exceptions that occurred in the |
363 | context. This is also why all fehold* functions unconditionally write into |
364 | ctx->env. */ |
365 | static __always_inline void |
366 | libc_fesetenv_sse_ctx (struct rm_ctx *ctx) |
367 | { |
368 | libc_fesetenv_sse (&ctx->env); |
369 | } |
370 | |
371 | static __always_inline void |
372 | libc_feupdateenv_sse_ctx (struct rm_ctx *ctx) |
373 | { |
374 | if (__glibc_unlikely (ctx->updated_status)) |
375 | libc_feupdateenv_test_sse (&ctx->env, 0); |
376 | } |
377 | |
378 | static __always_inline void |
379 | libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r) |
380 | { |
381 | libc_feholdexcept_387 (&ctx->env); |
382 | |
383 | fpu_control_t cw = ctx->env.__control_word; |
384 | fpu_control_t old_cw = cw; |
385 | cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
386 | cw |= r | 0x3f; |
387 | |
388 | if (__glibc_unlikely (old_cw != cw)) |
389 | { |
390 | _FPU_SETCW (cw); |
391 | ctx->updated_status = true; |
392 | } |
393 | else |
394 | ctx->updated_status = false; |
395 | } |
396 | |
/* Context variant: hold x87 exceptions and select rounding mode R
   with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode_and_prec = r | _FPU_EXTENDED;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode_and_prec);
}
402 | |
/* Context variant: hold x87 exceptions and select rounding mode R
   with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode_and_prec = r | _FPU_DOUBLE;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode_and_prec);
}
408 | |
409 | static __always_inline void |
410 | libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r) |
411 | { |
412 | fpu_control_t cw, new_cw; |
413 | |
414 | _FPU_GETCW (cw); |
415 | new_cw = cw; |
416 | new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
417 | new_cw |= r; |
418 | |
419 | ctx->env.__control_word = cw; |
420 | if (__glibc_unlikely (new_cw != cw)) |
421 | { |
422 | _FPU_SETCW (new_cw); |
423 | ctx->updated_status = true; |
424 | } |
425 | else |
426 | ctx->updated_status = false; |
427 | } |
428 | |
/* Context variant: save the x87 control word and select rounding mode
   R with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode_and_prec = r | _FPU_EXTENDED;

  libc_feholdsetround_387_prec_ctx (ctx, mode_and_prec);
}
434 | |
/* Context variant: save the x87 control word and select rounding mode
   R with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode_and_prec = r | _FPU_DOUBLE;

  libc_feholdsetround_387_prec_ctx (ctx, mode_and_prec);
}
440 | |
441 | static __always_inline void |
442 | libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r) |
443 | { |
444 | unsigned int mxcsr, new_mxcsr; |
445 | |
446 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
447 | new_mxcsr = (mxcsr & ~0x6000) | (r << 3); |
448 | |
449 | ctx->env.__mxcsr = mxcsr; |
450 | if (__glibc_unlikely (new_mxcsr != mxcsr)) |
451 | { |
452 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
453 | ctx->updated_status = true; |
454 | } |
455 | else |
456 | ctx->updated_status = false; |
457 | } |
458 | |
459 | static __always_inline void |
460 | libc_feresetround_sse_ctx (struct rm_ctx *ctx) |
461 | { |
462 | if (__glibc_unlikely (ctx->updated_status)) |
463 | libc_feresetround_sse (&ctx->env); |
464 | } |
465 | |
466 | static __always_inline void |
467 | libc_feresetround_387_ctx (struct rm_ctx *ctx) |
468 | { |
469 | if (__glibc_unlikely (ctx->updated_status)) |
470 | _FPU_SETCW (ctx->env.__control_word); |
471 | } |
472 | |
473 | static __always_inline void |
474 | libc_feupdateenv_387_ctx (struct rm_ctx *ctx) |
475 | { |
476 | if (__glibc_unlikely (ctx->updated_status)) |
477 | libc_feupdateenv_test_387 (&ctx->env, 0); |
478 | } |
479 | |
/* float context entry points: the x87 helpers unless __SSE_MATH__ is
   defined.  No fesetenv alias is defined here for the x87 case.  */
#ifndef __SSE_MATH__
# define libc_feholdexcept_setroundf_ctx   libc_feholdexcept_setround_387_ctx
# define libc_feupdateenvf_ctx             libc_feupdateenv_387_ctx
# define libc_feholdsetroundf_ctx          libc_feholdsetround_387_ctx
# define libc_feresetroundf_ctx            libc_feresetround_387_ctx
#else
# define libc_feholdexcept_setroundf_ctx   libc_feholdexcept_setround_sse_ctx
# define libc_fesetenvf_ctx                libc_fesetenv_sse_ctx
# define libc_feupdateenvf_ctx             libc_feupdateenv_sse_ctx
# define libc_feholdsetroundf_ctx          libc_feholdsetround_sse_ctx
# define libc_feresetroundf_ctx            libc_feresetround_sse_ctx
#endif /* !__SSE_MATH__ */
492 | |
/* double context entry points.  Without __SSE2_MATH__ the x87 helpers
   are used (and no fesetenv alias is defined here).  With it, the SSE
   helpers are used on x86_64 or when MATH_SET_BOTH_ROUNDING_MODES is
   not defined; otherwise the default_* implementations are used.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept_setround_ctx   libc_feholdexcept_setround_387_ctx
# define libc_feupdateenv_ctx             libc_feupdateenv_387_ctx
# define libc_feholdsetround_ctx          libc_feholdsetround_387_ctx
# define libc_feresetround_ctx            libc_feresetround_387_ctx
#elif defined (__x86_64__) || !defined (MATH_SET_BOTH_ROUNDING_MODES)
# define libc_feholdexcept_setround_ctx   libc_feholdexcept_setround_sse_ctx
# define libc_fesetenv_ctx                libc_fesetenv_sse_ctx
# define libc_feupdateenv_ctx             libc_feupdateenv_sse_ctx
# define libc_feholdsetround_ctx          libc_feholdsetround_sse_ctx
# define libc_feresetround_ctx            libc_feresetround_sse_ctx
#else
# define libc_feholdexcept_setround_ctx   default_libc_feholdexcept_setround_ctx
# define libc_fesetenv_ctx                default_libc_fesetenv_ctx
# define libc_feupdateenv_ctx             default_libc_feupdateenv_ctx
# define libc_feholdsetround_ctx          default_libc_feholdsetround_ctx
# define libc_feresetround_ctx            default_libc_feresetround_ctx
#endif /* !__SSE2_MATH__ */
513 | |
/* long double context entry points: always the x87 helpers.  */
#define libc_feholdexcept_setroundl_ctx   libc_feholdexcept_setround_387_ctx
#define libc_feupdateenvl_ctx             libc_feupdateenv_387_ctx
#define libc_feholdsetroundl_ctx          libc_feholdsetround_387_ctx
#define libc_feresetroundl_ctx            libc_feresetround_387_ctx
518 | |
/* The 53-bit precision context variants are only provided when double
   math does not use SSE2.  */
#if !defined __SSE2_MATH__
# define libc_feholdsetround_53bit_ctx   libc_feholdsetround_387_53bit_ctx
# define libc_feresetround_53bit_ctx     libc_feresetround_387_ctx
#endif
523 | |
/* Remove the __mxcsr alias (if one was defined above) so the macro
   does not remain defined past this point.  */
#undef __mxcsr
525 | |
526 | #endif /* FENV_PRIVATE_H */ |
527 | |