1 | #ifndef FENV_PRIVATE_H |
2 | #define FENV_PRIVATE_H 1 |
3 | |
4 | #include <fenv.h> |
5 | #include <fpu_control.h> |
6 | |
/* Optimization barriers for floating-point values.  The asm templates
   are empty, so no instruction is emitted; only the operand constraints
   act on the compiler.  When __SSE2_MATH__ is defined, values no wider
   than a double are constrained to an SSE register ("x"); anything
   wider goes through the x87 constraints ("t" for the output, "f" for
   an input).  */
#ifdef __SSE2_MATH__
/* Evaluates to X, passed through an empty asm whose output operand is
   tied to its input by the "0" constraint.  */
# define math_opt_barrier(x) \
  ({ __typeof(x) __x; \
     if (sizeof (x) <= sizeof (double)) \
       __asm ("" : "=x" (__x) : "0" (x)); \
     else \
       __asm ("" : "=t" (__x) : "0" (x)); \
     __x; })
/* Makes X an input operand of an empty volatile asm; nothing is
   returned.  */
# define math_force_eval(x) \
  do { \
    if (sizeof (x) <= sizeof (double)) \
      __asm __volatile ("" : : "x" (x)); \
    else \
      __asm __volatile ("" : : "f" (x)); \
  } while (0)
#else
/* Without __SSE2_MATH__ every type goes through the x87 "t"
   constraint.  */
# define math_opt_barrier(x) \
  ({ __typeof (x) __x; \
     __asm ("" : "=t" (__x) : "0" (x)); \
     __x; })
/* Copies X into a local first.  Values up to the size of a double are
   then passed as a memory operand ("m"), wider ones as an x87 register
   operand ("f").  */
# define math_force_eval(x) \
  do { \
    __typeof (x) __x = (x); \
    if (sizeof (x) <= sizeof (double)) \
      __asm __volatile ("" : : "m" (__x)); \
    else \
      __asm __volatile ("" : : "f" (__x)); \
  } while (0)
#endif
36 | |
/* This file is used by both the 32- and 64-bit ports.  The 64-bit port
   has a field in the fenv_t for the mxcsr; the 32-bit port does not.
   Instead, we (ab)use the only 32-bit field extant in the struct.

   The alias below exists only when __x86_64__ is not defined, and it is
   removed again by the #undef at the end of this header.  */
#ifndef __x86_64__
# define __mxcsr __eip
#endif
43 | |
44 | |
45 | /* All of these functions are private to libm, and are all used in pairs |
46 | to save+change the fp state and restore the original state. Thus we |
47 | need not care for both the 387 and the sse unit, only the one we're |
48 | actually using. */ |
49 | |
/* Instruction mnemonics used to store (STMXCSR) and load (LDMXCSR) the
   MXCSR register.  They are string literals that get pasted in front of
   the " %0" operand in the asm templates below.  The "v"-prefixed
   spellings are chosen when either __AVX__ or SSE2AVX is defined.  */
#if defined __AVX__ || defined SSE2AVX
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#else
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#endif
57 | |
58 | static __always_inline void |
59 | libc_feholdexcept_sse (fenv_t *e) |
60 | { |
61 | unsigned int mxcsr; |
62 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
63 | e->__mxcsr = mxcsr; |
64 | mxcsr = (mxcsr | 0x1f80) & ~0x3f; |
65 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
66 | } |
67 | |
/* Store the current x87 environment into *E (fnstenv) and then clear
   the pending x87 exception flags (fnclex).  Both instructions are
   issued from one asm statement so nothing can be scheduled between
   them.  */
static __always_inline void
libc_feholdexcept_387 (fenv_t *e)
{
  /* Recall that fnstenv has a side-effect of masking exceptions.
     Clobber all of the fp registers so that the TOS field is 0.  */
  asm volatile ("fnstenv %0; fnclex"
		: "=m" (*e)
		: : "st" , "st(1)" , "st(2)" , "st(3)" ,
		    "st(4)" , "st(5)" , "st(6)" , "st(7)" );
}
78 | |
79 | static __always_inline void |
80 | libc_fesetround_sse (int r) |
81 | { |
82 | unsigned int mxcsr; |
83 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
84 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
85 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
86 | } |
87 | |
/* Replace the rounding-control field of the x87 control word with R,
   leaving the remaining control-word bits untouched.  _FPU_RC_ZERO
   (0xc00) has both rounding-control bits set, so it doubles as the
   field mask.  */
static __always_inline void
libc_fesetround_387 (int r)
{
  fpu_control_t cw;

  _FPU_GETCW (cw);
  cw &= ~_FPU_RC_ZERO;
  cw |= r;
  _FPU_SETCW (cw);
}
96 | |
97 | static __always_inline void |
98 | libc_feholdexcept_setround_sse (fenv_t *e, int r) |
99 | { |
100 | unsigned int mxcsr; |
101 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
102 | e->__mxcsr = mxcsr; |
103 | mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
104 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
105 | } |
106 | |
/* Hold x87 exceptions and install both a rounding mode and a precision.
   R carries the rounding-control and precision-control bits already
   or-ed together.  Shared helper for libc_feholdexcept_setround_387
   and libc_feholdexcept_setround_387_53bit.  */
static __always_inline void
libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
{
  fpu_control_t new_cw;

  libc_feholdexcept_387 (e);

  /* Start from the control word just saved in E, drop its rounding and
     precision fields, then add R together with the low six bits
     (0x3f).  */
  new_cw = e->__control_word & ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  new_cw |= r | 0x3f;
  _FPU_SETCW (new_cw);
}
119 | |
/* Hold x87 exceptions and set rounding mode R, selecting the
   _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387 (fenv_t *e, int r)
{
  const int mode = r | _FPU_EXTENDED;

  libc_feholdexcept_setround_387_prec (e, mode);
}
125 | |
/* Hold x87 exceptions and set rounding mode R, selecting the
   _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
{
  const int mode = r | _FPU_DOUBLE;

  libc_feholdexcept_setround_387_prec (e, mode);
}
131 | |
132 | static __always_inline int |
133 | libc_fetestexcept_sse (int e) |
134 | { |
135 | unsigned int mxcsr; |
136 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
137 | return mxcsr & e & FE_ALL_EXCEPT; |
138 | } |
139 | |
140 | static __always_inline int |
141 | libc_fetestexcept_387 (int ex) |
142 | { |
143 | fexcept_t temp; |
144 | asm volatile ("fnstsw %0" : "=a" (temp)); |
145 | return temp & ex & FE_ALL_EXCEPT; |
146 | } |
147 | |
148 | static __always_inline void |
149 | libc_fesetenv_sse (fenv_t *e) |
150 | { |
151 | asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr)); |
152 | } |
153 | |
/* Reload the whole x87 environment from *E with fldenv.  */
static __always_inline void
libc_fesetenv_387 (fenv_t *e)
{
  /* Clobber all fp registers so that the TOS value we saved earlier is
     compatible with the current state of the compiler.  */
  asm volatile ("fldenv %0"
		: : "m" (*e)
		: "st" , "st(1)" , "st(2)" , "st(3)" ,
		  "st(4)" , "st(5)" , "st(6)" , "st(7)" );
}
164 | |
165 | static __always_inline int |
166 | libc_feupdateenv_test_sse (fenv_t *e, int ex) |
167 | { |
168 | unsigned int mxcsr, old_mxcsr, cur_ex; |
169 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
170 | cur_ex = mxcsr & FE_ALL_EXCEPT; |
171 | |
172 | /* Merge current exceptions with the old environment. */ |
173 | old_mxcsr = e->__mxcsr; |
174 | mxcsr = old_mxcsr | cur_ex; |
175 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
176 | |
177 | /* Raise SIGFPE for any new exceptions since the hold. Expect that |
178 | the normal environment has all exceptions masked. */ |
179 | if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex)) |
180 | __feraiseexcept (cur_ex); |
181 | |
182 | /* Test for exceptions raised since the hold. */ |
183 | return cur_ex & ex; |
184 | } |
185 | |
186 | static __always_inline int |
187 | libc_feupdateenv_test_387 (fenv_t *e, int ex) |
188 | { |
189 | fexcept_t cur_ex; |
190 | |
191 | /* Save current exceptions. */ |
192 | asm volatile ("fnstsw %0" : "=a" (cur_ex)); |
193 | cur_ex &= FE_ALL_EXCEPT; |
194 | |
195 | /* Reload original environment. */ |
196 | libc_fesetenv_387 (e); |
197 | |
198 | /* Merge current exceptions. */ |
199 | __feraiseexcept (cur_ex); |
200 | |
201 | /* Test for exceptions raised since the hold. */ |
202 | return cur_ex & ex; |
203 | } |
204 | |
/* Same as libc_feupdateenv_test_sse, for callers that do not need the
   exception test result.  */
static __always_inline void
libc_feupdateenv_sse (fenv_t *e)
{
  (void) libc_feupdateenv_test_sse (e, 0);
}
210 | |
/* Same as libc_feupdateenv_test_387, for callers that do not need the
   exception test result.  */
static __always_inline void
libc_feupdateenv_387 (fenv_t *e)
{
  (void) libc_feupdateenv_test_387 (e, 0);
}
216 | |
217 | static __always_inline void |
218 | libc_feholdsetround_sse (fenv_t *e, int r) |
219 | { |
220 | unsigned int mxcsr; |
221 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
222 | e->__mxcsr = mxcsr; |
223 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
224 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
225 | } |
226 | |
/* Save the current x87 control word in E->__control_word, then replace
   its rounding-control and precision-control fields with the bits in R.
   Only the control word is saved; the rest of *E is not written.  */
static __always_inline void
libc_feholdsetround_387_prec (fenv_t *e, int r)
{
  fpu_control_t saved_cw, new_cw;

  _FPU_GETCW (saved_cw);
  e->__control_word = saved_cw;

  new_cw = (saved_cw & ~(_FPU_RC_ZERO | _FPU_EXTENDED)) | r;
  _FPU_SETCW (new_cw);
}
238 | |
/* Save the x87 control word in E and set rounding mode R, selecting the
   _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387 (fenv_t *e, int r)
{
  const int mode = r | _FPU_EXTENDED;

  libc_feholdsetround_387_prec (e, mode);
}
244 | |
/* Save the x87 control word in E and set rounding mode R, selecting the
   _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit (fenv_t *e, int r)
{
  const int mode = r | _FPU_DOUBLE;

  libc_feholdsetround_387_prec (e, mode);
}
250 | |
251 | static __always_inline void |
252 | libc_feresetround_sse (fenv_t *e) |
253 | { |
254 | unsigned int mxcsr; |
255 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
256 | mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); |
257 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
258 | } |
259 | |
/* Reload the x87 control word with the value saved in
   E->__control_word.  The whole control word is restored, not just the
   rounding field.  */
static __always_inline void
libc_feresetround_387 (fenv_t *e)
{
  fpu_control_t saved_cw = e->__control_word;

  _FPU_SETCW (saved_cw);
}
265 | |
/* Bind the f-suffixed entry points (presumably the float variants --
   TODO confirm against callers) to the SSE implementations when
   __SSE_MATH__ is defined, and to the 387 implementations otherwise.  */
#ifdef __SSE_MATH__
# define libc_feholdexceptf libc_feholdexcept_sse
# define libc_fesetroundf libc_fesetround_sse
# define libc_feholdexcept_setroundf libc_feholdexcept_setround_sse
# define libc_fetestexceptf libc_fetestexcept_sse
# define libc_fesetenvf libc_fesetenv_sse
# define libc_feupdateenv_testf libc_feupdateenv_test_sse
# define libc_feupdateenvf libc_feupdateenv_sse
# define libc_feholdsetroundf libc_feholdsetround_sse
# define libc_feresetroundf libc_feresetround_sse
#else
# define libc_feholdexceptf libc_feholdexcept_387
# define libc_fesetroundf libc_fesetround_387
# define libc_feholdexcept_setroundf libc_feholdexcept_setround_387
# define libc_fetestexceptf libc_fetestexcept_387
# define libc_fesetenvf libc_fesetenv_387
# define libc_feupdateenv_testf libc_feupdateenv_test_387
# define libc_feupdateenvf libc_feupdateenv_387
# define libc_feholdsetroundf libc_feholdsetround_387
# define libc_feresetroundf libc_feresetround_387
#endif /* __SSE_MATH__ */
287 | |
/* Bind the unsuffixed entry points (presumably the double variants --
   TODO confirm against callers) to the SSE implementations when
   __SSE2_MATH__ is defined, and to the 387 implementations otherwise.
   Note the test is on __SSE2_MATH__ here, not __SSE_MATH__.  */
#ifdef __SSE2_MATH__
# define libc_feholdexcept libc_feholdexcept_sse
# define libc_fesetround libc_fesetround_sse
# define libc_feholdexcept_setround libc_feholdexcept_setround_sse
# define libc_fetestexcept libc_fetestexcept_sse
# define libc_fesetenv libc_fesetenv_sse
# define libc_feupdateenv_test libc_feupdateenv_test_sse
# define libc_feupdateenv libc_feupdateenv_sse
# define libc_feholdsetround libc_feholdsetround_sse
# define libc_feresetround libc_feresetround_sse
#else
# define libc_feholdexcept libc_feholdexcept_387
# define libc_fesetround libc_fesetround_387
# define libc_feholdexcept_setround libc_feholdexcept_setround_387
# define libc_fetestexcept libc_fetestexcept_387
# define libc_fesetenv libc_fesetenv_387
# define libc_feupdateenv_test libc_feupdateenv_test_387
# define libc_feupdateenv libc_feupdateenv_387
# define libc_feholdsetround libc_feholdsetround_387
# define libc_feresetround libc_feresetround_387
#endif /* __SSE2_MATH__ */
309 | |
/* The l-suffixed entry points (presumably the long double variants --
   TODO confirm against callers) always use the 387 implementations,
   regardless of __SSE_MATH__ / __SSE2_MATH__.  */
#define libc_feholdexceptl libc_feholdexcept_387
#define libc_fesetroundl libc_fesetround_387
#define libc_feholdexcept_setroundl libc_feholdexcept_setround_387
#define libc_fetestexceptl libc_fetestexcept_387
#define libc_fesetenvl libc_fesetenv_387
#define libc_feupdateenv_testl libc_feupdateenv_test_387
#define libc_feupdateenvl libc_feupdateenv_387
#define libc_feholdsetroundl libc_feholdsetround_387
#define libc_feresetroundl libc_feresetround_387
319 | |
/* The _53bit entry points are provided here only when __SSE2_MATH__ is
   not defined; they map to the 387 variants that select _FPU_DOUBLE
   precision.  This header defines nothing for them otherwise.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit
# define libc_feholdsetround_53bit libc_feholdsetround_387_53bit
#endif
324 | |
/* We have support for rounding mode context.  The *_ctx functions that
   follow take a struct rm_ctx; they keep the saved environment in its
   env member and record in its updated_status member whether the
   control register was actually rewritten.  */
#define HAVE_RM_CTX 1
327 | |
328 | static __always_inline void |
329 | libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r) |
330 | { |
331 | unsigned int mxcsr, new_mxcsr; |
332 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
333 | new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
334 | |
335 | ctx->env.__mxcsr = mxcsr; |
336 | if (__glibc_unlikely (mxcsr != new_mxcsr)) |
337 | { |
338 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
339 | ctx->updated_status = true; |
340 | } |
341 | else |
342 | ctx->updated_status = false; |
343 | } |
344 | |
345 | /* Unconditional since we want to overwrite any exceptions that occurred in the |
346 | context. This is also why all fehold* functions unconditionally write into |
347 | ctx->env. */ |
348 | static __always_inline void |
349 | libc_fesetenv_sse_ctx (struct rm_ctx *ctx) |
350 | { |
351 | libc_fesetenv_sse (&ctx->env); |
352 | } |
353 | |
354 | static __always_inline void |
355 | libc_feupdateenv_sse_ctx (struct rm_ctx *ctx) |
356 | { |
357 | if (__glibc_unlikely (ctx->updated_status)) |
358 | libc_feupdateenv_test_sse (&ctx->env, 0); |
359 | } |
360 | |
361 | static __always_inline void |
362 | libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r) |
363 | { |
364 | libc_feholdexcept_387 (&ctx->env); |
365 | |
366 | fpu_control_t cw = ctx->env.__control_word; |
367 | fpu_control_t old_cw = cw; |
368 | cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
369 | cw |= r | 0x3f; |
370 | |
371 | if (__glibc_unlikely (old_cw != cw)) |
372 | { |
373 | _FPU_SETCW (cw); |
374 | ctx->updated_status = true; |
375 | } |
376 | else |
377 | ctx->updated_status = false; |
378 | } |
379 | |
/* Context variant of libc_feholdexcept_setround_387: hold exceptions
   and set rounding mode R with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = r | _FPU_EXTENDED;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode);
}
385 | |
/* Context variant of libc_feholdexcept_setround_387_53bit: hold
   exceptions and set rounding mode R with the _FPU_DOUBLE precision
   setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = r | _FPU_DOUBLE;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode);
}
391 | |
392 | static __always_inline void |
393 | libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r) |
394 | { |
395 | fpu_control_t cw, new_cw; |
396 | |
397 | _FPU_GETCW (cw); |
398 | new_cw = cw; |
399 | new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
400 | new_cw |= r; |
401 | |
402 | ctx->env.__control_word = cw; |
403 | if (__glibc_unlikely (new_cw != cw)) |
404 | { |
405 | _FPU_SETCW (new_cw); |
406 | ctx->updated_status = true; |
407 | } |
408 | else |
409 | ctx->updated_status = false; |
410 | } |
411 | |
/* Context variant of libc_feholdsetround_387: set rounding mode R with
   the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = r | _FPU_EXTENDED;

  libc_feholdsetround_387_prec_ctx (ctx, mode);
}
417 | |
/* Context variant of libc_feholdsetround_387_53bit: set rounding mode R
   with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = r | _FPU_DOUBLE;

  libc_feholdsetround_387_prec_ctx (ctx, mode);
}
423 | |
424 | static __always_inline void |
425 | libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r) |
426 | { |
427 | unsigned int mxcsr, new_mxcsr; |
428 | |
429 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
430 | new_mxcsr = (mxcsr & ~0x6000) | (r << 3); |
431 | |
432 | ctx->env.__mxcsr = mxcsr; |
433 | if (__glibc_unlikely (new_mxcsr != mxcsr)) |
434 | { |
435 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
436 | ctx->updated_status = true; |
437 | } |
438 | else |
439 | ctx->updated_status = false; |
440 | } |
441 | |
442 | static __always_inline void |
443 | libc_feresetround_sse_ctx (struct rm_ctx *ctx) |
444 | { |
445 | if (__glibc_unlikely (ctx->updated_status)) |
446 | libc_feresetround_sse (&ctx->env); |
447 | } |
448 | |
449 | static __always_inline void |
450 | libc_feresetround_387_ctx (struct rm_ctx *ctx) |
451 | { |
452 | if (__glibc_unlikely (ctx->updated_status)) |
453 | _FPU_SETCW (ctx->env.__control_word); |
454 | } |
455 | |
456 | static __always_inline void |
457 | libc_feupdateenv_387_ctx (struct rm_ctx *ctx) |
458 | { |
459 | if (__glibc_unlikely (ctx->updated_status)) |
460 | libc_feupdateenv_test_387 (&ctx->env, 0); |
461 | } |
462 | |
/* Bind the f-suffixed context entry points to the SSE implementations
   when __SSE_MATH__ is defined, and to the 387 implementations
   otherwise.
   NOTE(review): the 387 branch defines no libc_fesetenvf_ctx; this
   header has no 387 counterpart of libc_fesetenv_sse_ctx.  */
#ifdef __SSE_MATH__
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx
# define libc_fesetenvf_ctx libc_fesetenv_sse_ctx
# define libc_feupdateenvf_ctx libc_feupdateenv_sse_ctx
# define libc_feholdsetroundf_ctx libc_feholdsetround_sse_ctx
# define libc_feresetroundf_ctx libc_feresetround_sse_ctx
#else
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx
# define libc_feupdateenvf_ctx libc_feupdateenv_387_ctx
# define libc_feholdsetroundf_ctx libc_feholdsetround_387_ctx
# define libc_feresetroundf_ctx libc_feresetround_387_ctx
#endif /* __SSE_MATH__ */
475 | |
/* Bind the unsuffixed context entry points to the SSE implementations
   when __SSE2_MATH__ is defined, and to the 387 implementations
   otherwise.
   NOTE(review): the 387 branch defines no libc_fesetenv_ctx; this
   header has no 387 counterpart of libc_fesetenv_sse_ctx.  */
#ifdef __SSE2_MATH__
# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx
# define libc_fesetenv_ctx libc_fesetenv_sse_ctx
# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx
# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx
# define libc_feresetround_ctx libc_feresetround_sse_ctx
#else
# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx
# define libc_feupdateenv_ctx libc_feupdateenv_387_ctx
# define libc_feholdsetround_ctx libc_feholdsetround_387_ctx
# define libc_feresetround_ctx libc_feresetround_387_ctx
#endif /* __SSE2_MATH__ */
488 | |
/* The l-suffixed context entry points always use the 387
   implementations, regardless of __SSE_MATH__ / __SSE2_MATH__.  */
#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_387_ctx
#define libc_feupdateenvl_ctx libc_feupdateenv_387_ctx
#define libc_feholdsetroundl_ctx libc_feholdsetround_387_ctx
#define libc_feresetroundl_ctx libc_feresetround_387_ctx
493 | |
/* The _53bit context entry points are provided here only when
   __SSE2_MATH__ is not defined.  The reset side reuses the plain 387
   reset, which reloads the whole saved control word.  */
#ifndef __SSE2_MATH__
# define libc_feholdsetround_53bit_ctx libc_feholdsetround_387_53bit_ctx
# define libc_feresetround_53bit_ctx libc_feresetround_387_ctx
#endif
498 | |
/* Remove the __mxcsr alias that this header defines when __x86_64__ is
   not defined.  The inline functions above were already expanded with
   it, so they are unaffected.  */
#undef __mxcsr
500 | |
501 | #endif /* FENV_PRIVATE_H */ |
502 | |