1 | /* Inline math functions for i387 and SSE. |
2 | Copyright (C) 1995-2017 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _MATH_H |
20 | # error "Never use <bits/mathinline.h> directly; include <math.h> instead." |
21 | #endif |
22 | |
/* Choose the inlining qualifier used by every function in this header:
   __extern_always_inline when the compiler headers provide it, plain
   __inline otherwise.  */
#ifdef __extern_always_inline
# define __MATH_INLINE __extern_always_inline
#else
# define __MATH_INLINE __inline
#endif
28 | |
29 | |
30 | #if defined __USE_ISOC99 && defined __GNUC__ && __GNUC__ >= 2 |
31 | /* GCC 2.97 and up have builtins that actually can be used. */ |
32 | # if !__GNUC_PREREQ (2,97) |
/* ISO C99 specifies macros for comparisons that tolerate unordered
   operands.  The ix87 FPU has dedicated opcodes for this, so use them.
   The replacements must be macros rather than inline functions because
   they have to accept every floating-point type.  Remove any earlier
   definitions before providing our own.  */
# undef isunordered
# undef islessgreater
# undef islessequal
# undef isless
# undef isgreaterequal
# undef isgreater
43 | # ifdef __i686__ |
/* For the PentiumPro and more recent processors we can provide better
   code: FUCOMIP compares st(0) with st(1), sets the CPU flags directly
   and pops st(0), which is why "st" is listed as clobbered.  Each macro
   yields a char that is 1 when the relation holds and 0 otherwise.  */

/* x > y: x sits in st(0), y in st(1); SETA needs CF = 0 and ZF = 0.  */
# define isgreater(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; seta %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st"); \
     __cmp; })

/* x >= y: same operand layout, SETAE only needs CF = 0.  */
# define isgreaterequal(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; setae %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st"); \
     __cmp; })

/* x < y: the operands are swapped so the test becomes y > x.  */
# define isless(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; seta %%al" \
              : "=a" (__cmp) \
              : "u" (x), "t" (y) \
              : "cc", "st"); \
     __cmp; })

/* x <= y: the operands are swapped so the test becomes y >= x.  */
# define islessequal(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; setae %%al" \
              : "=a" (__cmp) \
              : "u" (x), "t" (y) \
              : "cc", "st"); \
     __cmp; })

/* x < y or x > y: SETNE is true only when ZF = 0.  */
# define islessgreater(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; setne %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st"); \
     __cmp; })

/* x and y are unordered: SETP reports the parity flag.  */
# define isunordered(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucomip %%st(1), %%st; setp %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st"); \
     __cmp; })
80 | # else |
/* This is the dumb, portable code for i386 and above.  FUCOMPP compares
   st(0) with st(1) and pops both (hence both are clobbered); FNSTSW
   copies the FPU status word to %ax, and the condition bits in %ah are
   then tested by hand.  Each macro yields a char that is 1 when the
   relation holds and 0 otherwise.  */

/* x > y: mask 0x45 must be clear in %ah.  */
# define isgreater(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st", "st(1)"); \
     __cmp; })

/* x >= y: mask 0x05 must be clear in %ah.  */
# define isgreaterequal(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st", "st(1)"); \
     __cmp; })

/* x < y: the operands are swapped so the test becomes y > x.  */
# define isless(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \
              : "=a" (__cmp) \
              : "u" (x), "t" (y) \
              : "cc", "st", "st(1)"); \
     __cmp; })

/* x <= y: the operands are swapped so the test becomes y >= x.  */
# define islessequal(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \
              : "=a" (__cmp) \
              : "u" (x), "t" (y) \
              : "cc", "st", "st(1)"); \
     __cmp; })

/* x < y or x > y: mask 0x44 must be clear in %ah.  */
# define islessgreater(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; testb $0x44, %%ah; setz %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st", "st(1)"); \
     __cmp; })

/* x and y are unordered: SAHF moves %ah into the CPU flags and SETP
   reports the resulting parity flag.  */
# define isunordered(x, y) \
  ({ register char __cmp; \
     __asm__ ("fucompp; fnstsw; sahf; setp %%al" \
              : "=a" (__cmp) \
              : "u" (y), "t" (x) \
              : "cc", "st", "st(1)"); \
     __cmp; })
117 | # endif /* __i686__ */ |
118 | # endif /* GCC 2.97 */ |
119 | |
/* GCC version 2.7 and earlier has problems with all this inlining
   code, so disable it for those versions of the compiler. */
122 | # if __GNUC_PREREQ (2, 8) |
123 | __BEGIN_NAMESPACE_C99 |
124 | |
125 | /* Test for negative number. Used in the signbit() macro. */ |
126 | __MATH_INLINE int |
127 | __NTH (__signbitf (float __x)) |
128 | { |
129 | # ifdef __SSE2_MATH__ |
130 | int __m; |
131 | __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); |
132 | return (__m & 0x8) != 0; |
133 | # else |
134 | __extension__ union { float __f; int __i; } __u = { __f: __x }; |
135 | return __u.__i < 0; |
136 | # endif |
137 | } |
138 | __MATH_INLINE int |
139 | __NTH (__signbit (double __x)) |
140 | { |
141 | # ifdef __SSE2_MATH__ |
142 | int __m; |
143 | __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); |
144 | return (__m & 0x80) != 0; |
145 | # else |
146 | __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; |
147 | return __u.__i[1] < 0; |
148 | # endif |
149 | } |
150 | __MATH_INLINE int |
151 | __NTH (__signbitl (long double __x)) |
152 | { |
153 | __extension__ union { long double __l; int __i[3]; } __u = { __l: __x }; |
154 | return (__u.__i[2] & 0x8000) != 0; |
155 | } |
156 | |
157 | __END_NAMESPACE_C99 |
158 | # endif |
159 | #endif |
160 | |
161 | |
/* GCC version 2.7 and earlier has problems with all this inlining
   code, so disable it for those versions of the compiler. */
164 | #if __GNUC_PREREQ (2, 8) |
165 | # if !__GNUC_PREREQ (3, 4) && !defined __NO_MATH_INLINES \ |
166 | && defined __OPTIMIZE__ |
167 | /* GCC 3.4 introduced builtins for all functions below, so |
168 | there's no need to define any of these inline functions. */ |
169 | |
170 | # ifdef __USE_ISOC99 |
171 | __BEGIN_NAMESPACE_C99 |
172 | |
173 | /* Round to nearest integer. */ |
174 | # ifdef __SSE_MATH__ |
175 | __MATH_INLINE long int |
176 | __NTH (lrintf (float __x)) |
177 | { |
178 | long int __res; |
179 | /* Mark as volatile since the result is dependent on the state of |
180 | the SSE control register (the rounding mode). Otherwise GCC might |
181 | remove these assembler instructions since it does not know about |
182 | the rounding mode change and cannot currently be told. */ |
183 | __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); |
184 | return __res; |
185 | } |
186 | # endif |
187 | # ifdef __SSE2_MATH__ |
188 | __MATH_INLINE long int |
189 | __NTH (lrint (double __x)) |
190 | { |
191 | long int __res; |
192 | /* Mark as volatile since the result is dependent on the state of |
193 | the SSE control register (the rounding mode). Otherwise GCC might |
194 | remove these assembler instructions since it does not know about |
195 | the rounding mode change and cannot currently be told. */ |
196 | __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); |
197 | return __res; |
198 | } |
199 | # endif |
200 | # ifdef __x86_64__ |
201 | __extension__ |
202 | __MATH_INLINE long long int |
203 | __NTH (llrintf (float __x)) |
204 | { |
205 | long long int __res; |
206 | /* Mark as volatile since the result is dependent on the state of |
207 | the SSE control register (the rounding mode). Otherwise GCC might |
208 | remove these assembler instructions since it does not know about |
209 | the rounding mode change and cannot currently be told. */ |
210 | __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); |
211 | return __res; |
212 | } |
213 | __extension__ |
214 | __MATH_INLINE long long int |
215 | __NTH (llrint (double __x)) |
216 | { |
217 | long long int __res; |
218 | /* Mark as volatile since the result is dependent on the state of |
219 | the SSE control register (the rounding mode). Otherwise GCC might |
220 | remove these assembler instructions since it does not know about |
221 | the rounding mode change and cannot currently be told. */ |
222 | __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); |
223 | return __res; |
224 | } |
225 | # endif |
226 | |
227 | # if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ |
228 | && defined __SSE2_MATH__ |
229 | /* Determine maximum of two values. */ |
230 | __MATH_INLINE float |
231 | __NTH (fmaxf (float __x, float __y)) |
232 | { |
233 | # ifdef __AVX__ |
234 | float __res; |
235 | __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); |
236 | return __res; |
237 | # else |
238 | __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y)); |
239 | return __x; |
240 | # endif |
241 | } |
242 | __MATH_INLINE double |
243 | __NTH (fmax (double __x, double __y)) |
244 | { |
245 | # ifdef __AVX__ |
246 | float __res; |
247 | __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); |
248 | return __res; |
249 | # else |
250 | __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y)); |
251 | return __x; |
252 | # endif |
253 | } |
254 | |
255 | /* Determine minimum of two values. */ |
256 | __MATH_INLINE float |
257 | __NTH (fminf (float __x, float __y)) |
258 | { |
259 | # ifdef __AVX__ |
260 | float __res; |
261 | __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); |
262 | return __res; |
263 | # else |
264 | __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y)); |
265 | return __x; |
266 | # endif |
267 | } |
268 | __MATH_INLINE double |
269 | __NTH (fmin (double __x, double __y)) |
270 | { |
271 | # ifdef __AVX__ |
272 | float __res; |
273 | __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); |
274 | return __res; |
275 | # else |
276 | __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y)); |
277 | return __x; |
278 | # endif |
279 | } |
280 | # endif |
281 | |
282 | __END_NAMESPACE_C99 |
283 | # endif |
284 | |
285 | # if defined __SSE4_1__ && defined __SSE2_MATH__ |
286 | # if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 |
287 | __BEGIN_NAMESPACE_C99 |
288 | |
289 | /* Round to nearest integer. */ |
290 | __MATH_INLINE double |
291 | __NTH (rint (double __x)) |
292 | { |
293 | double __res; |
294 | /* Mark as volatile since the result is dependent on the state of |
295 | the SSE control register (the rounding mode). Otherwise GCC might |
296 | remove these assembler instructions since it does not know about |
297 | the rounding mode change and cannot currently be told. */ |
298 | __asm __volatile__ ("roundsd $4, %1, %0" : "=x" (__res) : "xm" (__x)); |
299 | return __res; |
300 | } |
301 | __MATH_INLINE float |
302 | __NTH (rintf (float __x)) |
303 | { |
304 | float __res; |
305 | /* Mark as volatile since the result is dependent on the state of |
306 | the SSE control register (the rounding mode). Otherwise GCC might |
307 | remove these assembler instructions since it does not know about |
308 | the rounding mode change and cannot currently be told. */ |
309 | __asm __volatile__ ("roundss $4, %1, %0" : "=x" (__res) : "xm" (__x)); |
310 | return __res; |
311 | } |
312 | |
313 | # ifdef __USE_ISOC99 |
314 | /* Round to nearest integer without raising inexact exception. */ |
315 | __MATH_INLINE double |
316 | __NTH (nearbyint (double __x)) |
317 | { |
318 | double __res; |
319 | /* Mark as volatile since the result is dependent on the state of |
320 | the SSE control register (the rounding mode). Otherwise GCC might |
321 | remove these assembler instructions since it does not know about |
322 | the rounding mode change and cannot currently be told. */ |
323 | __asm __volatile__ ("roundsd $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); |
324 | return __res; |
325 | } |
326 | __MATH_INLINE float |
327 | __NTH (nearbyintf (float __x)) |
328 | { |
329 | float __res; |
330 | /* Mark as volatile since the result is dependent on the state of |
331 | the SSE control register (the rounding mode). Otherwise GCC might |
332 | remove these assembler instructions since it does not know about |
333 | the rounding mode change and cannot currently be told. */ |
334 | __asm __volatile__ ("roundss $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); |
335 | return __res; |
336 | } |
337 | # endif |
338 | |
339 | __END_NAMESPACE_C99 |
340 | # endif |
341 | |
342 | __BEGIN_NAMESPACE_STD |
343 | /* Smallest integral value not less than X. */ |
344 | __MATH_INLINE double |
345 | __NTH (ceil (double __x)) |
346 | { |
347 | double __res; |
348 | __asm ("roundsd $2, %1, %0" : "=x" (__res) : "xm" (__x)); |
349 | return __res; |
350 | } |
351 | __END_NAMESPACE_STD |
352 | |
353 | __BEGIN_NAMESPACE_C99 |
354 | __MATH_INLINE float |
355 | __NTH (ceilf (float __x)) |
356 | { |
357 | float __res; |
358 | __asm ("roundss $2, %1, %0" : "=x" (__res) : "xm" (__x)); |
359 | return __res; |
360 | } |
361 | __END_NAMESPACE_C99 |
362 | |
363 | __BEGIN_NAMESPACE_STD |
364 | /* Largest integer not greater than X. */ |
365 | __MATH_INLINE double |
366 | __NTH (floor (double __x)) |
367 | { |
368 | double __res; |
369 | __asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" (__x)); |
370 | return __res; |
371 | } |
372 | __END_NAMESPACE_STD |
373 | |
374 | __BEGIN_NAMESPACE_C99 |
375 | __MATH_INLINE float |
376 | __NTH (floorf (float __x)) |
377 | { |
378 | float __res; |
379 | __asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" (__x)); |
380 | return __res; |
381 | } |
382 | __END_NAMESPACE_C99 |
383 | # endif |
384 | # endif |
385 | #endif |
386 | |
387 | /* Disable x87 inlines when -fpmath=sse is passed and also when we're building |
388 | on x86_64. Older gcc (gcc-3.2 for example) does not define __SSE2_MATH__ |
389 | for x86_64. */ |
390 | #if !defined __SSE2_MATH__ && !defined __x86_64__ |
391 | # if ((!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ |
392 | && defined __OPTIMIZE__) |
393 | |
/* The inline functions do not necessarily set errno or raise the
   correct exceptions. */
396 | # undef math_errhandling |
397 | |
/* Generators for the double, float and long double versions of a math
   function for the ix87 FPU.  NAME is the function name (the float and
   long double versions get an `f' and `l' suffix respectively) and INSN
   is the FPU operation.  There are two sets of macros: the set with NP
   in the name does not add a prototype declaration.  Without ISO C99
   only the double version is generated.  */

# ifdef __USE_ISOC99
# define __inline_mathop(name, insn) \
  __inline_mathop_ (double, name, insn) \
  __inline_mathop_ (float, __CONCAT(name,f), insn) \
  __inline_mathop_ (long double, __CONCAT(name,l), insn)
# define __inline_mathopNP(name, insn) \
  __inline_mathopNP_ (double, name, insn) \
  __inline_mathopNP_ (float, __CONCAT(name,f), insn) \
  __inline_mathopNP_ (long double, __CONCAT(name,l), insn)
# else
# define __inline_mathop(name, insn) \
  __inline_mathop_ (double, name, insn)
# define __inline_mathopNP(name, insn) \
  __inline_mathopNP_ (double, name, insn)
# endif
420 | |
/* Single-type generators: the only asm input is the argument __x, tied
   to the output operand by the "0" matching constraint.  */
# define __inline_mathop_(type, name, insn) \
  __inline_mathop_decl_ (type, name, insn, "0" (__x))
# define __inline_mathopNP_(type, name, insn) \
  __inline_mathop_declNP_ (type, name, insn, "0" (__x))
425 | |
426 | |
/* Like __inline_mathop and __inline_mathopNP, but the caller supplies
   the asm input operand list (and anything following it) as OPERANDS.  */
# ifdef __USE_ISOC99
# define __inline_mathop_decl(name, insn, operands...) \
  __inline_mathop_decl_ (double, name, insn, operands) \
  __inline_mathop_decl_ (float, __CONCAT(name,f), insn, operands) \
  __inline_mathop_decl_ (long double, __CONCAT(name,l), insn, operands)
# define __inline_mathop_declNP(name, insn, operands...) \
  __inline_mathop_declNP_ (double, name, insn, operands) \
  __inline_mathop_declNP_ (float, __CONCAT(name,f), insn, operands) \
  __inline_mathop_declNP_ (long double, __CONCAT(name,l), insn, operands)
# else
# define __inline_mathop_decl(name, insn, operands...) \
  __inline_mathop_decl_ (double, name, insn, operands)
# define __inline_mathop_declNP(name, insn, operands...) \
  __inline_mathop_declNP_ (double, name, insn, operands)
# endif
442 | |
/* Emit a prototype for NAME, then its definition.  */
# define __inline_mathop_decl_(type, name, insn, operands...) \
  __MATH_INLINE type name (type) __THROW; \
  __inline_mathop_declNP_ (type, name, insn, operands)

/* Emit the definition of NAME only: run INSN with OPERANDS as the asm
   inputs and return whatever the asm leaves in st(0).  */
# define __inline_mathop_declNP_(type, name, insn, operands...) \
  __MATH_INLINE type __NTH (name (type __x)) \
  { \
    register type __st0; \
    __asm __volatile__ (insn : "=t" (__st0) : operands); \
    return __st0; \
  }
454 | |
455 | |
/* Generators for functions whose body is given as C code (BODY) rather
   than as a single FPU operation.  The digit is the number of
   parameters; the NP variants do not add a prototype declaration.  With
   ISO C99 the float and long double versions are generated as well.  */
# ifdef __USE_ISOC99
# define __inline_mathcode(name, parm, body) \
  __inline_mathcode_ (double, name, parm, body) \
  __inline_mathcode_ (float, __CONCAT(name,f), parm, body) \
  __inline_mathcode_ (long double, __CONCAT(name,l), parm, body)
# define __inline_mathcodeNP(name, parm, body) \
  __inline_mathcodeNP_ (double, name, parm, body) \
  __inline_mathcodeNP_ (float, __CONCAT(name,f), parm, body) \
  __inline_mathcodeNP_ (long double, __CONCAT(name,l), parm, body)
# define __inline_mathcode2(name, parm1, parm2, body) \
  __inline_mathcode2_ (double, name, parm1, parm2, body) \
  __inline_mathcode2_ (float, __CONCAT(name,f), parm1, parm2, body) \
  __inline_mathcode2_ (long double, __CONCAT(name,l), parm1, parm2, body)
# define __inline_mathcodeNP2(name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (double, name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (float, __CONCAT(name,f), parm1, parm2, body) \
  __inline_mathcodeNP2_ (long double, __CONCAT(name,l), parm1, parm2, body)
# define __inline_mathcode3(name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (double, name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (float, __CONCAT(name,f), parm1, parm2, parm3, body) \
  __inline_mathcode3_ (long double, __CONCAT(name,l), parm1, parm2, parm3, body)
# define __inline_mathcodeNP3(name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (double, name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (float, __CONCAT(name,f), parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (long double, __CONCAT(name,l), parm1, parm2, parm3, body)
# else
# define __inline_mathcode(name, parm, body) \
  __inline_mathcode_ (double, name, (parm), body)
# define __inline_mathcodeNP(name, parm, body) \
  __inline_mathcodeNP_ (double, name, (parm), body)
# define __inline_mathcode2(name, parm1, parm2, body) \
  __inline_mathcode2_ (double, name, parm1, parm2, body)
# define __inline_mathcodeNP2(name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (double, name, parm1, parm2, body)
# define __inline_mathcode3(name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (double, name, parm1, parm2, parm3, body)
# define __inline_mathcodeNP3(name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (double, name, parm1, parm2, parm3, body)
# endif
495 | |
/* Single-type back ends of the __inline_mathcode family.  The variants
   without NP emit a prototype first; the NP variants emit only the
   definition, whose body is BODY followed by a semicolon.  */

/* One parameter.  */
# define __inline_mathcode_(type, name, parm, body) \
  __MATH_INLINE type name (type) __THROW; \
  __inline_mathcodeNP_(type, name, parm, body)

# define __inline_mathcodeNP_(type, name, parm, body) \
  __MATH_INLINE type __NTH (name (type parm)) \
  { \
    body; \
  }


/* Two parameters.  */
# define __inline_mathcode2_(type, name, parm1, parm2, body) \
  __MATH_INLINE type name (type, type) __THROW; \
  __inline_mathcodeNP2_ (type, name, parm1, parm2, body)

# define __inline_mathcodeNP2_(type, name, parm1, parm2, body) \
  __MATH_INLINE type __NTH (name (type parm1, type parm2)) \
  { \
    body; \
  }

/* Three parameters.  */
# define __inline_mathcode3_(type, name, parm1, parm2, parm3, body) \
  __MATH_INLINE type name (type, type, type) __THROW; \
  __inline_mathcodeNP3_(type, name, parm1, parm2, parm3, body)

# define __inline_mathcodeNP3_(type, name, parm1, parm2, parm3, body) \
  __MATH_INLINE type __NTH (name (type parm1, type parm2, \
                                  type parm3)) \
  { \
    body; \
  }
527 | # endif |
528 | |
529 | |
530 | # if !defined __NO_MATH_INLINES && defined __OPTIMIZE__ |
531 | /* Miscellaneous functions */ |
532 | |
533 | /* __FAST_MATH__ is defined by gcc -ffast-math. */ |
534 | # ifdef __FAST_MATH__ |
535 | # ifdef __USE_GNU |
536 | # define __sincos_code \ |
537 | register long double __cosr; \ |
538 | register long double __sinr; \ |
539 | register unsigned int __swtmp; \ |
540 | __asm __volatile__ \ |
541 | ("fsincos\n\t" \ |
542 | "fnstsw %w2\n\t" \ |
543 | "testl $0x400, %2\n\t" \ |
544 | "jz 1f\n\t" \ |
545 | "fldpi\n\t" \ |
546 | "fadd %%st(0)\n\t" \ |
547 | "fxch %%st(1)\n\t" \ |
548 | "2: fprem1\n\t" \ |
549 | "fnstsw %w2\n\t" \ |
550 | "testl $0x400, %2\n\t" \ |
551 | "jnz 2b\n\t" \ |
552 | "fstp %%st(1)\n\t" \ |
553 | "fsincos\n\t" \ |
554 | "1:" \ |
555 | : "=t" (__cosr), "=u" (__sinr), "=a" (__swtmp) : "0" (__x)); \ |
556 | *__sinx = __sinr; \ |
557 | *__cosx = __cosr |
558 | |
559 | __MATH_INLINE void |
560 | __NTH (__sincos (double __x, double *__sinx, double *__cosx)) |
561 | { |
562 | __sincos_code; |
563 | } |
564 | |
565 | __MATH_INLINE void |
566 | __NTH (__sincosf (float __x, float *__sinx, float *__cosx)) |
567 | { |
568 | __sincos_code; |
569 | } |
570 | |
571 | __MATH_INLINE void |
572 | __NTH (__sincosl (long double __x, long double *__sinx, long double *__cosx)) |
573 | { |
574 | __sincos_code; |
575 | } |
576 | # endif |
577 | |
578 | |
579 | /* Optimized inline implementation, sometimes with reduced precision |
580 | and/or argument range. */ |
581 | |
582 | # if __GNUC_PREREQ (3, 5) |
583 | # define __expm1_code \ |
584 | register long double __temp; \ |
585 | __temp = __builtin_expm1l (__x); \ |
586 | return __temp ? __temp : __x |
587 | # else |
588 | # define __expm1_code \ |
589 | register long double __value; \ |
590 | register long double __exponent; \ |
591 | register long double __temp; \ |
592 | __asm __volatile__ \ |
593 | ("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" \ |
594 | "fmul %%st(1) # x * log2(e)\n\t" \ |
595 | "fst %%st(1)\n\t" \ |
596 | "frndint # int(x * log2(e))\n\t" \ |
597 | "fxch\n\t" \ |
598 | "fsub %%st(1) # fract(x * log2(e))\n\t" \ |
599 | "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ |
600 | "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" \ |
601 | : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ |
602 | __asm __volatile__ \ |
603 | ("fscale # 2^int(x * log2(e))\n\t" \ |
604 | : "=t" (__temp) : "0" (1.0), "u" (__exponent)); \ |
605 | __temp -= 1.0; \ |
606 | __temp += __value; \ |
607 | return __temp ? __temp : __x |
608 | # endif |
609 | __inline_mathcodeNP_ (long double, __expm1l, __x, __expm1_code) |
610 | |
611 | # if __GNUC_PREREQ (3, 4) |
612 | __inline_mathcodeNP_ (long double, __expl, __x, return __builtin_expl (__x)) |
613 | # else |
614 | # define __exp_code \ |
615 | register long double __value; \ |
616 | register long double __exponent; \ |
617 | __asm __volatile__ \ |
618 | ("fldl2e # e^x = 2^(x * log2(e))\n\t" \ |
619 | "fmul %%st(1) # x * log2(e)\n\t" \ |
620 | "fst %%st(1)\n\t" \ |
621 | "frndint # int(x * log2(e))\n\t" \ |
622 | "fxch\n\t" \ |
623 | "fsub %%st(1) # fract(x * log2(e))\n\t" \ |
624 | "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ |
625 | : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ |
626 | __value += 1.0; \ |
627 | __asm __volatile__ \ |
628 | ("fscale" \ |
629 | : "=t" (__value) : "0" (__value), "u" (__exponent)); \ |
630 | return __value |
631 | __inline_mathcodeNP (exp, __x, __exp_code) |
632 | __inline_mathcodeNP_ (long double, __expl, __x, __exp_code) |
633 | # endif |
634 | |
635 | |
636 | # if !__GNUC_PREREQ (3, 5) |
637 | __inline_mathcodeNP (tan, __x, \ |
638 | register long double __value; \ |
639 | register long double __value2 __attribute__ ((__unused__)); \ |
640 | __asm __volatile__ \ |
641 | ("fptan" \ |
642 | : "=t" (__value2), "=u" (__value) : "0" (__x)); \ |
643 | return __value) |
644 | # endif |
645 | # endif /* __FAST_MATH__ */ |
646 | |
647 | |
648 | # if __GNUC_PREREQ (3, 4) |
649 | __inline_mathcodeNP2_ (long double, __atan2l, __y, __x, |
650 | return __builtin_atan2l (__y, __x)) |
651 | # else |
652 | # define __atan2_code \ |
653 | register long double __value; \ |
654 | __asm __volatile__ \ |
655 | ("fpatan" \ |
656 | : "=t" (__value) : "0" (__x), "u" (__y) : "st(1)"); \ |
657 | return __value |
658 | # ifdef __FAST_MATH__ |
659 | __inline_mathcodeNP2 (atan2, __y, __x, __atan2_code) |
660 | # endif |
661 | __inline_mathcodeNP2_ (long double, __atan2l, __y, __x, __atan2_code) |
662 | # endif |
663 | |
664 | |
665 | # if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) |
666 | __inline_mathcodeNP2 (fmod, __x, __y, \ |
667 | register long double __value; \ |
668 | __asm __volatile__ \ |
669 | ("1: fprem\n\t" \ |
670 | "fnstsw %%ax\n\t" \ |
671 | "sahf\n\t" \ |
672 | "jp 1b" \ |
673 | : "=t" (__value) : "0" (__x), "u" (__y) : "ax" , "cc" ); \ |
674 | return __value) |
675 | # endif |
676 | |
677 | |
/* Square root.  __libc_sqrtl is what the other inlines in this file
   call: the compiler builtin from GCC 3.3 on, otherwise the FSQRT based
   __sqrtl defined here together with the public sqrt family.  */
# ifdef __FAST_MATH__
# if __GNUC_PREREQ (3,3)
# define __libc_sqrtl(n) __builtin_sqrtl (n)
# else
__inline_mathopNP (sqrt, "fsqrt" )
__inline_mathopNP_ (long double, __sqrtl, "fsqrt" )
# define __libc_sqrtl(n) __sqrtl (n)
# endif
# endif
687 | |
688 | # if __GNUC_PREREQ (2, 8) |
689 | __inline_mathcodeNP_ (double, fabs, __x, return __builtin_fabs (__x)) |
690 | # ifdef __USE_ISOC99 |
691 | __inline_mathcodeNP_ (float, fabsf, __x, return __builtin_fabsf (__x)) |
692 | __inline_mathcodeNP_ (long double, fabsl, __x, return __builtin_fabsl (__x)) |
693 | # endif |
694 | __inline_mathcodeNP_ (long double, __fabsl, __x, return __builtin_fabsl (__x)) |
695 | # else |
696 | __inline_mathop (fabs, "fabs" ) |
697 | __inline_mathop_ (long double, __fabsl, "fabs" ) |
698 | # endif |
699 | |
/* Fast-math trigonometric and logarithm inlines, grouped by the first
   GCC version that supplies a builtin for them.  */
# ifdef __FAST_MATH__
# if !__GNUC_PREREQ (3, 4)
/* The argument range of this inline version is reduced.  */
__inline_mathopNP (sin, "fsin" )
/* The argument range of this inline version is reduced.  */
__inline_mathopNP (cos, "fcos" )

/* FLDLN2 pushes ln(2), FXCH puts the argument back on top, FYL2X then
   combines the two.  */
__inline_mathop_declNP (log, "fldln2; fxch; fyl2x" , "0" (__x) : "st(1)" )

/* FLD1 pushes 1.0 before FPATAN is applied to the argument.  */
__inline_mathop_declNP (atan, "fld1; fpatan" , "0" (__x) : "st(1)" )
# endif

# if !__GNUC_PREREQ (3, 5)
/* Same scheme as log, with FLDLG2 pushing log10(2).  */
__inline_mathop_declNP (log10, "fldlg2; fxch; fyl2x" , "0" (__x) : "st(1)" )

/* asin and acos are expressed through __atan2l and sqrt (1 - x*x).  */
__inline_mathcodeNP (asin, __x, return __atan2l (__x, __libc_sqrtl (1.0 - __x * __x)))
__inline_mathcodeNP (acos, __x, return __atan2l (__libc_sqrtl (1.0 - __x * __x), __x))
# endif
# endif /* __FAST_MATH__ */
721 | |
722 | __inline_mathcode_ (long double, __sgn1l, __x, \ |
723 | __extension__ union { long double __xld; unsigned int __xi[3]; } __n = \ |
724 | { __xld: __x }; \ |
725 | __n.__xi[2] = (__n.__xi[2] & 0x8000) | 0x3fff; \ |
726 | __n.__xi[1] = 0x80000000; \ |
727 | __n.__xi[0] = 0; \ |
728 | return __n.__xld) |
729 | |
730 | |
/* Hyperbolic functions built on the __expm1l and __expl inlines.  */
# ifdef __FAST_MATH__
/* The argument range of the inline version of sinhl is slightly reduced.  */
__inline_mathcodeNP (sinh, __x, \
  register long double __t = __expm1l (__fabsl (__x)); \
  return 0.5 * (__t / (__t + 1.0) + __t) * __sgn1l (__x))

__inline_mathcodeNP (cosh, __x, \
  register long double __e = __expl (__x); \
  return 0.5 * (__e + 1.0 / __e))

__inline_mathcodeNP (tanh, __x, \
  register long double __t = __expm1l (-__fabsl (__x + __x)); \
  return __t / (__t + 2.0) * __sgn1l (-__x))
# endif
745 | |
746 | __inline_mathcodeNP (floor, __x, \ |
747 | register long double __value; \ |
748 | register int __ignore; \ |
749 | unsigned short int __cw; \ |
750 | unsigned short int __cwtmp; \ |
751 | __asm __volatile ("fnstcw %3\n\t" \ |
752 | "movzwl %3, %1\n\t" \ |
753 | "andl $0xf3ff, %1\n\t" \ |
754 | "orl $0x0400, %1\n\t" /* rounding down */ \ |
755 | "movw %w1, %2\n\t" \ |
756 | "fldcw %2\n\t" \ |
757 | "frndint\n\t" \ |
758 | "fldcw %3" \ |
759 | : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ |
760 | "=m" (__cw) \ |
761 | : "0" (__x)); \ |
762 | return __value) |
763 | |
764 | __inline_mathcodeNP (ceil, __x, \ |
765 | register long double __value; \ |
766 | register int __ignore; \ |
767 | unsigned short int __cw; \ |
768 | unsigned short int __cwtmp; \ |
769 | __asm __volatile ("fnstcw %3\n\t" \ |
770 | "movzwl %3, %1\n\t" \ |
771 | "andl $0xf3ff, %1\n\t" \ |
772 | "orl $0x0800, %1\n\t" /* rounding up */ \ |
773 | "movw %w1, %2\n\t" \ |
774 | "fldcw %2\n\t" \ |
775 | "frndint\n\t" \ |
776 | "fldcw %3" \ |
777 | : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ |
778 | "=m" (__cw) \ |
779 | : "0" (__x)); \ |
780 | return __value) |
781 | |
# ifdef __FAST_MATH__
/* Shared body of the ldexp functions: FSCALE with __x in st(0) and the
   exponent __y, converted to long double, in st(1).  Expects __x and
   __y to be in scope.  */
# define __ldexp_code \
  register long double __scaled; \
  __asm __volatile__ \
    ("fscale" \
     : "=t" (__scaled) : "0" (__x), "u" ((long double) __y)); \
  return __scaled

/* Scale the double __x by the exponent __y.  */
__MATH_INLINE double
__NTH (ldexp (double __x, int __y))
{
  __ldexp_code;
}
# endif
796 | |
797 | |
798 | /* Optimized versions for some non-standardized functions. */ |
799 | # ifdef __USE_ISOC99 |
800 | |
801 | # ifdef __FAST_MATH__ |
802 | __inline_mathcodeNP (expm1, __x, __expm1_code) |
803 | |
804 | /* We cannot rely on M_SQRT being defined. So we do it for ourself |
805 | here. */ |
806 | # define __M_SQRT2 1.41421356237309504880L /* sqrt(2) */ |
807 | |
808 | # if !__GNUC_PREREQ (3, 5) |
809 | __inline_mathcodeNP (log1p, __x, \ |
810 | register long double __value; \ |
811 | if (__fabsl (__x) >= 1.0 - 0.5 * __M_SQRT2) \ |
812 | __value = logl (1.0 + __x); \ |
813 | else \ |
814 | __asm __volatile__ \ |
815 | ("fldln2\n\t" \ |
816 | "fxch\n\t" \ |
817 | "fyl2xp1" \ |
818 | : "=t" (__value) : "0" (__x) : "st(1)" ); \ |
819 | return __value) |
820 | # endif |
821 | |
822 | |
823 | /* The argument range of the inline version of asinhl is slightly reduced. */ |
824 | __inline_mathcodeNP (asinh, __x, \ |
825 | register long double __y = __fabsl (__x); \ |
826 | return (log1pl (__y * __y / (__libc_sqrtl (__y * __y + 1.0) + 1.0) + __y) \ |
827 | * __sgn1l (__x))) |
828 | |
829 | __inline_mathcodeNP (acosh, __x, \ |
830 | return logl (__x + __libc_sqrtl (__x - 1.0) * __libc_sqrtl (__x + 1.0))) |
831 | |
832 | __inline_mathcodeNP (atanh, __x, \ |
833 | register long double __y = __fabsl (__x); \ |
834 | return -0.5 * log1pl (-(__y + __y) / (1.0 + __y)) * __sgn1l (__x)) |
835 | |
836 | /* The argument range of the inline version of hypotl is slightly reduced. */ |
837 | __inline_mathcodeNP2 (hypot, __x, __y, |
838 | return __libc_sqrtl (__x * __x + __y * __y)) |
839 | |
840 | # if !__GNUC_PREREQ (3, 5) |
841 | __inline_mathcodeNP(logb, __x, \ |
842 | register long double __value; \ |
843 | register long double __junk; \ |
844 | __asm __volatile__ \ |
845 | ("fxtract\n\t" \ |
846 | : "=t" (__junk), "=u" (__value) : "0" (__x)); \ |
847 | return __value) |
848 | # endif |
849 | |
850 | # endif |
851 | # endif |
852 | |
853 | # ifdef __USE_ISOC99 |
854 | # ifdef __FAST_MATH__ |
855 | |
856 | # if !__GNUC_PREREQ (3, 5) |
857 | __inline_mathop_declNP (log2, "fld1; fxch; fyl2x" , "0" (__x) : "st(1)" ) |
858 | # endif |
859 | |
860 | __MATH_INLINE float |
861 | __NTH (ldexpf (float __x, int __y)) |
862 | { |
863 | __ldexp_code; |
864 | } |
865 | |
866 | __MATH_INLINE long double |
867 | __NTH (ldexpl (long double __x, int __y)) |
868 | { |
869 | __ldexp_code; |
870 | } |
871 | |
872 | __inline_mathopNP (rint, "frndint" ) |
873 | # endif /* __FAST_MATH__ */ |
874 | |
875 | # define __lrint_code \ |
876 | long int __lrintres; \ |
877 | __asm__ __volatile__ \ |
878 | ("fistpl %0" \ |
879 | : "=m" (__lrintres) : "t" (__x) : "st"); \ |
880 | return __lrintres |
881 | __MATH_INLINE long int |
882 | __NTH (lrintf (float __x)) |
883 | { |
884 | __lrint_code; |
885 | } |
886 | __MATH_INLINE long int |
887 | __NTH (lrint (double __x)) |
888 | { |
889 | __lrint_code; |
890 | } |
891 | __MATH_INLINE long int |
892 | __NTH (lrintl (long double __x)) |
893 | { |
894 | __lrint_code; |
895 | } |
896 | # undef __lrint_code |
897 | |
898 | # define __llrint_code \ |
899 | long long int __llrintres; \ |
900 | __asm__ __volatile__ \ |
901 | ("fistpll %0" \ |
902 | : "=m" (__llrintres) : "t" (__x) : "st"); \ |
903 | return __llrintres |
904 | __extension__ |
905 | __MATH_INLINE long long int |
906 | __NTH (llrintf (float __x)) |
907 | { |
908 | __llrint_code; |
909 | } |
910 | __extension__ |
911 | __MATH_INLINE long long int |
912 | __NTH (llrint (double __x)) |
913 | { |
914 | __llrint_code; |
915 | } |
916 | __extension__ |
917 | __MATH_INLINE long long int |
918 | __NTH (llrintl (long double __x)) |
919 | { |
920 | __llrint_code; |
921 | } |
922 | # undef __llrint_code |
923 | |
924 | # endif |
925 | |
926 | |
927 | # ifdef __USE_MISC |
928 | |
929 | # if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) |
930 | __inline_mathcodeNP2 (drem, __x, __y, \ |
931 | register double __value; \ |
932 | register int __clobbered; \ |
933 | __asm __volatile__ \ |
934 | ("1: fprem1\n\t" \ |
935 | "fstsw %%ax\n\t" \ |
936 | "sahf\n\t" \ |
937 | "jp 1b" \ |
938 | : "=t" (__value), "=&a" (__clobbered) : "0" (__x), "u" (__y) : "cc" ); \ |
939 | return __value) |
940 | # endif |
941 | |
942 | |
943 | /* This function is used in the `isfinite' macro. */ |
944 | __MATH_INLINE int |
945 | __NTH (__finite (double __x)) |
946 | { |
947 | return (__extension__ |
948 | (((((union { double __d; int __i[2]; }) {__d: __x}).__i[1] |
949 | | 0x800fffffu) + 1) >> 31)); |
950 | } |
951 | |
952 | # endif /* __USE_MISC */ |
953 | |
/* Undefine the large code-fragment macros that are no longer needed.  */
# ifdef __FAST_MATH__
# undef __sincos_code
# undef __exp_code
# undef __expm1_code
# endif /* __FAST_MATH__ */
# undef __atan2_code
961 | |
962 | # endif /* __NO_MATH_INLINES */ |
963 | |
964 | |
/* This code is used internally in the GNU libc: FSQRT based
   __ieee754_sqrt functions and an FPATAN based __ieee754_atan2l, with
   __x in st(0) and __y in st(1).  */
# ifdef __LIBC_INTERNAL_MATH_INLINES
__inline_mathop (__ieee754_sqrt, "fsqrt" )
__inline_mathcode2_ (long double, __ieee754_atan2l, __y, __x,
                     register long double __angle;
                     __asm __volatile__ ("fpatan\n\t"
                                         : "=t" (__angle)
                                         : "0" (__x), "u" (__y) : "st(1)" );
                     return __angle;)
# endif
975 | |
976 | #endif /* !__SSE2_MATH__ && !__x86_64__ */ |
977 | |