1 | #ifndef X86_64_MATH_PRIVATE_H |
2 | #define X86_64_MATH_PRIVATE_H 1 |
3 | |
4 | /* We can do a few things better on x86-64. */ |
5 | |
/* Select the mnemonics for the GPR<->XMM move instructions used by the
   macros below.  When compiling for AVX (or with SSE2AVX translation)
   the VEX-encoded forms must be used so that legacy-SSE encodings do
   not mix with AVX code (avoids SSE/AVX transition penalties).  */
#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define MOVQ "vmovq"
#else
# define MOVD "movd"
# define MOVQ "movq"
#endif
13 | |
14 | /* Direct movement of float into integer register. */ |
/* Direct movement of a double into an integer register.
   EXTRACT_WORDS64 (i, d) stores the raw 64-bit IEEE 754 bit pattern of
   the double D into the integer I via a single movq, avoiding a store
   and reload through memory.
   Fix: the original line read "#define (i, d)" -- the macro name was
   missing, which is a preprocessor syntax error.  The name must be
   EXTRACT_WORDS64, the counterpart of INSERT_WORDS64 below (and the
   interface expected by math_private.h).  */
#define EXTRACT_WORDS64(i, d)						      \
  do {									      \
    int64_t i_;								      \
    asm (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d)));		      \
    (i) = i_;								      \
  } while (0)
21 | |
22 | /* And the reverse. */ |
/* And the reverse: INSERT_WORDS64 (d, i) reinterprets the 64-bit
   integer I as the bit pattern of the double D, moved directly
   register-to-register with movq.  */
#define INSERT_WORDS64(d, i) \
  do { \
    int64_t i_ = i; \
    double d__; \
    asm (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \
    d = d__; \
  } while (0)
30 | |
31 | /* Direct movement of float into integer register. */ |
/* Direct movement of a float into an integer register:
   GET_FLOAT_WORD (i, d) stores the raw 32-bit IEEE 754 bit pattern of
   the float D into the integer I via a single movd.  */
#define GET_FLOAT_WORD(i, d) \
  do { \
    int i_; \
    asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \
    (i) = i_; \
  } while (0)
38 | |
39 | /* And the reverse. */ |
/* And the reverse: SET_FLOAT_WORD (f, i) reinterprets the 32-bit
   integer I as the bit pattern of the float F, moved directly
   register-to-register with movd.  */
#define SET_FLOAT_WORD(f, i) \
  do { \
    int i_ = i; \
    float f__; \
    asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \
    f = f__; \
  } while (0)
47 | |
48 | #include <sysdeps/i386/fpu/fenv_private.h> |
49 | #include_next <math_private.h> |
50 | |
/* Inline double-precision square root via the hardware sqrtsd
   instruction (VEX-encoded vsqrtsd under AVX/SSE2AVX), avoiding an
   out-of-line libm call.  */
extern __always_inline double
__ieee754_sqrt (double d)
{
  double res;
#if defined __AVX__ || defined SSE2AVX
  asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
#else
  asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
#endif
  return res;
}
62 | |
/* Inline single-precision square root via the hardware sqrtss
   instruction (VEX-encoded vsqrtss under AVX/SSE2AVX).  */
extern __always_inline float
__ieee754_sqrtf (float d)
{
  float res;
#if defined __AVX__ || defined SSE2AVX
  asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
#else
  asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
#endif
  return res;
}
74 | |
/* Inline extended-precision square root using the x87 fsqrt
   instruction; "=t"/"0" keep the operand in st(0).  */
extern __always_inline long double
__ieee754_sqrtl (long double d)
{
  long double res;
  asm ("fsqrt" : "=t" (res) : "0" (d));
  return res;
}
82 | |
83 | #ifdef __SSE4_1__ |
/* Inline rint: SSE4.1 roundsd with immediate 4 (per the Intel SDM,
   bit 2 set selects the current MXCSR rounding mode, giving rint
   semantics).  */
extern __always_inline double
__rint (double d)
{
  double res;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundsd $4, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
  asm ("roundsd $4, %1, %0" : "=x" (res) : "xm" (d));
# endif
  return res;
}
95 | |
/* Inline rintf: SSE4.1 roundss with immediate 4 (use the current
   MXCSR rounding mode).  */
extern __always_inline float
__rintf (float d)
{
  float res;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundss $4, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
  asm ("roundss $4, %1, %0" : "=x" (res) : "xm" (d));
# endif
  return res;
}
107 | |
/* Inline floor: SSE4.1 roundsd with immediate 1 (per the Intel SDM,
   encoding 01 = round toward negative infinity, regardless of the
   MXCSR rounding mode).  */
extern __always_inline double
__floor (double d)
{
  double res;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundsd $1, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
  asm ("roundsd $1, %1, %0" : "=x" (res) : "xm" (d));
# endif
  return res;
}
119 | |
/* Inline floorf: SSE4.1 roundss with immediate 1 (round toward
   negative infinity).  */
extern __always_inline float
__floorf (float d)
{
  float res;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundss $1, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
  asm ("roundss $1, %1, %0" : "=x" (res) : "xm" (d));
# endif
  return res;
}
131 | #endif /* __SSE4_1__ */ |
132 | |
133 | #endif /* X86_64_MATH_PRIVATE_H */ |
134 | |