1#ifndef X86_64_MATH_PRIVATE_H
2#define X86_64_MATH_PRIVATE_H 1
3
4/* We can do a few things better on x86-64. */
5
#if defined __AVX__ || defined SSE2AVX
/* Use the VEX-encoded mnemonics when AVX code generation is in effect,
   so these moves do not incur SSE/AVX transition penalties when mixed
   with surrounding AVX code.  */
# define MOVD "vmovd"
# define MOVQ "vmovq"
#else
# define MOVD "movd"
# define MOVQ "movq"
#endif

/* Direct movement of a double's bit pattern into an integer register
   (or memory); avoids a store/reload through memory.  All macro
   arguments are parenthesized so expression arguments expand safely.  */
#define EXTRACT_WORDS64(i, d) \
  do { \
    int64_t i_; \
    __asm__ (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \
    (i) = i_; \
  } while (0)

/* And the reverse: build a double from a 64-bit integer bit pattern.  */
#define INSERT_WORDS64(d, i) \
  do { \
    int64_t i_ = (i); \
    double d__; \
    __asm__ (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \
    (d) = d__; \
  } while (0)

/* Direct movement of a float's bit pattern into an integer register
   (or memory).  */
#define GET_FLOAT_WORD(i, d) \
  do { \
    int i_; \
    __asm__ (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \
    (i) = i_; \
  } while (0)

/* And the reverse: build a float from a 32-bit integer bit pattern.  */
#define SET_FLOAT_WORD(f, i) \
  do { \
    int i_ = (i); \
    float f__; \
    __asm__ (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \
    (f) = f__; \
  } while (0)
47
48#include <sysdeps/i386/fpu/fenv_private.h>
49#include_next <math_private.h>
50
extern __always_inline double
__ieee754_sqrt (double d)
{
  /* One hardware square-root instruction yields the correctly rounded
     IEEE double result; no libm fallback is needed on x86-64.  */
  double result;
#if defined __AVX__ || defined SSE2AVX
  __asm__ ("vsqrtsd %1, %0, %0" : "=x" (result) : "xm" (d));
#else
  __asm__ ("sqrtsd %1, %0" : "=x" (result) : "xm" (d));
#endif
  return result;
}
62
extern __always_inline float
__ieee754_sqrtf (float d)
{
  /* Single-precision counterpart of __ieee754_sqrt: one scalar
     square-root instruction gives the correctly rounded result.  */
  float result;
#if defined __AVX__ || defined SSE2AVX
  __asm__ ("vsqrtss %1, %0, %0" : "=x" (result) : "xm" (d));
#else
  __asm__ ("sqrtss %1, %0" : "=x" (result) : "xm" (d));
#endif
  return result;
}
74
extern __always_inline long double
__ieee754_sqrtl (long double d)
{
  /* long double is the x87 80-bit format; fsqrt operates on the top
     of the floating-point register stack ("t" / "0" constraints).  */
  long double result;
  __asm__ ("fsqrt" : "=t" (result) : "0" (d));
  return result;
}
82
83#ifdef __SSE4_1__
extern __always_inline double
__rint (double d)
{
  /* Immediate 4 tells roundsd to round using the current MXCSR
     rounding mode, which is exactly rint's contract.  */
  double result;
# if defined __AVX__ || defined SSE2AVX
  __asm__ ("vroundsd $4, %1, %0, %0" : "=x" (result) : "xm" (d));
# else
  __asm__ ("roundsd $4, %1, %0" : "=x" (result) : "xm" (d));
# endif
  return result;
}
95
extern __always_inline float
__rintf (float d)
{
  /* Immediate 4 selects the current MXCSR rounding mode, matching
     rintf's contract.  */
  float result;
# if defined __AVX__ || defined SSE2AVX
  __asm__ ("vroundss $4, %1, %0, %0" : "=x" (result) : "xm" (d));
# else
  __asm__ ("roundss $4, %1, %0" : "=x" (result) : "xm" (d));
# endif
  return result;
}
107
extern __always_inline double
__floor (double d)
{
  /* Immediate 1 forces roundsd to round toward negative infinity,
     regardless of the MXCSR rounding mode.  */
  double result;
# if defined __AVX__ || defined SSE2AVX
  __asm__ ("vroundsd $1, %1, %0, %0" : "=x" (result) : "xm" (d));
# else
  __asm__ ("roundsd $1, %1, %0" : "=x" (result) : "xm" (d));
# endif
  return result;
}
119
extern __always_inline float
__floorf (float d)
{
  /* Immediate 1 forces rounding toward negative infinity, independent
     of the MXCSR rounding mode.  */
  float result;
# if defined __AVX__ || defined SSE2AVX
  __asm__ ("vroundss $1, %1, %0, %0" : "=x" (result) : "xm" (d));
# else
  __asm__ ("roundss $1, %1, %0" : "=x" (result) : "xm" (d));
# endif
  return result;
}
131#endif /* __SSE4_1__ */
132
133#endif /* X86_64_MATH_PRIVATE_H */
134