1/* Compute sine and cosine of argument optimized with vector.
2 Copyright (C) 2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19#include <errno.h>
20#include <math.h>
21#include <math_private.h>
22#include <x86intrin.h>
23#include <libm-alias-float.h>
24#include "s_sincosf.h"
25
26#define SINCOSF __sincosf_fma
27
28#ifndef SINCOSF
29# define SINCOSF_FUNC __sincosf
30#else
31# define SINCOSF_FUNC SINCOSF
32#endif
33
34/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. */
35static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};
36static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };
37static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 };
38static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 };
39static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 };
40
41/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5. */
42static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 };
43static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 };
44
45static const __v2df v2ones = { 1.0, 1.0 };
46
47/* Compute the sine and cosine values using Chebyshev polynomials where
48 THETA is the range reduced absolute value of the input
49 and it is less than Pi/4,
50 N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
51 whether a sine or cosine approximation is more accurate and
52 SIGNBIT is used to add the correct sign after the Chebyshev
53 polynomial is computed. */
54static void
55reduced_sincos (const double theta, const unsigned int n,
56 const unsigned int signbit, float *sinx, float *cosx)
57{
58 __v2df v2x, v2sx, v2cx;
59 const __v2df v2theta = { theta, theta };
60 const __v2df v2theta2 = v2theta * v2theta;
61 /* Here sinf() and cosf() are calculated using sin Chebyshev polynomial:
62 x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
63 v2x = V3 + v2theta2 * V4; /* S3+x^2*S4. */
64 v2x = V2 + v2theta2 * v2x; /* S2+x^2*(S3+x^2*S4). */
65 v2x = V1 + v2theta2 * v2x; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */
66 v2x = V0 + v2theta2 * v2x; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */
67 v2x = v2theta2 * v2x;
68 v2cx = v2ones + v2x;
69 v2sx = v2theta + v2theta * v2x;
70 /* We are operating on |x|, so we need to add back the original
71 signbit for sinf. */
72 /* Determine positive or negative primary interval. */
73 /* Are we in the primary interval of sin or cos? */
74 if ((n & 2) == 0)
75 {
76 const __v2df v2sign =
77 {
78 ones[((n >> 2) & 1) ^ signbit],
79 ones[((n + 2) >> 2) & 1]
80 };
81 v2cx[0] = v2sx[0];
82 v2cx *= v2sign;
83 __v4sf v4sx = _mm_cvtpd_ps (v2cx);
84 *sinx = v4sx[0];
85 *cosx = v4sx[1];
86 }
87 else
88 {
89 const __v2df v2sign =
90 {
91 ones[((n + 2) >> 2) & 1],
92 ones[((n >> 2) & 1) ^ signbit]
93 };
94 v2cx[0] = v2sx[0];
95 v2cx *= v2sign;
96 __v4sf v4sx = _mm_cvtpd_ps (v2cx);
97 *sinx = v4sx[1];
98 *cosx = v4sx[0];
99 }
100}
101
102void
103SINCOSF_FUNC (float x, float *sinx, float *cosx)
104{
105 double theta = x;
106 double abstheta = fabs (theta);
107 uint32_t ix, xi;
108 GET_FLOAT_WORD (xi, x);
109 /* |x| */
110 ix = xi & 0x7fffffff;
111 /* If |x|< Pi/4. */
112 if (ix < 0x3f490fdb)
113 {
114 if (ix >= 0x3d000000) /* |x| >= 2^-5. */
115 {
116 __v2df v2x, v2sx, v2cx;
117 const __v2df v2theta = { theta, theta };
118 const __v2df v2theta2 = v2theta * v2theta;
119 /* Chebyshev polynomial of the form for sin and cos. */
120 v2x = V3 + v2theta2 * V4;
121 v2x = V2 + v2theta2 * v2x;
122 v2x = V1 + v2theta2 * v2x;
123 v2x = V0 + v2theta2 * v2x;
124 v2x = v2theta2 * v2x;
125 v2cx = v2ones + v2x;
126 v2sx = v2theta + v2theta * v2x;
127 v2cx[0] = v2sx[0];
128 __v4sf v4sx = _mm_cvtpd_ps (v2cx);
129 *sinx = v4sx[0];
130 *cosx = v4sx[1];
131 }
132 else if (ix >= 0x32000000) /* |x| >= 2^-27. */
133 {
134 /* A simpler Chebyshev approximation is close enough for this range:
135 for sin: x+x^3*(SS0+x^2*SS1)
136 for cos: 1.0+x^2*(CC0+x^3*CC1). */
137 __v2df v2x, v2sx, v2cx;
138 const __v2df v2theta = { theta, theta };
139 const __v2df v2theta2 = v2theta * v2theta;
140 v2x = VC0 + v2theta * v2theta2 * VC1;
141 v2x = v2theta2 * v2x;
142 v2cx = v2ones + v2x;
143 v2sx = v2theta + v2theta * v2x;
144 v2cx[0] = v2sx[0];
145 __v4sf v4sx = _mm_cvtpd_ps (v2cx);
146 *sinx = v4sx[0];
147 *cosx = v4sx[1];
148 }
149 else
150 {
151 /* Handle some special cases. */
152 if (ix)
153 *sinx = theta - (theta * SMALL);
154 else
155 *sinx = theta;
156 *cosx = 1.0 - abstheta;
157 }
158 }
159 else /* |x| >= Pi/4. */
160 {
161 unsigned int signbit = xi >> 31;
162 if (ix < 0x40e231d6) /* |x| < 9*Pi/4. */
163 {
164 /* There are cases where FE_UPWARD rounding mode can
165 produce a result of abstheta * inv_PI_4 == 9,
166 where abstheta < 9pi/4, so the domain for
167 pio2_table must go to 5 (9 / 2 + 1). */
168 unsigned int n = (abstheta * inv_PI_4) + 1;
169 theta = abstheta - pio2_table[n / 2];
170 reduced_sincos (theta, n, signbit, sinx, cosx);
171 }
172 else if (ix < 0x7f800000)
173 {
174 if (ix < 0x4b000000) /* |x| < 2^23. */
175 {
176 unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
177 double x = n / 2;
178 theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
179 /* Argument reduction needed. */
180 reduced_sincos (theta, n, signbit, sinx, cosx);
181 }
182 else /* |x| >= 2^23. */
183 {
184 x = fabsf (x);
185 int exponent
186 = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
187 exponent += 3;
188 exponent /= 28;
189 double a = invpio4_table[exponent] * x;
190 double b = invpio4_table[exponent + 1] * x;
191 double c = invpio4_table[exponent + 2] * x;
192 double d = invpio4_table[exponent + 3] * x;
193 uint64_t l = a;
194 l &= ~0x7;
195 a -= l;
196 double e = a + b;
197 l = e;
198 e = a - l;
199 if (l & 1)
200 {
201 e -= 1.0;
202 e += b;
203 e += c;
204 e += d;
205 e *= M_PI_4;
206 reduced_sincos (e, l + 1, signbit, sinx, cosx);
207 }
208 else
209 {
210 e += b;
211 e += c;
212 e += d;
213 if (e <= 1.0)
214 {
215 e *= M_PI_4;
216 reduced_sincos (e, l + 1, signbit, sinx, cosx);
217 }
218 else
219 {
220 l++;
221 e -= 2.0;
222 e *= M_PI_4;
223 reduced_sincos (e, l + 1, signbit, sinx, cosx);
224 }
225 }
226 }
227 }
228 else
229 {
230 if (ix == 0x7f800000)
231 __set_errno (EDOM);
232 /* sin/cos(Inf or NaN) is NaN. */
233 *sinx = *cosx = x - x;
234 }
235 }
236}
237
238#ifndef SINCOSF
239libm_alias_float (__sincos, sincos)
240#endif
241