s_sincosf-fma.c source code [glibc_src_2.27/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c]

1	/ Compute sine and cosine of argument optimized with vector.*
2	Copyright (C) 2017 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<http://www.gnu.org/licenses/>. /*
18
19	#include <errno.h>
20	#include <math.h>
21	#include <math_private.h>
22	#include <x86intrin.h>
23	#include <libm-alias-float.h>
24	#include "s_sincosf.h"
25
26	#define SINCOSF __sincosf_fma
27
28	#ifndef SINCOSF
29	# define SINCOSF_FUNC __sincosf
30	#else
31	# define SINCOSF_FUNC SINCOSF
32	#endif
33
34	/ Chebyshev constants for sin and cos, range -PI/4 - PI/4. /
35	static const __v2df V0 = { -`0x1.5555555551cd9p-3`, -`0x1.ffffffffe98aep-2`};
36	static const __v2df V1 = { `0x1.1111110c2688bp-7`, `0x1.55555545c50c7p-5` };
37	static const __v2df V2 = { -`0x1.a019f8b4bd1f9p-13`, -`0x1.6c16b348b6874p-10` };
38	static const __v2df V3 = { `0x1.71d7264e6b5b4p-19`, `0x1.a00eb9ac43ccp-16` };
39	static const __v2df V4 = { -`0x1.a947e1674b58ap-26`, -`0x1.23c97dd8844d7p-22` };
40
41	/ Chebyshev constants for sin and cos, range 2^-27 - 2^-5. /
42	static const __v2df VC0 = { -`0x1.555555543d49dp-3`, -`0x1.fffffff5cc6fdp-2` };
43	static const __v2df VC1 = { `0x1.110f475cec8c5p-7`, `0x1.55514b178dac5p-5` };
44
45	static const __v2df v2ones = { `1.0`, `1.0` };
46
47	/ Compute the sine and cosine values using Chebyshev polynomials where*
48	THETA is the range reduced absolute value of the input
49	and it is less than Pi/4,
50	N is calculated as trunc(\|x\|/(Pi/4)) + 1 and it is used to decide
51	whether a sine or cosine approximation is more accurate and
52	SIGNBIT is used to add the correct sign after the Chebyshev
53	polynomial is computed. /*
54	static void
55	reduced_sincos (const double theta, const unsigned int n,
56	const unsigned int signbit, float sinx, float* *cosx)
57	{
58	__v2df v2x, v2sx, v2cx;
59	const __v2df v2theta = { theta, theta };
60	const __v2df v2theta2 = v2theta * v2theta;
61	/ Here sinf() and cosf() are calculated using sin Chebyshev polynomial:*
62	x+x^3(S0+x^2(S1+x^2(S2+x^2(S3+x^2S4)))). /
63	v2x = V3 + v2theta2 * V4; / S3+x^2S4. /*
64	v2x = V2 + v2theta2 * v2x; / S2+x^2(S3+x^2S4). /
65	v2x = V1 + v2theta2 * v2x; / S1+x^2(S2+x^2(S3+x^2S4)). /*
66	v2x = V0 + v2theta2 * v2x; / S0+x^2(S1+x^2(S2+x^2(S3+x^2S4))). /
67	v2x = v2theta2 * v2x;
68	v2cx = v2ones + v2x;
69	v2sx = v2theta + v2theta * v2x;
70	/ We are operating on \|x\|, so we need to add back the original*
71	signbit for sinf. /*
72	/ Determine positive or negative primary interval. /
73	/ Are we in the primary interval of sin or cos? /
74	if ((n & `2`) == `0`)
75	{
76	const __v2df v2sign =
77	{
78	ones[((n >> `2`) & `1`) ^ signbit],
79	ones[((n + `2`) >> `2`) & `1`]
80	};
81	v2cx[`0`] = v2sx[`0`];
82	v2cx *= v2sign;
83	__v4sf v4sx = _mm_cvtpd_ps (v2cx);
84	*sinx = v4sx[`0`];
85	*cosx = v4sx[`1`];
86	}
87	else
88	{
89	const __v2df v2sign =
90	{
91	ones[((n + `2`) >> `2`) & `1`],
92	ones[((n >> `2`) & `1`) ^ signbit]
93	};
94	v2cx[`0`] = v2sx[`0`];
95	v2cx *= v2sign;
96	__v4sf v4sx = _mm_cvtpd_ps (v2cx);
97	*sinx = v4sx[`1`];
98	*cosx = v4sx[`0`];
99	}
100	}
101
102	void
103	SINCOSF_FUNC (float x, float sinx, float* *cosx)
104	{
105	double theta = x;
106	double abstheta = fabs (theta);
107	uint32_t ix, xi;
108	GET_FLOAT_WORD (xi, x);
109	/ \|x\| /
110	ix = xi & `0x7fffffff`;
111	/ If \|x\|< Pi/4. /
112	if (ix < `0x3f490fdb`)
113	{
114	if (ix >= `0x3d000000`) / \|x\| >= 2^-5. /
115	{
116	__v2df v2x, v2sx, v2cx;
117	const __v2df v2theta = { theta, theta };
118	const __v2df v2theta2 = v2theta * v2theta;
119	/ Chebyshev polynomial of the form for sin and cos. /
120	v2x = V3 + v2theta2 * V4;
121	v2x = V2 + v2theta2 * v2x;
122	v2x = V1 + v2theta2 * v2x;
123	v2x = V0 + v2theta2 * v2x;
124	v2x = v2theta2 * v2x;
125	v2cx = v2ones + v2x;
126	v2sx = v2theta + v2theta * v2x;
127	v2cx[`0`] = v2sx[`0`];
128	__v4sf v4sx = _mm_cvtpd_ps (v2cx);
129	*sinx = v4sx[`0`];
130	*cosx = v4sx[`1`];
131	}
132	else if (ix >= `0x32000000`) / \|x\| >= 2^-27. /
133	{
134	/ A simpler Chebyshev approximation is close enough for this range:*
135	for sin: x+x^3(SS0+x^2SS1)
136	for cos: 1.0+x^2(CC0+x^3CC1). /*
137	__v2df v2x, v2sx, v2cx;
138	const __v2df v2theta = { theta, theta };
139	const __v2df v2theta2 = v2theta * v2theta;
140	v2x = VC0 + v2theta * v2theta2 * VC1;
141	v2x = v2theta2 * v2x;
142	v2cx = v2ones + v2x;
143	v2sx = v2theta + v2theta * v2x;
144	v2cx[`0`] = v2sx[`0`];
145	__v4sf v4sx = _mm_cvtpd_ps (v2cx);
146	*sinx = v4sx[`0`];
147	*cosx = v4sx[`1`];
148	}
149	else
150	{
151	/ Handle some special cases. /
152	if (ix)
153	sinx = theta - (theta SMALL);
154	else
155	*sinx = theta;
156	*cosx = `1.0` - abstheta;
157	}
158	}
159	else / \|x\| >= Pi/4. /
160	{
161	unsigned int signbit = xi >> `31`;
162	if (ix < `0x40e231d6`) / \|x\| < 9Pi/4. /*
163	{
164	/ There are cases where FE_UPWARD rounding mode can*
165	produce a result of abstheta inv_PI_4 == 9,*
166	where abstheta < 9pi/4, so the domain for
167	pio2_table must go to 5 (9 / 2 + 1). /*
168	unsigned int n = (abstheta * inv_PI_4) + `1`;
169	theta = abstheta - pio2_table[n / `2`];
170	reduced_sincos (theta, n, signbit, sinx, cosx);
171	}
172	else if (ix < `0x7f800000`)
173	{
174	if (ix < `0x4b000000`) / \|x\| < 2^23. /
175	{
176	unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + `1`;
177	double x = n / `2`;
178	theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
179	/ Argument reduction needed. /
180	reduced_sincos (theta, n, signbit, sinx, cosx);
181	}
182	else / \|x\| >= 2^23. /
183	{
184	x = fabsf (x);
185	int exponent
186	= (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
187	exponent += `3`;
188	exponent /= `28`;
189	double a = invpio4_table[exponent] * x;
190	double b = invpio4_table[exponent + `1`] * x;
191	double c = invpio4_table[exponent + `2`] * x;
192	double d = invpio4_table[exponent + `3`] * x;
193	uint64_t l = a;
194	l &= ~`0x7`;
195	a -= l;
196	double e = a + b;
197	l = e;
198	e = a - l;
199	if (l & `1`)
200	{
201	e -= `1.0`;
202	e += b;
203	e += c;
204	e += d;
205	e *= M_PI_4;
206	reduced_sincos (e, l + `1`, signbit, sinx, cosx);
207	}
208	else
209	{
210	e += b;
211	e += c;
212	e += d;
213	if (e <= `1.0`)
214	{
215	e *= M_PI_4;
216	reduced_sincos (e, l + `1`, signbit, sinx, cosx);
217	}
218	else
219	{
220	l++;
221	e -= `2.0`;
222	e *= M_PI_4;
223	reduced_sincos (e, l + `1`, signbit, sinx, cosx);
224	}
225	}
226	}
227	}
228	else
229	{
230	if (ix == `0x7f800000`)
231	__set_errno (EDOM);
232	/ sin/cos(Inf or NaN) is NaN. /
233	sinx = cosx = x - x;
234	}
235	}
236	}
237
238	#ifndef SINCOSF
239	libm_alias_float (__sincos, sincos)
240	#endif
241

Browse the source code of glibc_src_2.27/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c