/* s_sinf.S source code [glibc_src_2.24/sysdeps/x86_64/fpu/s_sinf.S] */

/* Optimized sinf function.
   Copyright (C) 2012-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
/* NOTE(review): __need_Emath presumably limits <bits/errno.h> to the
   math-related error numbers; only EDOM is used in this file.  */
#define __need_Emath
#include <bits/errno.h>

/* Short algorithm description:
 *
 *  1) if |x| == 0: return x.
 *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
 *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
 *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
 *  5) if |x| < 9*Pi/4:
 *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
 *           t=|x|-j*Pi/4.
 *      5.2) Reconstruction:
 *          s = sign(x) * (-1.0)^((n>>2)&1)
 *          if(n&2 != 0) {
 *              using cos(t) polynomial for |t|<Pi/4, result is
 *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
 *          } else {
 *              using sin(t) polynomial for |t|<Pi/4, result is
 *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
 *          }
 *  6) if |x| < 2^23, large args:
 *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
 *           t=|x|-j*Pi/4.
 *      6.2) Reconstruction same as (5.2).
 *  7) if |x| >= 2^23, very large args:
 *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
 *           t=|x|-j*Pi/4.
 *      7.2) Reconstruction same as (5.2).
 *  8) if x is Inf, return x-x, and set errno=EDOM.
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
 *  sin(+-0) = +-0 not raising inexact/underflow,
 *  sin(subnormal) raises inexact/underflow,
 *  sin(min_normalized) raises inexact/underflow,
 *  sin(normalized) raises inexact,
 *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
 *  sin(NaN) = NaN.
 */

/* float __sinf (float x) -- single-precision sine (aliased to sinf at the
   end of this file).
   Syntax:   GNU as, AT&T operand order, run through the C preprocessor.
   In:       %xmm0 = x (single precision).
   Out:      %xmm0 = sin(x) (single precision).
   Clobbers: %eax, %ecx, %edx, %rsi, %edi, %xmm1-%xmm7, flags.
   Stack:    none, except 8 bytes around the call to __errno_location,
	     which is made only when x is +-Inf.
   Register roles on the way to L(reconstruction):
	     %edi = raw bits of x, %xmm7 = SP copy of x,
	     %eax = n, %ecx = sign bit of x, %xmm0 = reduced argument t.  */
	.text
ENTRY(__sinf)
	/* Input: single precision x in %xmm0 */

	movd	%xmm0, %eax		/* Bits of x */
	movaps	%xmm0, %xmm7		/* Copy of x */
	cvtss2sd %xmm0, %xmm0		/* DP x */
	movss	L(SP_ABS_MASK)(%rip), %xmm3
	movl	%eax, %edi		/* Copy of x bits */
	andl	$0x7fffffff, %eax	/* |x| */

	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4? */
	jb	L(arg_less_pio4)

	/* Here if |x|>=Pi/4 */
	andps	%xmm7, %xmm3		/* SP |x| */
	andpd	L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
	movss	L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */

	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4? */
	jae	L(large_args)

	/* Here if Pi/4<=|x|<9*Pi/4 */
	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
	movl	%edi, %ecx		/* Load x */
	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
	lea	L(PIO4J)(%rip), %rsi
	shrl	$31, %ecx		/* sign of x */
	addl	$1, %eax		/* k+1 */
	movl	$0x0e, %edx
	andl	%eax, %edx		/* j = (k+1)&0x0e */
	subsd	(%rsi,%rdx,8), %xmm0	/* t = |x| - j * Pi/4 */

L(reconstruction):
	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
	testl	$2, %eax		/* n&2 != 0? */
	jz	L(sin_poly)

/*L(cos_poly):*/
	/* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
	 * y = t*t; z = y*y;
	 * s = sign(x) * (-1.0)^((n>>2)&1)
	 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
	 */
	shrl	$2, %eax		/* n>>2 */
	mulsd	%xmm0, %xmm0		/* y=t^2 */
	andl	$1, %eax		/* (n>>2)&1 */
	movaps	%xmm0, %xmm1		/* y */
	mulsd	%xmm0, %xmm0		/* z=t^4 */

	movsd	L(DP_C4)(%rip), %xmm4	/* C4 */
	mulsd	%xmm0, %xmm4		/* z*C4 */
	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
	movsd	L(DP_C3)(%rip), %xmm3	/* C3 */
	mulsd	%xmm0, %xmm3		/* z*C3 */
	lea	L(DP_ONES)(%rip), %rsi
	addsd	L(DP_C2)(%rip), %xmm4	/* C2+z*C4 */
	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
	addsd	L(DP_C1)(%rip), %xmm3	/* C1+z*C3 */
	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
	addsd	L(DP_C0)(%rip), %xmm4	/* C0+z*(C2+z*C4) */
	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */

	/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
	addsd	%xmm4, %xmm3
	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
	addsd	L(DP_ONES)(%rip), %xmm3

	/* %ecx is 0 or 1 and selects +1.0 or -1.0 from DP_ONES.  */
	mulsd	(%rsi,%rcx,8), %xmm3	/* DP result */
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret

	.p2align	4
L(sin_poly):
	/* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
	 * y = t*t; z = y*y;
	 * s = sign(x) * (-1.0)^((n>>2)&1)
	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
	 */

	movaps	%xmm0, %xmm4		/* t */
	shrl	$2, %eax		/* n>>2 */
	mulsd	%xmm0, %xmm0		/* y=t^2 */
	andl	$1, %eax		/* (n>>2)&1 */
	movaps	%xmm0, %xmm1		/* y */
	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
	mulsd	%xmm0, %xmm0		/* z=t^4 */

	movsd	L(DP_S4)(%rip), %xmm2	/* S4 */
	mulsd	%xmm0, %xmm2		/* z*S4 */
	movsd	L(DP_S3)(%rip), %xmm3	/* S3 */
	mulsd	%xmm0, %xmm3		/* z*S3 */
	lea	L(DP_ONES)(%rip), %rsi
	addsd	L(DP_S2)(%rip), %xmm2	/* S2+z*S4 */
	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
	addsd	L(DP_S1)(%rip), %xmm3	/* S1+z*S3 */
	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
	addsd	L(DP_S0)(%rip), %xmm2	/* S0+z*(S2+z*S4) */
	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
	mulsd	(%rsi,%rcx,8), %xmm4
	/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm2, %xmm3
	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	mulsd	%xmm4, %xmm3
	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm4, %xmm3
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret

	.p2align	4
L(large_args):
	/* Here if |x|>=9*Pi/4 */
	cmpl	$0x7f800000, %eax	/* x is Inf or NaN? */
	jae	L(arg_inf_or_nan)

	/* Here if finite |x|>=9*Pi/4 */
	cmpl	$0x4b000000, %eax	/* |x|<2^23? */
	jae	L(very_large_args)

	/* Here if 9*Pi/4<=|x|<2^23 */
	movsd	L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
	addl	$1, %eax		/* k+1 */
	movl	%eax, %edx
	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
	cvtsi2sdl %edx, %xmm4		/* DP j */
	movl	%edi, %ecx		/* Load x */
	movsd	L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
	shrl	$31, %ecx		/* sign bit of x */
	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
	movsd	L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
	mulsd	%xmm3, %xmm4		/* -j*PIO4LO */
	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
	jmp	L(reconstruction)

	.p2align	4
L(very_large_args):
	/* Here if finite |x|>=2^23 */

	/* bitpos = (ix>>23) - BIAS_32 + 59; */
	shrl	$23, %eax		/* eb = biased exponent of x */
	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
	subl	$68, %eax
	movl	$28, %ecx		/* %cl=28 */
	movl	%eax, %edx		/* bitpos copy */

	/* j = bitpos/28;
	   On this path 0x4b000000 <= |x| bits < 0x7f800000, so eb is in
	   [150, 254] and bitpos in [82, 186]: %ah is zero as the 8-bit
	   divide requires, and the quotient j is in [2, 6].  */
	div	%cl			/* j in register %al=%ax/%cl */
	movapd	%xmm0, %xmm3		/* |x| */
	/* clear unneeded remainder from %ah */
	andl	$0xff, %eax

	imull	$28, %eax, %ecx		/* j*28 */
	lea	L(_FPI)(%rip), %rsi
	movsd	L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
	movapd	%xmm0, %xmm5		/* |x| */
	mulsd	-16(%rsi,%rax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
	movapd	%xmm0, %xmm1		/* |x| */
	mulsd	-8(%rsi,%rax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
	mulsd	(%rsi,%rax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
	addl	$19, %ecx		/* j*28+19 */
	mulsd	8(%rsi,%rax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
	cmpl	%ecx, %edx		/* bitpos>=j*28+19? */
	jl	L(very_large_skip1)

	/* Here if bitpos>=j*28+19 */
	andpd	%xmm3, %xmm4		/* HI(tmp3) */
	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
L(very_large_skip1):

	movsd	L(DP_2POW52)(%rip), %xmm6
	movapd	%xmm5, %xmm2		/* tmp2 copy */
	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
	movl	$1, %edx
	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
	movsd	8+L(DP_2POW52)(%rip), %xmm4
	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
	movl	%edi, %ecx		/* Load x */
	comisd	%xmm5, %xmm4		/* tmp4 > tmp5? */
	jbe	L(very_large_skip2)

	/* Here if tmp4 > tmp5 */
	subl	$1, %eax		/* k-- */
	addsd	8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
L(very_large_skip2):

	andl	%eax, %edx		/* k&1 */
	lea	L(DP_ZERONE)(%rip), %rsi
	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
	addsd	(%rsi,%rdx,8), %xmm3	/* t  = DP_ZERONE[k&1] + tmp3 */
	addsd	%xmm2, %xmm3		/* t += tmp2 */
	shrl	$31, %ecx		/* sign of x */
	addsd	%xmm3, %xmm0		/* t += tmp0 */
	addl	$1, %eax		/* n=k+1 */
	addsd	%xmm1, %xmm0		/* t += tmp1 */
	mulsd	L(DP_PIO4)(%rip), %xmm0	/* t *= PIO4 */

	jmp	L(reconstruction)	/* end of very_large_args path */

	.p2align	4
L(arg_less_pio4):
	/* Here if |x|<Pi/4.  %eax holds |x| bits with the sign bit cleared,
	   so the signed compares below order it correctly.  */
	cmpl	$0x3d000000, %eax	/* |x|<2^-5? */
	jl	L(arg_less_2pn5)

	/* Here if 2^-5<=|x|<Pi/4 */
	movaps	%xmm0, %xmm3		/* x */
	mulsd	%xmm0, %xmm0		/* y=x^2 */
	movaps	%xmm0, %xmm1		/* y */
	mulsd	%xmm0, %xmm0		/* z=x^4 */
	movsd	L(DP_S4)(%rip), %xmm4	/* S4 */
	mulsd	%xmm0, %xmm4		/* z*S4 */
	movsd	L(DP_S3)(%rip), %xmm5	/* S3 */
	mulsd	%xmm0, %xmm5		/* z*S3 */
	addsd	L(DP_S2)(%rip), %xmm4	/* S2+z*S4 */
	mulsd	%xmm0, %xmm4		/* z*(S2+z*S4) */
	addsd	L(DP_S1)(%rip), %xmm5	/* S1+z*S3 */
	mulsd	%xmm0, %xmm5		/* z*(S1+z*S3) */
	addsd	L(DP_S0)(%rip), %xmm4	/* S0+z*(S2+z*S4) */
	mulsd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
	mulsd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
	mulsd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
	/* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm5, %xmm4
	/* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm4, %xmm3
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret

	.p2align	4
L(arg_less_2pn5):
	/* Here if |x|<2^-5 */
	cmpl	$0x32000000, %eax	/* |x|<2^-27? */
	jl	L(arg_less_2pn27)

	/* Here if 2^-27<=|x|<2^-5 */
	movaps	%xmm0, %xmm1		/* DP x */
	mulsd	%xmm0, %xmm0		/* DP x^2 */
	movsd	L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
	addsd	L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
	mulsd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
	addsd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret

	.p2align	4
L(arg_less_2pn27):
	testl	%eax, %eax		/* x=0? */
	je	L(arg_zero)		/* in case x=0 return sin(+-0)==+-0 */
	/* Here if |x|<2^-27 */
	/*
	 * Special cases here:
	 *  sin(subnormal) raises inexact/underflow
	 *  sin(min_normalized) raises inexact/underflow
	 *  sin(normalized) raises inexact
	 */
	movaps	%xmm0, %xmm3		/* Copy of DP x */
	mulsd	L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */
	subsd	%xmm0, %xmm3		/* Result is x-x*DP_SMALL */
	cvtsd2ss %xmm3, %xmm0		/* Result converted to SP */
	ret

	.p2align	4
L(arg_zero):
	movaps	%xmm7, %xmm0		/* SP x */
	ret

	.p2align	4
L(arg_inf_or_nan):
	/* Here if |x| is Inf or NAN.  The flags still come from the
	   compare against 0x7f800000 in L(large_args): equal means Inf,
	   above means NaN.  */
	jne	L(skip_errno_setting)	/* in case of x is NaN */

	/* Align stack to 16 bytes.  */
	subq	$8, %rsp
	cfi_adjust_cfa_offset (8)
	/* %xmm7 is caller-saved, so keep x in the stack slot across the
	   call instead of relying on the callee leaving it untouched.  */
	movss	%xmm7, (%rsp)
	/* Here if x is Inf. Set errno to EDOM.  */
	call	JUMPTARGET(__errno_location)
	movss	(%rsp), %xmm7		/* reload x */
	addq	$8, %rsp
	cfi_adjust_cfa_offset (-8)

	movl	$EDOM, (%rax)

	.p2align	4
L(skip_errno_setting):
	/* Here if |x| is Inf or NAN. Continued.  */
	movaps	%xmm7, %xmm0		/* load x */
	subss	%xmm0, %xmm0		/* Result is NaN */
	ret
END(__sinf)

/* Read-only constants for __sinf.  Each double is stored as two .long
   words, low word first.  */
	.section .rodata, "a"
	.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
	.long	0x00000000,0x00000000
	.long	0x54442d18,0x3fe921fb
	.long	0x54442d18,0x3ff921fb
	.long	0x7f3321d2,0x4002d97c
	.long	0x54442d18,0x400921fb
	.long	0x2955385e,0x400f6a7a
	.long	0x7f3321d2,0x4012d97c
	.long	0xe9bba775,0x4015fdbb
	.long	0x54442d18,0x401921fb
	.long	0xbeccb2bb,0x401c463a
	.long	0x2955385e,0x401f6a7a
	.type L(PIO4J), @object
	ASM_SIZE_DIRECTIVE(L(PIO4J))

/* The very_large_args path reads four consecutive entries of this table,
   FPI[j-2] .. FPI[j+1], with j = bitpos/28.  */
	.p2align 3
L(_FPI): /* 4/Pi broken into sum of positive DP values */
	.long	0x00000000,0x00000000
	.long	0x6c000000,0x3ff45f30
	.long	0x2a000000,0x3e3c9c88
	.long	0xa8000000,0x3c54fe13
	.long	0xd0000000,0x3aaf47d4
	.long	0x6c000000,0x38fbb81b
	.long	0xe0000000,0x3714acc9
	.long	0x7c000000,0x3560e410
	.long	0x56000000,0x33bca2c7
	.long	0xac000000,0x31fbd778
	.long	0xe0000000,0x300b7246
	.long	0xe8000000,0x2e5d2126
	.long	0x48000000,0x2c970032
	.long	0xe8000000,0x2ad77504
	.long	0xe0000000,0x290921cf
	.long	0xb0000000,0x274deb1c
	.long	0xe0000000,0x25829a73
	.long	0xbe000000,0x23fd1046
	.long	0x10000000,0x2224baed
	.long	0x8e000000,0x20709d33
	.long	0x80000000,0x1e535a2f
	.long	0x64000000,0x1cef904e
	.long	0x30000000,0x1b0d6398
	.long	0x24000000,0x1964ce7d
	.long	0x16000000,0x17b908bf
	.type L(_FPI), @object
	ASM_SIZE_DIRECTIVE(L(_FPI))

/* Coefficients of polynomial
   for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
	.p2align 3
L(DP_SIN2_0):
	.long	0x5543d49d,0xbfc55555
	.type L(DP_SIN2_0), @object
	ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))

	.p2align 3
L(DP_SIN2_1):
	.long	0x75cec8c5,0x3f8110f4
	.type L(DP_SIN2_1), @object
	ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))

/* Indexed by k&1 in the very_large_args path: adds 0.0 for even k and
   -1.0 for odd k.  */
	.p2align 3
L(DP_ZERONE):
	.long	0x00000000,0x00000000	/* 0.0 */
	.long	0x00000000,0xbff00000	/* -1.0 */
	.type L(DP_ZERONE), @object
	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))

/* Indexed by the 0/1 sign selector to apply the result sign; the second
   entry is also read directly as the constant -1.0.  */
	.p2align 3
L(DP_ONES):
	.long	0x00000000,0x3ff00000	/* +1.0 */
	.long	0x00000000,0xbff00000	/* -1.0 */
	.type L(DP_ONES), @object
	ASM_SIZE_DIRECTIVE(L(DP_ONES))

/* Coefficients of polynomial
   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
	.p2align 3
L(DP_S3):
	.long	0x64e6b5b4,0x3ec71d72
	.type L(DP_S3), @object
	ASM_SIZE_DIRECTIVE(L(DP_S3))

	.p2align 3
L(DP_S1):
	.long	0x10c2688b,0x3f811111
	.type L(DP_S1), @object
	ASM_SIZE_DIRECTIVE(L(DP_S1))

	.p2align 3
L(DP_S4):
	.long	0x1674b58a,0xbe5a947e
	.type L(DP_S4), @object
	ASM_SIZE_DIRECTIVE(L(DP_S4))

	.p2align 3
L(DP_S2):
	.long	0x8b4bd1f9,0xbf2a019f
	.type L(DP_S2), @object
	ASM_SIZE_DIRECTIVE(L(DP_S2))

	.p2align 3
L(DP_S0):
	.long	0x55551cd9,0xbfc55555
	.type L(DP_S0), @object
	ASM_SIZE_DIRECTIVE(L(DP_S0))

/* Scale factor for the |x|<2^-27 path, which returns x-x*DP_SMALL.  */
	.p2align 3
L(DP_SMALL):
	.long	0x00000000,0x3cd00000	/* 2^(-50) */
	.type L(DP_SMALL), @object
	ASM_SIZE_DIRECTIVE(L(DP_SMALL))

/* Coefficients of polynomial
   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
	.p2align 3
L(DP_C3):
	.long	0x9ac43cc0,0x3efa00eb
	.type L(DP_C3), @object
	ASM_SIZE_DIRECTIVE(L(DP_C3))

	.p2align 3
L(DP_C1):
	.long	0x545c50c7,0x3fa55555
	.type L(DP_C1), @object
	ASM_SIZE_DIRECTIVE(L(DP_C1))

	.p2align 3
L(DP_C4):
	.long	0xdd8844d7,0xbe923c97
	.type L(DP_C4), @object
	ASM_SIZE_DIRECTIVE(L(DP_C4))

	.p2align 3
L(DP_C2):
	.long	0x348b6874,0xbf56c16b
	.type L(DP_C2), @object
	ASM_SIZE_DIRECTIVE(L(DP_C2))

	.p2align 3
L(DP_C0):
	.long	0xfffe98ae,0xbfdfffff
	.type L(DP_C0), @object
	ASM_SIZE_DIRECTIVE(L(DP_C0))

/* Double-precision constants used by the large and very large argument
   range reduction.  */
	.p2align 3
L(DP_PIO4):
	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
	.type L(DP_PIO4), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4))

	.p2align 3
L(DP_2POW52):
	.long	0x00000000,0x43300000	/* +2^52 */
	.long	0x00000000,0xc3300000	/* -2^52 */
	.type L(DP_2POW52), @object
	ASM_SIZE_DIRECTIVE(L(DP_2POW52))

	.p2align 3
L(DP_INVPIO4):
	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
	.type L(DP_INVPIO4), @object
	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))

/* Both parts of Pi/4 are stored negated (sign bit set), so the reduction
   can use addsd to subtract j*Pi/4.  */
	.p2align 3
L(DP_PIO4HI):
	.long	0x54000000,0xbfe921fb	/* -(High part of Pi/4) */
	.type L(DP_PIO4HI), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))

	.p2align 3
L(DP_PIO4LO):
	.long	0x11A62633,0xbe010b46	/* -(Low part of Pi/4) */
	.type L(DP_PIO4LO), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))

/* Single-precision 4/Pi and the bit masks.  The two ABS masks are used as
   memory operands or loaded for andps/andpd, hence the 16-byte alignment.  */
	.p2align 2
L(SP_INVPIO4):
	.long	0x3fa2f983		/* 4/Pi */
	.type L(SP_INVPIO4), @object
	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))

	.p2align 4
L(DP_ABS_MASK): /* Mask for getting DP absolute value */
	.long	0xffffffff,0x7fffffff
	.long	0xffffffff,0x7fffffff
	.type L(DP_ABS_MASK), @object
	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))

	.p2align 3
L(DP_HI_MASK): /* Mask keeping the upper 32 bits of a DP value: sign,
		  exponent and top 20 fraction bits, i.e. the high 21
		  significand bits counting the implicit one */
	.long	0x00000000,0xffffffff
	.type L(DP_HI_MASK),@object
	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

	.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
	.long	0x7fffffff,0x7fffffff
	.long	0x7fffffff,0x7fffffff
	.type L(SP_ABS_MASK), @object
	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))

/* Make sinf a weak alias of __sinf.  */
weak_alias(__sinf, sinf)


/* Browse the source code of glibc_src_2.24/sysdeps/x86_64/fpu/s_sinf.S */