1/* Optimized cosf function.
2 Copyright (C) 2012-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#define __need_Emath
21#include <bits/errno.h>
22
23/* Short algorithm description:
24 *
25 * 1) if |x| == 0: return 1.0-|x|.
26 * 2) if |x| < 2^-27: return 1.0-|x|.
27 * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
28 * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
29 * 5) if |x| < 9*Pi/4:
30 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
31 * t=|x|-j*Pi/4.
32 * 5.2) Reconstruction:
33 * s = (-1.0)^((n>>2)&1)
34 * if(n&2 != 0) {
35 * using cos(t) polynomial for |t|<Pi/4, result is
36 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
37 * } else {
38 * using sin(t) polynomial for |t|<Pi/4, result is
39 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
40 * }
41 * 6) if |x| < 2^23, large args:
42 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
43 * t=|x|-j*Pi/4.
44 * 6.2) Reconstruction same as (5.2).
45 * 7) if |x| >= 2^23, very large args:
46 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
47 * t=|x|-j*Pi/4.
48 * 7.2) Reconstruction same as (5.2).
49 * 8) if x is Inf, return x-x, and set errno=EDOM.
50 * 9) if x is NaN, return x-x.
51 *
52 * Special cases:
53 * cos(+-0) = 1 not raising inexact,
54 * cos(subnormal) raises inexact,
55 * cos(min_normalized) raises inexact,
56 * cos(normalized) raises inexact,
57 * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
58 * cos(NaN) = NaN.
59 */
60
61 .text
62ENTRY(__cosf)
63 /* Input: single precision x in %xmm0 */
64
65 movd %xmm0, %eax /* Bits of x */
66 movaps %xmm0, %xmm7 /* Copy of x */
67 cvtss2sd %xmm0, %xmm0 /* DP x */
68 movss L(SP_ABS_MASK)(%rip), %xmm3
69 andl $0x7fffffff, %eax /* |x| */
70
71 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
72 jb L(arg_less_pio4)
73
74 /* Here if |x|>=Pi/4 */
75 andps %xmm7, %xmm3 /* SP |x| */
76 andpd L(DP_ABS_MASK)(%rip), %xmm0 /* DP |x| */
77 movss L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
78
79 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
80 jae L(large_args)
81
82 /* Here if Pi/4<=|x|<9*Pi/4 */
83 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
84 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
85 lea L(PIO4J)(%rip), %rsi
86 addl $1, %eax /* k+1 */
87 movl $0x0e, %edx
88 andl %eax, %edx /* j = (k+1)&0x0e */
89 addl $2, %eax /* n */
90 subsd (%rsi,%rdx,8), %xmm0 /* t = |x| - j * Pi/4 */
91
L(reconstruction):
	/* Common tail of all range-reduced paths.
	   In: %eax = n, %xmm0 = t (double).
	   Bit 1 of n selects the polynomial (set: cos(t), clear: sin(t));
	   bit 2 of n selects the sign of the result.  */
	testl	$2, %eax		/* n & 2 ?  */
	jz	L(sin_poly)		/* clear: sin(t) polynomial */

/*L(cos_poly):*/
	/* cos(x) evaluated with the cos(t) polynomial, |t| < Pi/4:
	 *   y = t*t; z = y*y;
	 *   s = (-1.0)^((n>>2)&1)   (no sign(x) factor is applied here)
	 *   result = s * (1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))))
	 * The C0/C2/C4 chain (in %xmm4) and the C1/C3 chain (in %xmm3)
	 * are evaluated in z independently and then added.
	 */
	shrl	$2, %eax		/* n >> 2 */
	mulsd	%xmm0, %xmm0		/* y = t^2 */
	andl	$1, %eax		/* sign index = (n>>2) & 1 */
	movaps	%xmm0, %xmm1		/* keep y */
	mulsd	%xmm0, %xmm0		/* z = t^4 */

	movsd	L(DP_C4)(%rip), %xmm4	/* C4 */
	mulsd	%xmm0, %xmm4		/* z*C4 */
	movsd	L(DP_C3)(%rip), %xmm3	/* C3 */
	mulsd	%xmm0, %xmm3		/* z*C3 */
	lea	L(DP_ONES)(%rip), %rsi	/* base of the {+1.0, -1.0} table */
	addsd	L(DP_C2)(%rip), %xmm4	/* C2+z*C4 */
	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
	addsd	L(DP_C1)(%rip), %xmm3	/* C1+z*C3 */
	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
	addsd	L(DP_C0)(%rip), %xmm4	/* C0+z*(C2+z*C4) */
	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */

	/* Merge both chains: y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
	addsd	%xmm4, %xmm3
	/* Add the leading 1.0 (first entry of DP_ONES).  */
	addsd	L(DP_ONES)(%rip), %xmm3

	mulsd	(%rsi,%rax,8), %xmm3	/* apply sign s: DP result */
	cvtsd2ss %xmm3, %xmm0		/* round to SP result */
	ret
128
129 .p2align 4
130L(sin_poly):
131 /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
132 * y = t*t; z = y*y;
133 * s = sign(x) * (-1.0)^((n>>2)&1)
134 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
135 */
136
137 movaps %xmm0, %xmm4 /* t */
138 shrl $2, %eax /* n>>2 */
139 mulsd %xmm0, %xmm0 /* y=t^2 */
140 andl $1, %eax /* (n>>2)&1 */
141 movaps %xmm0, %xmm1 /* y */
142 mulsd %xmm0, %xmm0 /* z=t^4 */
143
144 movsd L(DP_S4)(%rip), %xmm2 /* S4 */
145 mulsd %xmm0, %xmm2 /* z*S4 */
146 movsd L(DP_S3)(%rip), %xmm3 /* S3 */
147 mulsd %xmm0, %xmm3 /* z*S3 */
148 lea L(DP_ONES)(%rip), %rsi
149 addsd L(DP_S2)(%rip), %xmm2 /* S2+z*S4 */
150 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
151 addsd L(DP_S1)(%rip), %xmm3 /* S1+z*S3 */
152 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
153 addsd L(DP_S0)(%rip), %xmm2 /* S0+z*(S2+z*S4) */
154 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
155 /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
156 mulsd (%rsi,%rax,8), %xmm4
157 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
158 addsd %xmm2, %xmm3
159 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
160 mulsd %xmm4, %xmm3
161 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
162 addsd %xmm4, %xmm3
163 cvtsd2ss %xmm3, %xmm0 /* SP result */
164 ret
165
166 .p2align 4
167L(large_args):
168 /* Here if |x|>=9*Pi/4 */
169 cmpl $0x7f800000, %eax /* x is Inf or NaN? */
170 jae L(arg_inf_or_nan)
171
172 /* Here if finite |x|>=9*Pi/4 */
173 cmpl $0x4b000000, %eax /* |x|<2^23? */
174 jae L(very_large_args)
175
176 /* Here if 9*Pi/4<=|x|<2^23 */
177 movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
178 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
179 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
180 addl $1, %eax /* k+1 */
181 movl %eax, %edx
182 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
183 cvtsi2sdl %edx, %xmm4 /* DP j */
184 movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
185 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
186 movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
187 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
188 addl $2, %eax /* n */
189 mulsd %xmm3, %xmm4 /* j*PIO4LO */
190 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
191 jmp L(reconstruction)
192
193 .p2align 4
194L(very_large_args):
195 /* Here if finite |x|>=2^23 */
196
197 /* bitpos = (ix>>23) - BIAS_32 + 59; */
198 shrl $23, %eax /* eb = biased exponent of x */
199 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
200 subl $68, %eax
201 movl $28, %ecx /* %cl=28 */
202 movl %eax, %edx /* bitpos copy */
203
204 /* j = bitpos/28; */
205 div %cl /* j in register %al=%ax/%cl */
206 movapd %xmm0, %xmm3 /* |x| */
207 /* clear unneeded remainder from %ah */
208 andl $0xff, %eax
209
210 imull $28, %eax, %ecx /* j*28 */
211 lea L(_FPI)(%rip), %rsi
212 movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
213 movapd %xmm0, %xmm5 /* |x| */
214 mulsd -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
215 movapd %xmm0, %xmm1 /* |x| */
216 mulsd -8(%rsi,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
217 mulsd (%rsi,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
218 addl $19, %ecx /* j*28+19 */
219 mulsd 8(%rsi,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
220 cmpl %ecx, %edx /* bitpos>=j*28+19? */
221 jl L(very_large_skip1)
222
223 /* Here if bitpos>=j*28+19 */
224 andpd %xmm3, %xmm4 /* HI(tmp3) */
225 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
226L(very_large_skip1):
227
228 movsd L(DP_2POW52)(%rip), %xmm6
229 movapd %xmm5, %xmm2 /* tmp2 copy */
230 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
231 movl $1, %edx
232 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
233 movsd 8+L(DP_2POW52)(%rip), %xmm4
234 movd %xmm6, %eax /* k = I64_LO(tmp6); */
235 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
236 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
237 jbe L(very_large_skip2)
238
239 /* Here if tmp4 > tmp5 */
240 subl $1, %eax /* k-- */
241 addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
242L(very_large_skip2):
243
244 andl %eax, %edx /* k&1 */
245 lea L(DP_ZERONE)(%rip), %rsi
246 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
247 addsd (%rsi,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
248 addsd %xmm2, %xmm3 /* t += tmp2 */
249 addsd %xmm3, %xmm0 /* t += tmp0 */
250 addl $3, %eax /* n=k+3 */
251 addsd %xmm1, %xmm0 /* t += tmp1 */
252 mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */
253
254 jmp L(reconstruction) /* end of very_large_args peth */
255
256 .p2align 4
257L(arg_less_pio4):
258 /* Here if |x|<Pi/4 */
259 cmpl $0x3d000000, %eax /* |x|<2^-5? */
260 jl L(arg_less_2pn5)
261
262 /* Here if 2^-5<=|x|<Pi/4 */
263 mulsd %xmm0, %xmm0 /* y=x^2 */
264 movaps %xmm0, %xmm1 /* y */
265 mulsd %xmm0, %xmm0 /* z=x^4 */
266 movsd L(DP_C4)(%rip), %xmm3 /* C4 */
267 mulsd %xmm0, %xmm3 /* z*C4 */
268 movsd L(DP_C3)(%rip), %xmm5 /* C3 */
269 mulsd %xmm0, %xmm5 /* z*C3 */
270 addsd L(DP_C2)(%rip), %xmm3 /* C2+z*C4 */
271 mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
272 addsd L(DP_C1)(%rip), %xmm5 /* C1+z*C3 */
273 mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
274 addsd L(DP_C0)(%rip), %xmm3 /* C0+z*(C2+z*C4) */
275 mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
276 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
277 addsd %xmm5, %xmm3
278 /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
279 addsd L(DP_ONES)(%rip), %xmm3
280 cvtsd2ss %xmm3, %xmm0 /* SP result */
281 ret
282
283 .p2align 4
284L(arg_less_2pn5):
285 /* Here if |x|<2^-5 */
286 cmpl $0x32000000, %eax /* |x|<2^-27? */
287 jl L(arg_less_2pn27)
288
289 /* Here if 2^-27<=|x|<2^-5 */
290 mulsd %xmm0, %xmm0 /* DP x^2 */
291 movsd L(DP_COS2_1)(%rip), %xmm3 /* DP DP_COS2_1 */
292 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
293 addsd L(DP_COS2_0)(%rip), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
294 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
295 /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
296 addsd L(DP_ONES)(%rip), %xmm3
297 cvtsd2ss %xmm3, %xmm0 /* SP result */
298 ret
299
300 .p2align 4
301L(arg_less_2pn27):
302 /* Here if |x|<2^-27 */
303 andps L(SP_ABS_MASK)(%rip),%xmm7 /* |x| */
304 movss L(SP_ONE)(%rip), %xmm0 /* 1.0 */
305 subss %xmm7, %xmm0 /* result is 1.0-|x| */
306 ret
307
308 .p2align 4
309L(arg_inf_or_nan):
310 /* Here if |x| is Inf or NAN */
311 jne L(skip_errno_setting) /* in case of x is NaN */
312
313 /* Align stack to 16 bytes. */
314 subq $8, %rsp
315 cfi_adjust_cfa_offset (8)
316 /* Here if x is Inf. Set errno to EDOM. */
317 call JUMPTARGET(__errno_location)
318 addq $8, %rsp
319 cfi_adjust_cfa_offset (-8)
320
321 movl $EDOM, (%rax)
322
323 .p2align 4
324L(skip_errno_setting):
325 /* Here if |x| is Inf or NAN. Continued. */
326 movaps %xmm7, %xmm0 /* load x */
327 subss %xmm0, %xmm0 /* Result is NaN */
328 ret
329END(__cosf)
330
331 .section .rodata, "a"
332 .p2align 3
333L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
334 .long 0x00000000,0x00000000
335 .long 0x54442d18,0x3fe921fb
336 .long 0x54442d18,0x3ff921fb
337 .long 0x7f3321d2,0x4002d97c
338 .long 0x54442d18,0x400921fb
339 .long 0x2955385e,0x400f6a7a
340 .long 0x7f3321d2,0x4012d97c
341 .long 0xe9bba775,0x4015fdbb
342 .long 0x54442d18,0x401921fb
343 .long 0xbeccb2bb,0x401c463a
344 .long 0x2955385e,0x401f6a7a
345 .type L(PIO4J), @object
346 ASM_SIZE_DIRECTIVE(L(PIO4J))
347
348 .p2align 3
349L(_FPI): /* 4/Pi broken into sum of positive DP values */
350 .long 0x00000000,0x00000000
351 .long 0x6c000000,0x3ff45f30
352 .long 0x2a000000,0x3e3c9c88
353 .long 0xa8000000,0x3c54fe13
354 .long 0xd0000000,0x3aaf47d4
355 .long 0x6c000000,0x38fbb81b
356 .long 0xe0000000,0x3714acc9
357 .long 0x7c000000,0x3560e410
358 .long 0x56000000,0x33bca2c7
359 .long 0xac000000,0x31fbd778
360 .long 0xe0000000,0x300b7246
361 .long 0xe8000000,0x2e5d2126
362 .long 0x48000000,0x2c970032
363 .long 0xe8000000,0x2ad77504
364 .long 0xe0000000,0x290921cf
365 .long 0xb0000000,0x274deb1c
366 .long 0xe0000000,0x25829a73
367 .long 0xbe000000,0x23fd1046
368 .long 0x10000000,0x2224baed
369 .long 0x8e000000,0x20709d33
370 .long 0x80000000,0x1e535a2f
371 .long 0x64000000,0x1cef904e
372 .long 0x30000000,0x1b0d6398
373 .long 0x24000000,0x1964ce7d
374 .long 0x16000000,0x17b908bf
375 .type L(_FPI), @object
376 ASM_SIZE_DIRECTIVE(L(_FPI))
377
/* Coefficients of the short polynomial
   cos(x) ~= 1.0 + x^2*DP_COS2_0 + x^4*DP_COS2_1, used for |x| < 2^-5.
   Each constant is one 64-bit little-endian IEEE double.  */
	.p2align 3
L(DP_COS2_0):
	.quad	0xbfdfffffff5cc6fd
	.type L(DP_COS2_0), @object
	ASM_SIZE_DIRECTIVE(L(DP_COS2_0))

	.p2align 3
L(DP_COS2_1):
	.quad	0x3fa55514b178dac5
	.type L(DP_COS2_1), @object
	ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
391
392 .p2align 3
393L(DP_ZERONE):
394 .long 0x00000000,0x00000000 /* 0.0 */
395 .long 0x00000000,0xbff00000 /* 1.0 */
396 .type L(DP_ZERONE), @object
397 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
398
399 .p2align 3
400L(DP_ONES):
401 .long 0x00000000,0x3ff00000 /* +1.0 */
402 .long 0x00000000,0xbff00000 /* -1.0 */
403 .type L(DP_ONES), @object
404 ASM_SIZE_DIRECTIVE(L(DP_ONES))
405
/* Coefficients of the polynomial
   sin(t) ~= t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t| < Pi/4.
   Each constant is one 64-bit little-endian IEEE double.  */
	.p2align 3
L(DP_S3):
	.quad	0x3ec71d7264e6b5b4
	.type L(DP_S3), @object
	ASM_SIZE_DIRECTIVE(L(DP_S3))

	.p2align 3
L(DP_S1):
	.quad	0x3f81111110c2688b
	.type L(DP_S1), @object
	ASM_SIZE_DIRECTIVE(L(DP_S1))

	.p2align 3
L(DP_S4):
	.quad	0xbe5a947e1674b58a
	.type L(DP_S4), @object
	ASM_SIZE_DIRECTIVE(L(DP_S4))

	.p2align 3
L(DP_S2):
	.quad	0xbf2a019f8b4bd1f9
	.type L(DP_S2), @object
	ASM_SIZE_DIRECTIVE(L(DP_S2))

	.p2align 3
L(DP_S0):
	.quad	0xbfc5555555551cd9
	.type L(DP_S0), @object
	ASM_SIZE_DIRECTIVE(L(DP_S0))
437
/* Coefficients of the polynomial
   cos(t) ~= 1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t| < Pi/4.
   Each constant is one 64-bit little-endian IEEE double.  */
	.p2align 3
L(DP_C3):
	.quad	0x3efa00eb9ac43cc0
	.type L(DP_C3), @object
	ASM_SIZE_DIRECTIVE(L(DP_C3))

	.p2align 3
L(DP_C1):
	.quad	0x3fa55555545c50c7
	.type L(DP_C1), @object
	ASM_SIZE_DIRECTIVE(L(DP_C1))

	.p2align 3
L(DP_C4):
	.quad	0xbe923c97dd8844d7
	.type L(DP_C4), @object
	ASM_SIZE_DIRECTIVE(L(DP_C4))

	.p2align 3
L(DP_C2):
	.quad	0xbf56c16b348b6874
	.type L(DP_C2), @object
	ASM_SIZE_DIRECTIVE(L(DP_C2))

	.p2align 3
L(DP_C0):
	.quad	0xbfdffffffffe98ae
	.type L(DP_C0), @object
	ASM_SIZE_DIRECTIVE(L(DP_C0))
469
470 .p2align 3
471L(DP_PIO4):
472 .long 0x54442d18,0x3fe921fb /* Pi/4 */
473 .type L(DP_PIO4), @object
474 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
475
476 .p2align 3
477L(DP_2POW52):
478 .long 0x00000000,0x43300000 /* +2^52 */
479 .long 0x00000000,0xc3300000 /* -2^52 */
480 .type L(DP_2POW52), @object
481 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
482
483 .p2align 3
484L(DP_INVPIO4):
485 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
486 .type L(DP_INVPIO4), @object
487 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
488
489 .p2align 3
490L(DP_PIO4HI):
491 .long 0x54000000,0xbfe921fb /* High part of Pi/4 */
492 .type L(DP_PIO4HI), @object
493 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
494
495 .p2align 3
496L(DP_PIO4LO):
497 .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */
498 .type L(DP_PIO4LO), @object
499 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
500
501 .p2align 2
502L(SP_INVPIO4):
503 .long 0x3fa2f983 /* 4/Pi */
504 .type L(SP_INVPIO4), @object
505 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
506
507 .p2align 4
508L(DP_ABS_MASK): /* Mask for getting DP absolute value */
509 .long 0xffffffff,0x7fffffff
510 .long 0xffffffff,0x7fffffff
511 .type L(DP_ABS_MASK), @object
512 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
513
514 .p2align 3
515L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
516 .long 0x00000000,0xffffffff
517 .type L(DP_HI_MASK), @object
518 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
519
520 .p2align 4
521L(SP_ABS_MASK): /* Mask for getting SP absolute value */
522 .long 0x7fffffff,0x7fffffff
523 .long 0x7fffffff,0x7fffffff
524 .type L(SP_ABS_MASK), @object
525 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
526
527 .p2align 2
528L(SP_ONE):
529 .long 0x3f800000 /* 1.0 */
530 .type L(SP_ONE), @object
531 ASM_SIZE_DIRECTIVE(L(SP_ONE))
532
533weak_alias(__cosf, cosf)
534