e_expl.S source code [glibc_src_2.31/sysdeps/x86_64/fpu/e_expl.S]

1	/*
2	* Written by J.T. Conklin <jtc@netbsd.org>.
3	* Public domain.
4	*
5	* Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
6	*/
7
8	/*
9	* The 8087 method for the exponential function is to calculate
10	* exp(x) = 2^(x log2(e))
11	* after separating integer and fractional parts
12	* x log2(e) = i + f, |f| <= .5
13	* 2^i is immediate but f needs to be precise for long double accuracy.
14	* Suppress range reduction error in computing f by the following.
15	* Separate x into integer and fractional parts
16	* x = xi + xf, |xf| <= .5
17	* Separate log2(e) into the sum of an exact number c0 and small part c1.
18	* c0 + c1 = log2(e) to extra precision
19	* Then
20	* f = (c0 xi - i) + c0 xf + c1 x
21	* where c0 xi is exact and so also is (c0 xi - i).
22	* -- moshier@na-net.ornl.gov
23	*/
24
25	#include <libm-alias-ldouble.h>
26	#include <machine/asm.h>
27	#include <x86_64-math-asm.h>
28	#include <libm-alias-finite.h>
29
/* Select the entry-point name, the *_finite alias name and the x87
   instruction that loads log2(base) for the variant being built:
     USE_AS_EXP10L  -> 10^x     (fldl2t loads log2(10))
     USE_AS_EXPM1L  -> e^x - 1  (fldl2e loads log2(e))
     neither        -> e^x      (fldl2e loads log2(e))
   USE_AS_EXP10L is tested first, so it wins if both are defined.  */
#if defined USE_AS_EXP10L
# define IEEE754_EXPL	__ieee754_exp10l
# define EXPL_FINITE	__exp10l_finite
# define FLDLOG		fldl2t
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL	__expm1l
# undef EXPL_FINITE	/* The expm1l variant has no *_finite name.  */
# define FLDLOG		fldl2e
#else
# define IEEE754_EXPL	__ieee754_expl
# define EXPL_FINITE	__expl_finite
# define FLDLOG		fldl2e
#endif
43
/* Read-only constants.  Each is a 10-byte x87 extended value (8 bytes
   of mantissa, then the 16-bit sign/exponent word, little-endian)
   followed by 6 zero bytes so that every object occupies 16 bytes.  */
	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
#ifdef USE_AS_EXP10L
	/* c0 + c1 = log2(10), the constant FLDLOG loads in this build.
	   c0 has only its top 16 mantissa bits set; c1 is the small
	   remainder.  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.zero 6
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.zero 6
	ASM_SIZE_DIRECTIVE(c1)
#else
	/* c0 + c1 = log2(e) to extra precision.  c0 has only its top 16
	   mantissa bits set; c1 is the small remainder.  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.zero 6
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.zero 6
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
	/* csat = 2^15 (exponent word 0x400e, mantissa 0x8000000000000000):
	   the magnitude substituted for finite arguments whose exponent
	   word exceeds 0x400d.  */
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.zero 6
	ASM_SIZE_DIRECTIVE(csat)
	/* NOTE(review): DEFINE_LDBL_MIN is defined outside this file,
	   presumably in x86_64-math-asm.h; it is assumed to emit the
	   constant that LDBL_CHECK_FORCE_UFLOW_NONNEG reads -- confirm.  */
DEFINE_LDBL_MIN
#endif
73
/* MO(sym): memory operand naming one of the local constants.  PIC
   builds append (%rip) to address it RIP-relative; otherwise the bare
   symbol is used.  */
#ifndef PIC
# define MO(op) op
#else
# define MO(op) op##(%rip)
#endif
79
/*
 * long double IEEE754_EXPL (long double x)
 *
 * Depending on the build macros this is __ieee754_expl (e^x),
 * __ieee754_exp10l (10^x) or __expm1l (e^x - 1).
 *
 * In:    x is read from the stack at 8(%rsp); its sign/exponent word
 *        is the 16-bit value at 8+8(%rsp).
 * Out:   the result is left in %st(0), the only x87 register this
 *        routine leaves occupied.
 * Uses:  %eax, %edx, flags, and the two scratch words at -4(%rsp) and
 *        -8(%rsp) that hold the saved and the temporary x87 control
 *        word.
 *
 * In the stack comments of the main computation the leading number is
 * the index of the x87 register that still holds x, e.g. "3" means x
 * is currently in %st(3).
 */
	.text
ENTRY(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
	/* Positive arguments whose exponent word is >= 0x4006 (x >= 128,
	   which also covers +Inf and positive NaN) are passed straight
	   to __expl.  Flipping the sign bit first lets one unsigned
	   compare test "positive and large" at once.  */
	movzwl	8+8(%rsp), %eax
	xorb	$0x80, %ah		/* Invert sign bit: 1 now means positive.  */
	cmpl	$0xc006, %eax
	jae	HIDDEN_JUMPTARGET (__expl)
#endif
	fldt	8(%rsp)
	/* I added the following ugly construct because expl(+-Inf) resulted
	   in NaN.  The ugliness results from the bright minds at Intel.
	   For the i686 the code can be written better.
	   -- drepper@cygnus.com.  */
	fxam				/* Classify x: NaN, +-Inf, zero, ...  */
#ifdef USE_AS_EXPM1L
	xorb	$0x80, %ah		/* Undo the sign flip done at entry.  */
	cmpl	$0xc006, %eax		/* Negative with exponent word >= 0x4006?  */
	fstsw	%ax			/* %ah = fxam condition codes.  */
	movb	$0x45, %dh		/* Mask selecting C3, C2 and C0.  */
	jb	.Lexpm1_in_range	/* Branches on the cmpl result above.  */

	/* Negative and large in magnitude (may be -NaN or -Inf).  */
	andb	%ah, %dh
	cmpb	$0x01, %dh
	je	.Lnan_arg		/* fxam class NaN.  */
	jmp	.Llimit_result		/* -large, possibly -Inf: result is -1.  */

.Lexpm1_in_range:
	/* Finite and below the large-argument thresholds; may be +-0 but
	   is neither NaN nor +-Inf.  A zero argument is returned as is.  */
	andb	%ah, %dh
	cmpb	$0x40, %dh
	je	.Ldone

	/* Arguments with exponent word >= 0x3fbf take the normal path.  */
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax		/* Drop the sign bit.  */
	cmpl	$0x3fbf, %eax
	jge	.Lcompute
	/* Exponent below -64: the argument itself is returned.  If it is
	   normal, return now so no spurious underflow is raised.  */
	cmpl	$0x0001, %eax
	jge	.Ldone
	/* Subnormal argument: force underflow by squaring a copy, then
	   return the argument, to avoid wrong signs of zero results from
	   the code below in some rounding modes.  */
	fld	%st
	fmul	%st
	fstp	%st
	jmp	.Ldone
#else
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax		/* Drop the sign bit.  */
	cmpl	$0x400d, %eax
	jg	.Lhuge_or_special	/* Exponent word above 0x400d.  */
	cmpl	$0x3fbc, %eax
	jge	.Lcompute
	/* Argument's exponent below -67: the result rounds to 1, so
	   return 1 + x.  */
	fld1
	faddp
	jmp	.Ldone

.Lhuge_or_special:
	/* Overflow, underflow, infinity or NaN as argument.  */
	fstsw	%ax			/* %ah = fxam condition codes.  */
	movb	$0x45, %dh		/* Mask selecting C3, C2 and C0.  */
	andb	%ah, %dh
	cmpb	$0x05, %dh
	je	.Llimit_result		/* fxam class +-Inf.  */
	cmpb	$0x01, %dh
	je	.Lnan_arg		/* fxam class NaN.  */
	/* Finite but huge: replace x by +-csat and let the normal path
	   produce the overflow or underflow.  */
	fstp	%st
	fldt	MO(csat)
	andb	$2, %ah			/* C1 from fxam is the sign of x.  */
	jz	.Lcompute
	fchs
#endif

.Lcompute:
	FLDLOG				/* 1  log2(base)  */
	fmul	%st(1), %st		/* 1  x log2(base)  */
	/* Round to nearest while forming i and xi: save the control word,
	   load a copy with the rounding-control bits (mask 0x0c00)
	   cleared, and restore the saved word afterwards.  */
	fstcw	-4(%rsp)
	movl	$0xf3ff, %edx
	andl	-4(%rsp), %edx
	movl	%edx, -8(%rsp)
	fldcw	-8(%rsp)
	frndint				/* 1  i  */
	fld	%st(1)			/* 2  x  */
	frndint				/* 2  xi  */
	fldcw	-4(%rsp)
	/* f = (c0 xi - i) + c0 xf + c1 x, with xf = x - xi.  */
	fld	%st(1)			/* 3  i  */
	fldt	MO(c0)			/* 4  c0  */
	fld	%st(2)			/* 5  xi  */
	fmul	%st(1), %st		/* 5  c0 xi  */
	fsubp	%st, %st(2)		/* 4  f = c0 xi - i  */
	fld	%st(4)			/* 5  x  */
	fsub	%st(3), %st		/* 5  xf = x - xi  */
	fmulp	%st, %st(1)		/* 4  c0 xf  */
	faddp	%st, %st(1)		/* 3  f = f + c0 xf  */
	fldt	MO(c1)			/* 4  c1  */
	fmul	%st(4), %st		/* 4  c1 * x  */
	faddp	%st, %st(1)		/* 3  f = f + c1 * x  */
	f2xm1				/* 3  2^(fract(x * log2(base))) - 1  */
#ifdef USE_AS_EXPM1L
	/* base^x - 1 = (2^f - 1) 2^i + (2^i - 1).  */
	fstp	%st(1)			/* 2  drop xi  */
	fscale				/* 2  scale factor is st(1); base^x - 2^i  */
	fxch				/* 2  i  */
	fld1				/* 3  1.0  */
	fscale				/* 3  2^i  */
	fld1				/* 4  1.0  */
	fsubrp	%st, %st(1)		/* 3  2^i - 1.0  */
	fstp	%st(1)			/* 2  drop i  */
	faddp	%st, %st(1)		/* 1  base^x - 1.0  */
#else
	fld1				/* 4  1.0  */
	faddp				/* 3  2^(fract(x * log2(base)))  */
	fstp	%st(1)			/* 2  drop xi  */
	fscale				/* 2  scale factor is st(1); base^x  */
	fstp	%st(1)			/* 1  drop i  */
	/* NOTE(review): macro defined outside this file; by its name it
	   forces an underflow exception for tiny non-negative results
	   -- confirm against x86_64-math-asm.h.  */
	LDBL_CHECK_FORCE_UFLOW_NONNEG
#endif
	fstp	%st(1)			/* 0  drop x, leaving only the result  */
	jmp	.Ldone

.Llimit_result:
#ifdef USE_AS_EXPM1L
	/* For expm1l, only negative sign gets here: the result is -1.  */
	fstp	%st
	fld1
	fchs
#else
	/* +-Inf argument.  %eax still holds the status word stored after
	   fxam, whose bit 0x200 (C1) is the sign of x.  */
	testl	$0x200, %eax
	jz	.Ldone			/* +Inf: return the argument itself.  */
	fstp	%st
	fldz				/* -Inf: the result is 0.  */
#endif
.Ldone:
	ret

.Lnan_arg:
	/* NaN argument: return x + x.  */
	fadd	%st
	ret
END(IEEE754_EXPL)
216
/* Export the entry point under the name(s) matching the variant built.
   The libm_* macros are defined outside this file.  */
#if defined USE_AS_EXPM1L
libm_hidden_def (__expm1l)
libm_alias_ldouble (__expm1, expm1)
#elif defined USE_AS_EXP10L
libm_alias_finite (__ieee754_exp10l, __exp10l)
#else
libm_alias_finite (__ieee754_expl, __expl)
#endif
225

Browse the source code of glibc_src_2.31/sysdeps/x86_64/fpu/e_expl.S