e_expl.S source code [glibc_src_2.27/sysdeps/x86_64/fpu/e_expl.S]

1	/*
2	* Written by J.T. Conklin <jtc@netbsd.org>.
3	* Public domain.
4	*
5	* Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
6	*/
7
8	/*
9	* The 8087 method for the exponential function is to calculate
10	* exp(x) = 2^(x log2(e))
11	* after separating integer and fractional parts
12	* x log2(e) = i + f, |f| <= .5
13	* 2^i is immediate but f needs to be precise for long double accuracy.
14	* Suppress range reduction error in computing f by the following.
15	* Separate x into integer and fractional parts
16	* x = xi + xf, |xf| <= .5
17	* Separate log2(e) into the sum of an exact number c0 and small part c1.
18	* c0 + c1 = log2(e) to extra precision
19	* Then
20	* f = (c0 xi - i) + c0 xf + c1 x
21	* where c0 xi is exact and so also is (c0 xi - i).
22	* -- moshier@na-net.ornl.gov
23	*/
24
25	#include <libm-alias-ldouble.h>
26	#include <machine/asm.h>
27	#include <x86_64-math-asm.h>
28
/* Pick, for the variant being assembled (exp10l, expm1l, or expl by
   default): the entry-point symbol, the name of its *_finite alias (none
   for expm1l), and the x87 instruction that pushes log2(base).  */
#if defined USE_AS_EXP10L
# define FLDLOG		fldl2t
# define IEEE754_EXPL	__ieee754_exp10l
# define EXPL_FINITE	__exp10l_finite
#elif defined USE_AS_EXPM1L
# define FLDLOG		fldl2e
# define IEEE754_EXPL	__expm1l
# undef EXPL_FINITE
#else
# define FLDLOG		fldl2e
# define IEEE754_EXPL	__ieee754_expl
# define EXPL_FINITE	__expl_finite
#endif
42
/* Read-only constants.  Each one occupies a 16-byte mergeable slot: an
   80-bit x87 extended value stored little-endian (8 significand bytes
   followed by 2 sign/exponent bytes) and then 6 bytes of zero padding.

   c0 + c1 = log2(base) to extra precision.  c0 has only 16 significant
   bits, so that c0 * xi is exact for the integer part xi of the argument;
   c1 carries the remainder.  */
	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
#ifdef USE_AS_EXP10L
	/* c0: significand 0xd49a000000000000, exponent 0x4000 (~3.32190).  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	/* c1: significand 0xf0979a3715fc9258, exponent 0x3fef (~2.87e-5).  */
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#else
	/* c0: significand 0xb8aa000000000000, exponent 0x3fff (~1.44269).  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	/* c1: significand 0xeca5705fc2eefa20, exponent 0x3fed (~7.05e-6).  */
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
	/* csat = 2^15 (significand 0x8000000000000000, exponent 0x400e).
	   Finite arguments whose exponent field exceeds 0x400d are replaced
	   by +-csat, so the normal code path produces the overflowed or
	   underflowed result.  */
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(csat)
	/* Provided by <x86_64-math-asm.h>; presumably emits the LDBL_MIN
	   constant consumed by LDBL_CHECK_FORCE_UFLOW_NONNEG -- confirm
	   against that header.  */
DEFINE_LDBL_MIN
#endif
72
/* MO(sym): memory operand naming the constant SYM.  When PIC is defined
   the operand is RIP-relative; otherwise the bare symbol is used.  */
#ifndef PIC
# define MO(sym) sym
#else
# define MO(sym) sym##(%rip)
#endif
78
/* long double IEEE754_EXPL (long double x)

   Computes e^x (expl), 10^x (exp10l) or e^x - 1 (expm1l), depending on
   which USE_AS_* macro is defined, using the x87 unit.

   In:     x is read from the stack at 8(%rsp) (10 bytes; the sign and
           exponent word is at 8+8(%rsp)).
   Out:    result is left in %st(0); the x87 stack is otherwise balanced.
   Clobb:  %eax, %edx, flags.  The x87 control word is switched to
           round-to-nearest around the two frndint instructions and then
           restored.
   Stack:  uses -4(%rsp) and -8(%rsp) (below the stack pointer) as scratch
           for the saved and the modified control word; %rsp itself is
           never adjusted.

   Local labels:
     1: result is a constant selected by the sign of a huge/infinite x
     2: common return
     3: main computation, x in %st(0)
     4: (expm1l only) argument is in the ordinary range
     5: (expl/exp10l only) exponent field above 0x400d, or Inf/NaN
     6: NaN argument  */
	.text
ENTRY(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
	movzwl	8+8(%rsp), %eax
	xorb	$0x80, %ah	// invert sign bit (now 1 is "positive")
	cmpl	$0xc006, %eax	// is num positive and exp >= 6 (number is >= 128.0)?
	jae	HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0)
#endif
	fldt	8(%rsp)
/* I added the following ugly construct because expl(+-Inf) resulted
   in NaN.  The ugliness results from the bright minds at Intel.
   For the i686 the code can be written better.
   -- drepper@cygnus.com.  */
	fxam			/* Is NaN or +-Inf?  */
#ifdef USE_AS_EXPM1L
	/* Undo the sign inversion done above, so that the unsigned compare
	   now singles out negative arguments with a large exponent.  The
	   resulting flags survive fstsw and movb and are consumed by the
	   jb below.  */
	xorb	$0x80, %ah
	cmpl	$0xc006, %eax
	fstsw	%ax
	movb	$0x45, %dh	/* Mask for C3, C2, C0 of the fxam result.  */
	jb	4f

	/* Below -64.0 (may be -NaN or -Inf). */
	andb	%ah, %dh
	cmpb	$0x01, %dh
	je	6f		/* Is +-NaN, jump.  */
	jmp	1f		/* -large, possibly -Inf.  */

4:	/* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf).  */
	/* Test for +-0 as argument.  */
	andb	%ah, %dh
	cmpb	$0x40, %dh
	je	2f

	/* Test for arguments that are small but not subnormal.  */
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax
	cmpl	$0x3fbf, %eax
	jge	3f
	/* Argument's exponent below -64; avoid spurious underflow if
	   normal.  */
	cmpl	$0x0001, %eax
	jge	2f
	/* Force underflow and return the argument, to avoid wrong signs
	   of zero results from the code below in some rounding modes.  */
	fld	%st
	fmul	%st
	fstp	%st
	jmp	2f
#else
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax	/* Drop the sign, keep the exponent field.  */
	cmpl	$0x400d, %eax
	jg	5f
	cmpl	$0x3fbc, %eax
	jge	3f
	/* Argument's exponent below -67, result rounds to 1.  */
	fld1
	faddp
	jmp	2f
5:	/* Overflow, underflow or infinity or NaN as argument.  */
	fstsw	%ax
	movb	$0x45, %dh	/* Mask for C3, C2, C0 of the fxam result.  */
	andb	%ah, %dh
	cmpb	$0x05, %dh
	je	1f		/* Is +-Inf, jump.    */
	cmpb	$0x01, %dh
	je	6f		/* Is +-NaN, jump.    */
	/* Overflow or underflow; saturate.  */
	fstp	%st
	fldt	MO(csat)
	andb	$2, %ah		/* C1 of the fxam result: sign of x.  */
	jz	3f
	fchs
#endif
3:	FLDLOG			/* 1  log2(base)      */
	fmul	%st(1), %st	/* 1  x log2(base)    */
	/* Set round-to-nearest temporarily.  */
	fstcw	-4(%rsp)
	movl	$0xf3ff, %edx	/* Clears the rounding-control bits 10-11.  */
	andl	-4(%rsp), %edx
	movl	%edx, -8(%rsp)
	fldcw	-8(%rsp)
	frndint			/* 1  i               */
	fld	%st(1)		/* 2  x               */
	frndint			/* 2  xi              */
	fldcw	-4(%rsp)	/* Restore the caller's control word.  */
	fld	%st(1)		/* 3  i               */
	fldt	MO(c0)		/* 4  c0              */
	fld	%st(2)		/* 5  xi              */
	fmul	%st(1), %st	/* 5  c0 xi           */
	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
	fld	%st(4)		/* 5  x               */
	fsub	%st(3), %st	/* 5  xf = x - xi     */
	fmulp	%st, %st(1)	/* 4  c0 xf           */
	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
	fldt	MO(c1)		/* 4                  */
	fmul	%st(4), %st	/* 4  c1 * x          */
	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
	f2xm1			/* 3 2^(fract(x * log2(base))) - 1 */
#ifdef USE_AS_EXPM1L
	fstp	%st(1)		/* 2                  */
	fscale			/* 2 scale factor is st(1); base^x - 2^i */
	fxch			/* 2 i                */
	fld1			/* 3 1.0              */
	fscale			/* 3 2^i              */
	fld1			/* 4 1.0              */
	fsubrp	%st, %st(1)	/* 3 2^i - 1.0        */
	fstp	%st(1)		/* 2                  */
	faddp	%st, %st(1)	/* 1 base^x - 1.0     */
#else
	fld1			/* 4 1.0              */
	faddp			/* 3 2^(fract(x * log2(base))) */
	fstp	%st(1)		/* 2  */
	fscale			/* 2 scale factor is st(1); base^x */
	fstp	%st(1)		/* 1  */
	LDBL_CHECK_FORCE_UFLOW_NONNEG
#endif
	fstp	%st(1)		/* 0  */
	jmp	2f
1:
#ifdef USE_AS_EXPM1L
	/* For expm1l, only negative sign gets here.  */
	fstp	%st
	fld1
	fchs
#else
	testl	$0x200, %eax	/* Test sign.  */
	jz	2f		/* If positive, jump.  */
	fstp	%st
	fldz			/* Set result to 0.  */
#endif
2:	ret
6:	/* NaN argument.  */
	fadd	%st
	ret
END(IEEE754_EXPL)
/* Symbol exports.  expl and exp10l publish the entry point under its
   *_finite name as well; expm1l instead gets its hidden definition and
   the long double aliases.  */
#ifndef USE_AS_EXPM1L
strong_alias (IEEE754_EXPL, EXPL_FINITE)
#else
libm_hidden_def (__expm1l)
libm_alias_ldouble (__expm1, expm1)
#endif
221

Browse the source code of glibc_src_2.27/sysdeps/x86_64/fpu/e_expl.S