s_fmal.c source code [glibc_src_2.28/sysdeps/ieee754/ldbl-96/s_fmal.c]

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19
20	#include <float.h>
21	#include <math.h>
22	#include <fenv.h>
23	#include <ieee754.h>
24	#include <math-barriers.h>
25	#include <math_private.h>
26	#include <libm-alias-ldouble.h>
27	#include <tininess.h>
28
/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
32
33	long double
34	__fmal (long double x, long double y, long double z)
35	{
36	union ieee854_long_double u, v, w;
37	int adjust = `0`;
38	u.d = x;
39	v.d = y;
40	w.d = z;
41	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
42	>= `0x7fff` + IEEE854_LONG_DOUBLE_BIAS
43	- LDBL_MANT_DIG, `0`)
44	\|\| __builtin_expect (u.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
45	\|\| __builtin_expect (v.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
46	\|\| __builtin_expect (w.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
47	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
48	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, `0`))
49	{
50	/ If z is Inf, but x and y are finite, the result should be*
51	z rather than NaN. /*
52	if (w.ieee.exponent == `0x7fff`
53	&& u.ieee.exponent != `0x7fff`
54	&& v.ieee.exponent != `0x7fff`)
55	return (z + x) + y;
56	/ If z is zero and x are y are nonzero, compute the result*
57	as x y to avoid the wrong sign of a zero result if x * y*
58	underflows to 0. /*
59	if (z == `0` && x != `0` && y != `0`)
60	return x * y;
61	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as*
62	x y + z. /
63	if (u.ieee.exponent == `0x7fff`
64	\|\| v.ieee.exponent == `0x7fff`
65	\|\| w.ieee.exponent == `0x7fff`
66	\|\| x == `0`
67	\|\| y == `0`)
68	return x * y + z;
69	/ If fma will certainly overflow, compute as x * y. /
70	if (u.ieee.exponent + v.ieee.exponent
71	> `0x7fff` + IEEE854_LONG_DOUBLE_BIAS)
72	return x * y;
73	/ If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the*
74	result nor whether there is underflow depends on its exact
75	value, only on its sign. /*
76	if (u.ieee.exponent + v.ieee.exponent
77	< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - `2`)
78	{
79	int neg = u.ieee.negative ^ v.ieee.negative;
80	long double tiny = neg ? -`0x1p-16445L` : `0x1p-16445L`;
81	if (w.ieee.exponent >= `3`)
82	return tiny + z;
83	/ Scaling up, adding TINY and scaling down produces the*
84	correct result, because in round-to-nearest mode adding
85	TINY has no effect and in other modes double rounding is
86	harmless. But it may not produce required underflow
87	exceptions. /*
88	v.d = z * `0x1p65L` + tiny;
89	if (TININESS_AFTER_ROUNDING
90	? v.ieee.exponent < `66`
91	: (w.ieee.exponent == `0`
92	\|\| (w.ieee.exponent == `1`
93	&& w.ieee.negative != neg
94	&& w.ieee.mantissa1 == `0`
95	&& w.ieee.mantissa0 == `0x80000000`)))
96	{
97	long double force_underflow = x * y;
98	math_force_eval (force_underflow);
99	}
100	return v.d * `0x1p-65L`;
101	}
102	if (u.ieee.exponent + v.ieee.exponent
103	>= `0x7fff` + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
104	{
105	/ Compute 1p-64 times smaller result and multiply*
106	at the end. /*
107	if (u.ieee.exponent > v.ieee.exponent)
108	u.ieee.exponent -= LDBL_MANT_DIG;
109	else
110	v.ieee.exponent -= LDBL_MANT_DIG;
111	/ If x + y exponent is very large and z exponent is very small,*
112	it doesn't matter if we don't adjust it. /*
113	if (w.ieee.exponent > LDBL_MANT_DIG)
114	w.ieee.exponent -= LDBL_MANT_DIG;
115	adjust = `1`;
116	}
117	else if (w.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
118	{
119	/ Similarly.*
120	If z exponent is very large and x and y exponents are
121	very small, adjust them up to avoid spurious underflows,
122	rather than down. /*
123	if (u.ieee.exponent + v.ieee.exponent
124	<= IEEE854_LONG_DOUBLE_BIAS + `2` * LDBL_MANT_DIG)
125	{
126	if (u.ieee.exponent > v.ieee.exponent)
127	u.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
128	else
129	v.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
130	}
131	else if (u.ieee.exponent > v.ieee.exponent)
132	{
133	if (u.ieee.exponent > LDBL_MANT_DIG)
134	u.ieee.exponent -= LDBL_MANT_DIG;
135	}
136	else if (v.ieee.exponent > LDBL_MANT_DIG)
137	v.ieee.exponent -= LDBL_MANT_DIG;
138	w.ieee.exponent -= LDBL_MANT_DIG;
139	adjust = `1`;
140	}
141	else if (u.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
142	{
143	u.ieee.exponent -= LDBL_MANT_DIG;
144	if (v.ieee.exponent)
145	v.ieee.exponent += LDBL_MANT_DIG;
146	else
147	v.d *= `0x1p64L`;
148	}
149	else if (v.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
150	{
151	v.ieee.exponent -= LDBL_MANT_DIG;
152	if (u.ieee.exponent)
153	u.ieee.exponent += LDBL_MANT_DIG;
154	else
155	u.d *= `0x1p64L`;
156	}
157	else / if (u.ieee.exponent + v.ieee.exponent*
158	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) /*
159	{
160	if (u.ieee.exponent > v.ieee.exponent)
161	u.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
162	else
163	v.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
164	if (w.ieee.exponent <= `4` * LDBL_MANT_DIG + `6`)
165	{
166	if (w.ieee.exponent)
167	w.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
168	else
169	w.d *= `0x1p130L`;
170	adjust = -`1`;
171	}
172	/ Otherwise x * y should just affect inexact*
173	and nothing else. /*
174	}
175	x = u.d;
176	y = v.d;
177	z = w.d;
178	}
179
180	/ Ensure correct sign of exact 0 + 0. /
181	if (__glibc_unlikely ((x == `0` \|\| y == `0`) && z == `0`))
182	{
183	x = math_opt_barrier (x);
184	return x * y + z;
185	}
186
187	fenv_t env;
188	feholdexcept (&env);
189	fesetround (FE_TONEAREST);
190
191	/ Multiplication m1 + m2 = x * y using Dekker's algorithm. /
192	#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
193	long double x1 = x * C;
194	long double y1 = y * C;
195	long double m1 = x * y;
196	x1 = (x - x1) + x1;
197	y1 = (y - y1) + y1;
198	long double x2 = x - x1;
199	long double y2 = y - y1;
200	long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
201
202	/ Addition a1 + a2 = z + m1 using Knuth's algorithm. /
203	long double a1 = z + m1;
204	long double t1 = a1 - z;
205	long double t2 = a1 - t1;
206	t1 = m1 - t1;
207	t2 = z - t2;
208	long double a2 = t1 + t2;
209	/ Ensure the arithmetic is not scheduled after feclearexcept call. /
210	math_force_eval (m2);
211	math_force_eval (a2);
212	feclearexcept (FE_INEXACT);
213
214	/ If the result is an exact zero, ensure it has the correct sign. /
215	if (a1 == `0` && m2 == `0`)
216	{
217	feupdateenv (&env);
218	/ Ensure that round-to-nearest value of z + m1 is not reused. /
219	z = math_opt_barrier (z);
220	return z + m1;
221	}
222
223	fesetround (FE_TOWARDZERO);
224	/ Perform m2 + a2 addition with round to odd. /
225	u.d = a2 + m2;
226
227	if (__glibc_likely (adjust == `0`))
228	{
229	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7fff`)
230	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
231	feupdateenv (&env);
232	/ Result is a1 + u.d. /
233	return a1 + u.d;
234	}
235	else if (__glibc_likely (adjust > `0`))
236	{
237	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7fff`)
238	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
239	feupdateenv (&env);
240	/ Result is a1 + u.d, scaled up. /
241	return (a1 + u.d) * `0x1p64L`;
242	}
243	else
244	{
245	if ((u.ieee.mantissa1 & `1`) == `0`)
246	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
247	v.d = a1 + u.d;
248	/ Ensure the addition is not scheduled after fetestexcept call. /
249	math_force_eval (v.d);
250	int j = fetestexcept (FE_INEXACT) != `0`;
251	feupdateenv (&env);
252	/ Ensure the following computations are performed in default rounding*
253	mode instead of just reusing the round to zero computation. /*
254	asm volatile ("" : "=m" (u) : "m" (u));
255	/ If a1 + u.d is exact, the only rounding happens during*
256	scaling down. /*
257	if (j == `0`)
258	return v.d * `0x1p-130L`;
259	/ If result rounded to zero is not subnormal, no double*
260	rounding will occur. /*
261	if (v.ieee.exponent > `130`)
262	return (a1 + u.d) * `0x1p-130L`;
263	/ If v.d * 0x1p-130L with round to zero is a subnormal above*
264	or equal to LDBL_MIN / 2, then v.d 0x1p-130L shifts mantissa*
265	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
266	change the round bit, not sticky or guard bit.
267	v.d 0x1p-130L never normalizes by shifting up,*
268	so round bit plus sticky bit should be already enough
269	for proper rounding. /*
270	if (v.ieee.exponent == `130`)
271	{
272	/ If the exponent would be in the normal range when*
273	rounding to normal precision with unbounded exponent
274	range, the exact result is known and spurious underflows
275	must be avoided on systems detecting tininess after
276	rounding. /*
277	if (TININESS_AFTER_ROUNDING)
278	{
279	w.d = a1 + u.d;
280	if (w.ieee.exponent == `131`)
281	return w.d * `0x1p-130L`;
282	}
283	/ v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,*
284	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
285	bit. /*
286	w.d = `0.0L`;
287	w.ieee.mantissa1 = ((v.ieee.mantissa1 & `3`) << `1`) \| j;
288	w.ieee.negative = v.ieee.negative;
289	v.ieee.mantissa1 &= ~`3U`;
290	v.d *= `0x1p-130L`;
291	w.d *= `0x1p-2L`;
292	return v.d + w.d;
293	}
294	v.ieee.mantissa1 \|= j;
295	return v.d * `0x1p-130L`;
296	}
297	}
298	libm_alias_ldouble (__fma, fma)
299

Browse the source code of glibc_src_2.28/sysdeps/ieee754/ldbl-96/s_fmal.c