s_fma.c source code [glibc_src_2.27/sysdeps/ieee754/dbl-64/s_fma.c]

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19
20	#include <float.h>
21	#include <math.h>
22	#include <fenv.h>
23	#include <ieee754.h>
24	#include <math_private.h>
25	#include <libm-alias-double.h>
26	#include <tininess.h>
27
/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
31
32	double
33	__fma (double x, double y, double z)
34	{
35	union ieee754_double u, v, w;
36	int adjust = `0`;
37	u.d = x;
38	v.d = y;
39	w.d = z;
40	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
41	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, `0`)
42	\|\| __builtin_expect (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
43	\|\| __builtin_expect (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
44	\|\| __builtin_expect (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
45	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
46	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, `0`))
47	{
48	/ If z is Inf, but x and y are finite, the result should be*
49	z rather than NaN. /*
50	if (w.ieee.exponent == `0x7ff`
51	&& u.ieee.exponent != `0x7ff`
52	&& v.ieee.exponent != `0x7ff`)
53	return (z + x) + y;
54	/ If z is zero and x are y are nonzero, compute the result*
55	as x y to avoid the wrong sign of a zero result if x * y*
56	underflows to 0. /*
57	if (z == `0` && x != `0` && y != `0`)
58	return x * y;
59	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as*
60	x y + z. /
61	if (u.ieee.exponent == `0x7ff`
62	\|\| v.ieee.exponent == `0x7ff`
63	\|\| w.ieee.exponent == `0x7ff`
64	\|\| x == `0`
65	\|\| y == `0`)
66	return x * y + z;
67	/ If fma will certainly overflow, compute as x * y. /
68	if (u.ieee.exponent + v.ieee.exponent > `0x7ff` + IEEE754_DOUBLE_BIAS)
69	return x * y;
70	/ If x * y is less than 1/4 of DBL_TRUE_MIN, neither the*
71	result nor whether there is underflow depends on its exact
72	value, only on its sign. /*
73	if (u.ieee.exponent + v.ieee.exponent
74	< IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - `2`)
75	{
76	int neg = u.ieee.negative ^ v.ieee.negative;
77	double tiny = neg ? -`0x1p-1074` : `0x1p-1074`;
78	if (w.ieee.exponent >= `3`)
79	return tiny + z;
80	/ Scaling up, adding TINY and scaling down produces the*
81	correct result, because in round-to-nearest mode adding
82	TINY has no effect and in other modes double rounding is
83	harmless. But it may not produce required underflow
84	exceptions. /*
85	v.d = z * `0x1p54` + tiny;
86	if (TININESS_AFTER_ROUNDING
87	? v.ieee.exponent < `55`
88	: (w.ieee.exponent == `0`
89	\|\| (w.ieee.exponent == `1`
90	&& w.ieee.negative != neg
91	&& w.ieee.mantissa1 == `0`
92	&& w.ieee.mantissa0 == `0`)))
93	{
94	double force_underflow = x * y;
95	math_force_eval (force_underflow);
96	}
97	return v.d * `0x1p-54`;
98	}
99	if (u.ieee.exponent + v.ieee.exponent
100	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
101	{
102	/ Compute 1p-53 times smaller result and multiply*
103	at the end. /*
104	if (u.ieee.exponent > v.ieee.exponent)
105	u.ieee.exponent -= DBL_MANT_DIG;
106	else
107	v.ieee.exponent -= DBL_MANT_DIG;
108	/ If x + y exponent is very large and z exponent is very small,*
109	it doesn't matter if we don't adjust it. /*
110	if (w.ieee.exponent > DBL_MANT_DIG)
111	w.ieee.exponent -= DBL_MANT_DIG;
112	adjust = `1`;
113	}
114	else if (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
115	{
116	/ Similarly.*
117	If z exponent is very large and x and y exponents are
118	very small, adjust them up to avoid spurious underflows,
119	rather than down. /*
120	if (u.ieee.exponent + v.ieee.exponent
121	<= IEEE754_DOUBLE_BIAS + `2` * DBL_MANT_DIG)
122	{
123	if (u.ieee.exponent > v.ieee.exponent)
124	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
125	else
126	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
127	}
128	else if (u.ieee.exponent > v.ieee.exponent)
129	{
130	if (u.ieee.exponent > DBL_MANT_DIG)
131	u.ieee.exponent -= DBL_MANT_DIG;
132	}
133	else if (v.ieee.exponent > DBL_MANT_DIG)
134	v.ieee.exponent -= DBL_MANT_DIG;
135	w.ieee.exponent -= DBL_MANT_DIG;
136	adjust = `1`;
137	}
138	else if (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
139	{
140	u.ieee.exponent -= DBL_MANT_DIG;
141	if (v.ieee.exponent)
142	v.ieee.exponent += DBL_MANT_DIG;
143	else
144	v.d *= `0x1p53`;
145	}
146	else if (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
147	{
148	v.ieee.exponent -= DBL_MANT_DIG;
149	if (u.ieee.exponent)
150	u.ieee.exponent += DBL_MANT_DIG;
151	else
152	u.d *= `0x1p53`;
153	}
154	else / if (u.ieee.exponent + v.ieee.exponent*
155	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) /*
156	{
157	if (u.ieee.exponent > v.ieee.exponent)
158	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
159	else
160	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
161	if (w.ieee.exponent <= `4` * DBL_MANT_DIG + `6`)
162	{
163	if (w.ieee.exponent)
164	w.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
165	else
166	w.d *= `0x1p108`;
167	adjust = -`1`;
168	}
169	/ Otherwise x * y should just affect inexact*
170	and nothing else. /*
171	}
172	x = u.d;
173	y = v.d;
174	z = w.d;
175	}
176
177	/ Ensure correct sign of exact 0 + 0. /
178	if (__glibc_unlikely ((x == `0` \|\| y == `0`) && z == `0`))
179	{
180	x = math_opt_barrier (x);
181	return x * y + z;
182	}
183
184	fenv_t env;
185	libc_feholdexcept_setround (&env, FE_TONEAREST);
186
187	/ Multiplication m1 + m2 = x * y using Dekker's algorithm. /
188	#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
189	double x1 = x * C;
190	double y1 = y * C;
191	double m1 = x * y;
192	x1 = (x - x1) + x1;
193	y1 = (y - y1) + y1;
194	double x2 = x - x1;
195	double y2 = y - y1;
196	double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
197
198	/ Addition a1 + a2 = z + m1 using Knuth's algorithm. /
199	double a1 = z + m1;
200	double t1 = a1 - z;
201	double t2 = a1 - t1;
202	t1 = m1 - t1;
203	t2 = z - t2;
204	double a2 = t1 + t2;
205	/ Ensure the arithmetic is not scheduled after feclearexcept call. /
206	math_force_eval (m2);
207	math_force_eval (a2);
208	feclearexcept (FE_INEXACT);
209
210	/ If the result is an exact zero, ensure it has the correct sign. /
211	if (a1 == `0` && m2 == `0`)
212	{
213	libc_feupdateenv (&env);
214	/ Ensure that round-to-nearest value of z + m1 is not reused. /
215	z = math_opt_barrier (z);
216	return z + m1;
217	}
218
219	libc_fesetround (FE_TOWARDZERO);
220
221	/ Perform m2 + a2 addition with round to odd. /
222	u.d = a2 + m2;
223
224	if (__glibc_unlikely (adjust < `0`))
225	{
226	if ((u.ieee.mantissa1 & `1`) == `0`)
227	u.ieee.mantissa1 \|= libc_fetestexcept (FE_INEXACT) != `0`;
228	v.d = a1 + u.d;
229	/ Ensure the addition is not scheduled after fetestexcept call. /
230	math_force_eval (v.d);
231	}
232
233	/ Reset rounding mode and test for inexact simultaneously. /
234	int j = libc_feupdateenv_test (&env, FE_INEXACT) != `0`;
235
236	if (__glibc_likely (adjust == `0`))
237	{
238	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
239	u.ieee.mantissa1 \|= j;
240	/ Result is a1 + u.d. /
241	return a1 + u.d;
242	}
243	else if (__glibc_likely (adjust > `0`))
244	{
245	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
246	u.ieee.mantissa1 \|= j;
247	/ Result is a1 + u.d, scaled up. /
248	return (a1 + u.d) * `0x1p53`;
249	}
250	else
251	{
252	/ If a1 + u.d is exact, the only rounding happens during*
253	scaling down. /*
254	if (j == `0`)
255	return v.d * `0x1p-108`;
256	/ If result rounded to zero is not subnormal, no double*
257	rounding will occur. /*
258	if (v.ieee.exponent > `108`)
259	return (a1 + u.d) * `0x1p-108`;
260	/ If v.d * 0x1p-108 with round to zero is a subnormal above*
261	or equal to DBL_MIN / 2, then v.d 0x1p-108 shifts mantissa*
262	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
263	change the round bit, not sticky or guard bit.
264	v.d 0x1p-108 never normalizes by shifting up,*
265	so round bit plus sticky bit should be already enough
266	for proper rounding. /*
267	if (v.ieee.exponent == `108`)
268	{
269	/ If the exponent would be in the normal range when*
270	rounding to normal precision with unbounded exponent
271	range, the exact result is known and spurious underflows
272	must be avoided on systems detecting tininess after
273	rounding. /*
274	if (TININESS_AFTER_ROUNDING)
275	{
276	w.d = a1 + u.d;
277	if (w.ieee.exponent == `109`)
278	return w.d * `0x1p-108`;
279	}
280	/ v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,*
281	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
282	bit. /*
283	w.d = `0.0`;
284	w.ieee.mantissa1 = ((v.ieee.mantissa1 & `3`) << `1`) \| j;
285	w.ieee.negative = v.ieee.negative;
286	v.ieee.mantissa1 &= ~`3U`;
287	v.d *= `0x1p-108`;
288	w.d *= `0x1p-2`;
289	return v.d + w.d;
290	}
291	v.ieee.mantissa1 \|= j;
292	return v.d * `0x1p-108`;
293	}
294	}
295	#ifndef __fma
296	libm_alias_double (__fma, fma)
297	#endif
298

Browse the source code of glibc_src_2.27/sysdeps/ieee754/dbl-64/s_fma.c