x2y2m1l.c source code [glibc_src_2.23/sysdeps/ieee754/ldbl-96/x2y2m1l.c]

/* Compute x^2 + y^2 - 1, without large cancellation error.
   Copyright (C) 2012-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19	#include <math.h>
20	#include <math_private.h>
21	#include <float.h>
22	#include <stdlib.h>
23
/* Calculate X + Y exactly and store the result in *HI + *LO.  It is
   given that |X| >= |Y| and the values are small enough that no
   overflow occurs.

   HI receives the rounded sum; LO receives the rounding error of that
   sum, so that *HI + *LO equals X + Y under the stated
   preconditions.  */

static inline void
add_split (long double *hi, long double *lo, long double x, long double y)
{
  /* Apply Dekker's algorithm.  */
  *hi = x + y;
  /* X - *HI cancels the part of X that made it into the rounded sum;
     adding Y back leaves only what rounding discarded.  */
  *lo = (x - *hi) + y;
}
35
/* Calculate X * Y exactly and store the result in *HI + *LO.  It is
   given that the values are small enough that no overflow occurs and
   large enough (or zero) that no underflow occurs.

   HI receives the rounded product; LO receives the rounding error of
   that product.  */

static inline void
mul_split (long double *hi, long double *lo, long double x, long double y)
{
#ifdef __FP_FAST_FMAL
  /* Fast built-in fused multiply-add.  */
  *hi = x * y;
  *lo = __builtin_fmal (x, y, -*hi);
#elif defined FP_FAST_FMAL
  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
  *hi = x * y;
  *lo = __fmal (x, y, -*hi);
#else
  /* Apply Dekker's algorithm.  */
  *hi = x * y;
  /* C = 2^ceil(p/2) + 1 for a p-bit significand: multiplying by it and
     cancelling splits an operand into a high and a low half.  */
# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
  long double x1 = x * C;
  long double y1 = y * C;
# undef C
  /* X1/Y1 become the high halves, X2/Y2 the low halves.  */
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  long double x2 = x - x1;
  long double y2 = y - y1;
  /* Accumulate the partial products from largest to smallest, with the
     rounded product removed first.  */
  *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
#endif
}
65
/* Compare absolute values of floating-point values pointed to by P
   and Q for qsort.  Returns -1, 0 or 1 as |*P| is less than, equal
   to, or otherwise related to |*Q|.  */

static int
compare (const void *p, const void *q)
{
  long double pld = fabsl (*(const long double *) p);
  long double qld = fabsl (*(const long double *) q);
  if (pld < qld)
    return -1;
  else if (pld == qld)
    return 0;
  else
    return 1;
}
81
82	/ Return X^2 + Y^2 - 1, computed without large cancellation error.*
83	It is given that 1 > X >= Y >= epsilon / 2, and that X^2 + Y^2 >=
84	0.5. /*
85
86	long double
87	__x2y2m1l (long double x, long double y)
88	{
89	long double vals[`5`];
90	SET_RESTORE_ROUNDL (FE_TONEAREST);
91	mul_split (&vals[`1`], &vals[`0`], x, x);
92	mul_split (&vals[`3`], &vals[`2`], y, y);
93	vals[`4`] = -`1.0L`;
94	qsort (vals, `5`, sizeof (long double), compare);
95	/ Add up the values so that each element of VALS has absolute value*
96	at most equal to the last set bit of the next nonzero
97	element. /*
98	for (size_t i = `0`; i <= `3`; i++)
99	{
100	add_split (&vals[i + `1`], &vals[i], vals[i + `1`], vals[i]);
101	qsort (vals + i + `1`, `4` - i, sizeof (long double), compare);
102	}
103	/ Now any error from this addition will be small. /
104	return vals[`4`] + vals[`3`] + vals[`2`] + vals[`1`] + vals[`0`];
105	}
106

Browse the source code of glibc_src_2.23/sysdeps/ieee754/ldbl-96/x2y2m1l.c