| 1 | /* Compute x * y + z as ternary operation. | 
|---|
| 2 | Copyright (C) 2010-2016 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 | Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. | 
|---|
| 5 |  | 
|---|
| 6 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 7 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 8 | License as published by the Free Software Foundation; either | 
|---|
| 9 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 10 |  | 
|---|
| 11 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 14 | Lesser General Public License for more details. | 
|---|
| 15 |  | 
|---|
| 16 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 17 | License along with the GNU C Library; if not, see | 
|---|
| 18 | <http://www.gnu.org/licenses/>.  */ | 
|---|
| 19 |  | 
|---|
| 20 | #include <float.h> | 
|---|
| 21 | #include <math.h> | 
|---|
| 22 | #include <fenv.h> | 
|---|
| 23 | #include <ieee754.h> | 
|---|
| 24 | #include <math_private.h> | 
|---|
| 25 | #include <tininess.h> | 
|---|
| 26 |  | 
|---|
| 27 | /* This implementation uses rounding to odd to avoid problems with | 
|---|
| 28 | double rounding.  See a paper by Boldo and Melquiond: | 
|---|
| 29 | http://www.lri.fr/~melquion/doc/08-tc.pdf  */ | 
|---|
| 30 |  | 
|---|
| 31 | long double | 
|---|
| 32 | __fmal (long double x, long double y, long double z) | 
|---|
| 33 | { | 
|---|
| 34 | union ieee854_long_double u, v, w; | 
|---|
| 35 | int adjust = 0; | 
|---|
| 36 | u.d = x; | 
|---|
| 37 | v.d = y; | 
|---|
| 38 | w.d = z; | 
|---|
| 39 | if (__builtin_expect (u.ieee.exponent + v.ieee.exponent | 
|---|
| 40 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS | 
|---|
| 41 | - LDBL_MANT_DIG, 0) | 
|---|
| 42 | || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
|---|
| 43 | || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
|---|
| 44 | || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
|---|
| 45 | || __builtin_expect (u.ieee.exponent + v.ieee.exponent | 
|---|
| 46 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0)) | 
|---|
| 47 | { | 
|---|
| 48 | /* If z is Inf, but x and y are finite, the result should be | 
|---|
| 49 | z rather than NaN.  */ | 
|---|
| 50 | if (w.ieee.exponent == 0x7fff | 
|---|
| 51 | && u.ieee.exponent != 0x7fff | 
|---|
| 52 | && v.ieee.exponent != 0x7fff) | 
|---|
| 53 | return (z + x) + y; | 
|---|
| 54 | /* If z is zero and x are y are nonzero, compute the result | 
|---|
| 55 | as x * y to avoid the wrong sign of a zero result if x * y | 
|---|
| 56 | underflows to 0.  */ | 
|---|
| 57 | if (z == 0 && x != 0 && y != 0) | 
|---|
| 58 | return x * y; | 
|---|
| 59 | /* If x or y or z is Inf/NaN, or if x * y is zero, compute as | 
|---|
| 60 | x * y + z.  */ | 
|---|
| 61 | if (u.ieee.exponent == 0x7fff | 
|---|
| 62 | || v.ieee.exponent == 0x7fff | 
|---|
| 63 | || w.ieee.exponent == 0x7fff | 
|---|
| 64 | || x == 0 | 
|---|
| 65 | || y == 0) | 
|---|
| 66 | return x * y + z; | 
|---|
| 67 | /* If fma will certainly overflow, compute as x * y.  */ | 
|---|
| 68 | if (u.ieee.exponent + v.ieee.exponent | 
|---|
| 69 | > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) | 
|---|
| 70 | return x * y; | 
|---|
| 71 | /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the | 
|---|
| 72 | result nor whether there is underflow depends on its exact | 
|---|
| 73 | value, only on its sign.  */ | 
|---|
| 74 | if (u.ieee.exponent + v.ieee.exponent | 
|---|
| 75 | < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) | 
|---|
| 76 | { | 
|---|
| 77 | int neg = u.ieee.negative ^ v.ieee.negative; | 
|---|
| 78 | long double tiny = neg ? -0x1p-16445L : 0x1p-16445L; | 
|---|
| 79 | if (w.ieee.exponent >= 3) | 
|---|
| 80 | return tiny + z; | 
|---|
| 81 | /* Scaling up, adding TINY and scaling down produces the | 
|---|
| 82 | correct result, because in round-to-nearest mode adding | 
|---|
| 83 | TINY has no effect and in other modes double rounding is | 
|---|
| 84 | harmless.  But it may not produce required underflow | 
|---|
| 85 | exceptions.  */ | 
|---|
| 86 | v.d = z * 0x1p65L + tiny; | 
|---|
| 87 | if (TININESS_AFTER_ROUNDING | 
|---|
| 88 | ? v.ieee.exponent < 66 | 
|---|
| 89 | : (w.ieee.exponent == 0 | 
|---|
| 90 | || (w.ieee.exponent == 1 | 
|---|
| 91 | && w.ieee.negative != neg | 
|---|
| 92 | && w.ieee.mantissa1 == 0 | 
|---|
| 93 | && w.ieee.mantissa0 == 0x80000000))) | 
|---|
| 94 | { | 
|---|
| 95 | long double force_underflow = x * y; | 
|---|
| 96 | math_force_eval (force_underflow); | 
|---|
| 97 | } | 
|---|
| 98 | return v.d * 0x1p-65L; | 
|---|
| 99 | } | 
|---|
| 100 | if (u.ieee.exponent + v.ieee.exponent | 
|---|
| 101 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) | 
|---|
| 102 | { | 
|---|
| 103 | /* Compute 1p-64 times smaller result and multiply | 
|---|
| 104 | at the end.  */ | 
|---|
| 105 | if (u.ieee.exponent > v.ieee.exponent) | 
|---|
| 106 | u.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 107 | else | 
|---|
| 108 | v.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 109 | /* If x + y exponent is very large and z exponent is very small, | 
|---|
| 110 | it doesn't matter if we don't adjust it.  */ | 
|---|
| 111 | if (w.ieee.exponent > LDBL_MANT_DIG) | 
|---|
| 112 | w.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 113 | adjust = 1; | 
|---|
| 114 | } | 
|---|
| 115 | else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
|---|
| 116 | { | 
|---|
| 117 | /* Similarly. | 
|---|
| 118 | If z exponent is very large and x and y exponents are | 
|---|
| 119 | very small, adjust them up to avoid spurious underflows, | 
|---|
| 120 | rather than down.  */ | 
|---|
| 121 | if (u.ieee.exponent + v.ieee.exponent | 
|---|
| 122 | <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG) | 
|---|
| 123 | { | 
|---|
| 124 | if (u.ieee.exponent > v.ieee.exponent) | 
|---|
| 125 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
|---|
| 126 | else | 
|---|
| 127 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
|---|
| 128 | } | 
|---|
| 129 | else if (u.ieee.exponent > v.ieee.exponent) | 
|---|
| 130 | { | 
|---|
| 131 | if (u.ieee.exponent > LDBL_MANT_DIG) | 
|---|
| 132 | u.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 133 | } | 
|---|
| 134 | else if (v.ieee.exponent > LDBL_MANT_DIG) | 
|---|
| 135 | v.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 136 | w.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 137 | adjust = 1; | 
|---|
| 138 | } | 
|---|
| 139 | else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
|---|
| 140 | { | 
|---|
| 141 | u.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 142 | if (v.ieee.exponent) | 
|---|
| 143 | v.ieee.exponent += LDBL_MANT_DIG; | 
|---|
| 144 | else | 
|---|
| 145 | v.d *= 0x1p64L; | 
|---|
| 146 | } | 
|---|
| 147 | else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
|---|
| 148 | { | 
|---|
| 149 | v.ieee.exponent -= LDBL_MANT_DIG; | 
|---|
| 150 | if (u.ieee.exponent) | 
|---|
| 151 | u.ieee.exponent += LDBL_MANT_DIG; | 
|---|
| 152 | else | 
|---|
| 153 | u.d *= 0x1p64L; | 
|---|
| 154 | } | 
|---|
| 155 | else /* if (u.ieee.exponent + v.ieee.exponent | 
|---|
| 156 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ | 
|---|
| 157 | { | 
|---|
| 158 | if (u.ieee.exponent > v.ieee.exponent) | 
|---|
| 159 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
|---|
| 160 | else | 
|---|
| 161 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
|---|
| 162 | if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) | 
|---|
| 163 | { | 
|---|
| 164 | if (w.ieee.exponent) | 
|---|
| 165 | w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
|---|
| 166 | else | 
|---|
| 167 | w.d *= 0x1p130L; | 
|---|
| 168 | adjust = -1; | 
|---|
| 169 | } | 
|---|
| 170 | /* Otherwise x * y should just affect inexact | 
|---|
| 171 | and nothing else.  */ | 
|---|
| 172 | } | 
|---|
| 173 | x = u.d; | 
|---|
| 174 | y = v.d; | 
|---|
| 175 | z = w.d; | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | /* Ensure correct sign of exact 0 + 0.  */ | 
|---|
| 179 | if (__glibc_unlikely ((x == 0 || y == 0) && z == 0)) | 
|---|
| 180 | { | 
|---|
| 181 | x = math_opt_barrier (x); | 
|---|
| 182 | return x * y + z; | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 | fenv_t env; | 
|---|
| 186 | feholdexcept (&env); | 
|---|
| 187 | fesetround (FE_TONEAREST); | 
|---|
| 188 |  | 
|---|
| 189 | /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */ | 
|---|
| 190 | #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) | 
|---|
| 191 | long double x1 = x * C; | 
|---|
| 192 | long double y1 = y * C; | 
|---|
| 193 | long double m1 = x * y; | 
|---|
| 194 | x1 = (x - x1) + x1; | 
|---|
| 195 | y1 = (y - y1) + y1; | 
|---|
| 196 | long double x2 = x - x1; | 
|---|
| 197 | long double y2 = y - y1; | 
|---|
| 198 | long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2; | 
|---|
| 199 |  | 
|---|
| 200 | /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */ | 
|---|
| 201 | long double a1 = z + m1; | 
|---|
| 202 | long double t1 = a1 - z; | 
|---|
| 203 | long double t2 = a1 - t1; | 
|---|
| 204 | t1 = m1 - t1; | 
|---|
| 205 | t2 = z - t2; | 
|---|
| 206 | long double a2 = t1 + t2; | 
|---|
| 207 | /* Ensure the arithmetic is not scheduled after feclearexcept call.  */ | 
|---|
| 208 | math_force_eval (m2); | 
|---|
| 209 | math_force_eval (a2); | 
|---|
| 210 | feclearexcept (FE_INEXACT); | 
|---|
| 211 |  | 
|---|
| 212 | /* If the result is an exact zero, ensure it has the correct sign.  */ | 
|---|
| 213 | if (a1 == 0 && m2 == 0) | 
|---|
| 214 | { | 
|---|
| 215 | feupdateenv (&env); | 
|---|
| 216 | /* Ensure that round-to-nearest value of z + m1 is not reused.  */ | 
|---|
| 217 | z = math_opt_barrier (z); | 
|---|
| 218 | return z + m1; | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
| 221 | fesetround (FE_TOWARDZERO); | 
|---|
| 222 | /* Perform m2 + a2 addition with round to odd.  */ | 
|---|
| 223 | u.d = a2 + m2; | 
|---|
| 224 |  | 
|---|
| 225 | if (__glibc_likely (adjust == 0)) | 
|---|
| 226 | { | 
|---|
| 227 | if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff) | 
|---|
| 228 | u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
|---|
| 229 | feupdateenv (&env); | 
|---|
| 230 | /* Result is a1 + u.d.  */ | 
|---|
| 231 | return a1 + u.d; | 
|---|
| 232 | } | 
|---|
| 233 | else if (__glibc_likely (adjust > 0)) | 
|---|
| 234 | { | 
|---|
| 235 | if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff) | 
|---|
| 236 | u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
|---|
| 237 | feupdateenv (&env); | 
|---|
| 238 | /* Result is a1 + u.d, scaled up.  */ | 
|---|
| 239 | return (a1 + u.d) * 0x1p64L; | 
|---|
| 240 | } | 
|---|
| 241 | else | 
|---|
| 242 | { | 
|---|
| 243 | if ((u.ieee.mantissa1 & 1) == 0) | 
|---|
| 244 | u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
|---|
| 245 | v.d = a1 + u.d; | 
|---|
| 246 | /* Ensure the addition is not scheduled after fetestexcept call.  */ | 
|---|
| 247 | math_force_eval (v.d); | 
|---|
| 248 | int j = fetestexcept (FE_INEXACT) != 0; | 
|---|
| 249 | feupdateenv (&env); | 
|---|
| 250 | /* Ensure the following computations are performed in default rounding | 
|---|
| 251 | mode instead of just reusing the round to zero computation.  */ | 
|---|
| 252 | asm volatile ( "": "=m"(u) : "m"(u)); | 
|---|
| 253 | /* If a1 + u.d is exact, the only rounding happens during | 
|---|
| 254 | scaling down.  */ | 
|---|
| 255 | if (j == 0) | 
|---|
| 256 | return v.d * 0x1p-130L; | 
|---|
| 257 | /* If result rounded to zero is not subnormal, no double | 
|---|
| 258 | rounding will occur.  */ | 
|---|
| 259 | if (v.ieee.exponent > 130) | 
|---|
| 260 | return (a1 + u.d) * 0x1p-130L; | 
|---|
| 261 | /* If v.d * 0x1p-130L with round to zero is a subnormal above | 
|---|
| 262 | or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa | 
|---|
| 263 | down just by 1 bit, which means v.ieee.mantissa1 |= j would | 
|---|
| 264 | change the round bit, not sticky or guard bit. | 
|---|
| 265 | v.d * 0x1p-130L never normalizes by shifting up, | 
|---|
| 266 | so round bit plus sticky bit should be already enough | 
|---|
| 267 | for proper rounding.  */ | 
|---|
| 268 | if (v.ieee.exponent == 130) | 
|---|
| 269 | { | 
|---|
| 270 | /* If the exponent would be in the normal range when | 
|---|
| 271 | rounding to normal precision with unbounded exponent | 
|---|
| 272 | range, the exact result is known and spurious underflows | 
|---|
| 273 | must be avoided on systems detecting tininess after | 
|---|
| 274 | rounding.  */ | 
|---|
| 275 | if (TININESS_AFTER_ROUNDING) | 
|---|
| 276 | { | 
|---|
| 277 | w.d = a1 + u.d; | 
|---|
| 278 | if (w.ieee.exponent == 131) | 
|---|
| 279 | return w.d * 0x1p-130L; | 
|---|
| 280 | } | 
|---|
| 281 | /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, | 
|---|
| 282 | v.ieee.mantissa1 & 1 is the round bit and j is our sticky | 
|---|
| 283 | bit.  */ | 
|---|
| 284 | w.d = 0.0L; | 
|---|
| 285 | w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; | 
|---|
| 286 | w.ieee.negative = v.ieee.negative; | 
|---|
| 287 | v.ieee.mantissa1 &= ~3U; | 
|---|
| 288 | v.d *= 0x1p-130L; | 
|---|
| 289 | w.d *= 0x1p-2L; | 
|---|
| 290 | return v.d + w.d; | 
|---|
| 291 | } | 
|---|
| 292 | v.ieee.mantissa1 |= j; | 
|---|
| 293 | return v.d * 0x1p-130L; | 
|---|
| 294 | } | 
|---|
| 295 | } | 
|---|
| 296 | weak_alias (__fmal, fmal) | 
|---|
| 297 |  | 
|---|