| 1 | /* Compute x * y + z as ternary operation. | 
| 2 |    Copyright (C) 2010-2018 Free Software Foundation, Inc. | 
| 3 |    This file is part of the GNU C Library. | 
| 4 |    Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. | 
| 5 |  | 
| 6 |    The GNU C Library is free software; you can redistribute it and/or | 
| 7 |    modify it under the terms of the GNU Lesser General Public | 
| 8 |    License as published by the Free Software Foundation; either | 
| 9 |    version 2.1 of the License, or (at your option) any later version. | 
| 10 |  | 
| 11 |    The GNU C Library is distributed in the hope that it will be useful, | 
| 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 14 |    Lesser General Public License for more details. | 
| 15 |  | 
| 16 |    You should have received a copy of the GNU Lesser General Public | 
| 17 |    License along with the GNU C Library; if not, see | 
| 18 |    <http://www.gnu.org/licenses/>.  */ | 
| 19 |  | 
| 20 | #include <float.h> | 
| 21 | #include <math.h> | 
| 22 | #include <fenv.h> | 
| 23 | #include <ieee754.h> | 
| 24 | #include <math_private.h> | 
| 25 | #include <libm-alias-ldouble.h> | 
| 26 | #include <tininess.h> | 
| 27 |  | 
| 28 | /* This implementation uses rounding to odd to avoid problems with | 
| 29 |    double rounding.  See a paper by Boldo and Melquiond: | 
| 30 |    http://www.lri.fr/~melquion/doc/08-tc.pdf  */ | 
| 31 |  | 
| 32 | long double | 
| 33 | __fmal (long double x, long double y, long double z) | 
| 34 | { | 
| 35 |   union ieee854_long_double u, v, w; | 
| 36 |   int adjust = 0; | 
| 37 |   u.d = x; | 
| 38 |   v.d = y; | 
| 39 |   w.d = z; | 
| 40 |   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent | 
| 41 | 			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS | 
| 42 | 			   - LDBL_MANT_DIG, 0) | 
| 43 |       || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
| 44 |       || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
| 45 |       || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | 
| 46 |       || __builtin_expect (u.ieee.exponent + v.ieee.exponent | 
| 47 | 			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0)) | 
| 48 |     { | 
| 49 |       /* If z is Inf, but x and y are finite, the result should be | 
| 50 | 	 z rather than NaN.  */ | 
| 51 |       if (w.ieee.exponent == 0x7fff | 
| 52 | 	  && u.ieee.exponent != 0x7fff | 
| 53 |           && v.ieee.exponent != 0x7fff) | 
| 54 | 	return (z + x) + y; | 
| 55 |       /* If z is zero and x are y are nonzero, compute the result | 
| 56 | 	 as x * y to avoid the wrong sign of a zero result if x * y | 
| 57 | 	 underflows to 0.  */ | 
| 58 |       if (z == 0 && x != 0 && y != 0) | 
| 59 | 	return x * y; | 
| 60 |       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as | 
| 61 | 	 x * y + z.  */ | 
| 62 |       if (u.ieee.exponent == 0x7fff | 
| 63 | 	  || v.ieee.exponent == 0x7fff | 
| 64 | 	  || w.ieee.exponent == 0x7fff | 
| 65 | 	  || x == 0 | 
| 66 | 	  || y == 0) | 
| 67 | 	return x * y + z; | 
| 68 |       /* If fma will certainly overflow, compute as x * y.  */ | 
| 69 |       if (u.ieee.exponent + v.ieee.exponent | 
| 70 | 	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) | 
| 71 | 	return x * y; | 
| 72 |       /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the | 
| 73 | 	 result nor whether there is underflow depends on its exact | 
| 74 | 	 value, only on its sign.  */ | 
| 75 |       if (u.ieee.exponent + v.ieee.exponent | 
| 76 | 	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) | 
| 77 | 	{ | 
| 78 | 	  int neg = u.ieee.negative ^ v.ieee.negative; | 
| 79 | 	  long double tiny = neg ? -0x1p-16445L : 0x1p-16445L; | 
| 80 | 	  if (w.ieee.exponent >= 3) | 
| 81 | 	    return tiny + z; | 
| 82 | 	  /* Scaling up, adding TINY and scaling down produces the | 
| 83 | 	     correct result, because in round-to-nearest mode adding | 
| 84 | 	     TINY has no effect and in other modes double rounding is | 
| 85 | 	     harmless.  But it may not produce required underflow | 
| 86 | 	     exceptions.  */ | 
| 87 | 	  v.d = z * 0x1p65L + tiny; | 
| 88 | 	  if (TININESS_AFTER_ROUNDING | 
| 89 | 	      ? v.ieee.exponent < 66 | 
| 90 | 	      : (w.ieee.exponent == 0 | 
| 91 | 		 || (w.ieee.exponent == 1 | 
| 92 | 		     && w.ieee.negative != neg | 
| 93 | 		     && w.ieee.mantissa1 == 0 | 
| 94 | 		     && w.ieee.mantissa0 == 0x80000000))) | 
| 95 | 	    { | 
| 96 | 	      long double force_underflow = x * y; | 
| 97 | 	      math_force_eval (force_underflow); | 
| 98 | 	    } | 
| 99 | 	  return v.d * 0x1p-65L; | 
| 100 | 	} | 
| 101 |       if (u.ieee.exponent + v.ieee.exponent | 
| 102 | 	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) | 
| 103 | 	{ | 
| 104 | 	  /* Compute 1p-64 times smaller result and multiply | 
| 105 | 	     at the end.  */ | 
| 106 | 	  if (u.ieee.exponent > v.ieee.exponent) | 
| 107 | 	    u.ieee.exponent -= LDBL_MANT_DIG; | 
| 108 | 	  else | 
| 109 | 	    v.ieee.exponent -= LDBL_MANT_DIG; | 
| 110 | 	  /* If x + y exponent is very large and z exponent is very small, | 
| 111 | 	     it doesn't matter if we don't adjust it.  */ | 
| 112 | 	  if (w.ieee.exponent > LDBL_MANT_DIG) | 
| 113 | 	    w.ieee.exponent -= LDBL_MANT_DIG; | 
| 114 | 	  adjust = 1; | 
| 115 | 	} | 
| 116 |       else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
| 117 | 	{ | 
| 118 | 	  /* Similarly. | 
| 119 | 	     If z exponent is very large and x and y exponents are | 
| 120 | 	     very small, adjust them up to avoid spurious underflows, | 
| 121 | 	     rather than down.  */ | 
| 122 | 	  if (u.ieee.exponent + v.ieee.exponent | 
| 123 | 	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG) | 
| 124 | 	    { | 
| 125 | 	      if (u.ieee.exponent > v.ieee.exponent) | 
| 126 | 		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
| 127 | 	      else | 
| 128 | 		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
| 129 | 	    } | 
| 130 | 	  else if (u.ieee.exponent > v.ieee.exponent) | 
| 131 | 	    { | 
| 132 | 	      if (u.ieee.exponent > LDBL_MANT_DIG) | 
| 133 | 		u.ieee.exponent -= LDBL_MANT_DIG; | 
| 134 | 	    } | 
| 135 | 	  else if (v.ieee.exponent > LDBL_MANT_DIG) | 
| 136 | 	    v.ieee.exponent -= LDBL_MANT_DIG; | 
| 137 | 	  w.ieee.exponent -= LDBL_MANT_DIG; | 
| 138 | 	  adjust = 1; | 
| 139 | 	} | 
| 140 |       else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
| 141 | 	{ | 
| 142 | 	  u.ieee.exponent -= LDBL_MANT_DIG; | 
| 143 | 	  if (v.ieee.exponent) | 
| 144 | 	    v.ieee.exponent += LDBL_MANT_DIG; | 
| 145 | 	  else | 
| 146 | 	    v.d *= 0x1p64L; | 
| 147 | 	} | 
| 148 |       else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | 
| 149 | 	{ | 
| 150 | 	  v.ieee.exponent -= LDBL_MANT_DIG; | 
| 151 | 	  if (u.ieee.exponent) | 
| 152 | 	    u.ieee.exponent += LDBL_MANT_DIG; | 
| 153 | 	  else | 
| 154 | 	    u.d *= 0x1p64L; | 
| 155 | 	} | 
| 156 |       else /* if (u.ieee.exponent + v.ieee.exponent | 
| 157 | 		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ | 
| 158 | 	{ | 
| 159 | 	  if (u.ieee.exponent > v.ieee.exponent) | 
| 160 | 	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
| 161 | 	  else | 
| 162 | 	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
| 163 | 	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) | 
| 164 | 	    { | 
| 165 | 	      if (w.ieee.exponent) | 
| 166 | 		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | 
| 167 | 	      else | 
| 168 | 		w.d *= 0x1p130L; | 
| 169 | 	      adjust = -1; | 
| 170 | 	    } | 
| 171 | 	  /* Otherwise x * y should just affect inexact | 
| 172 | 	     and nothing else.  */ | 
| 173 | 	} | 
| 174 |       x = u.d; | 
| 175 |       y = v.d; | 
| 176 |       z = w.d; | 
| 177 |     } | 
| 178 |  | 
| 179 |   /* Ensure correct sign of exact 0 + 0.  */ | 
| 180 |   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0)) | 
| 181 |     { | 
| 182 |       x = math_opt_barrier (x); | 
| 183 |       return x * y + z; | 
| 184 |     } | 
| 185 |  | 
| 186 |   fenv_t env; | 
| 187 |   feholdexcept (&env); | 
| 188 |   fesetround (FE_TONEAREST); | 
| 189 |  | 
| 190 |   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */ | 
| 191 | #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) | 
| 192 |   long double x1 = x * C; | 
| 193 |   long double y1 = y * C; | 
| 194 |   long double m1 = x * y; | 
| 195 |   x1 = (x - x1) + x1; | 
| 196 |   y1 = (y - y1) + y1; | 
| 197 |   long double x2 = x - x1; | 
| 198 |   long double y2 = y - y1; | 
| 199 |   long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2; | 
| 200 |  | 
| 201 |   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */ | 
| 202 |   long double a1 = z + m1; | 
| 203 |   long double t1 = a1 - z; | 
| 204 |   long double t2 = a1 - t1; | 
| 205 |   t1 = m1 - t1; | 
| 206 |   t2 = z - t2; | 
| 207 |   long double a2 = t1 + t2; | 
| 208 |   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */ | 
| 209 |   math_force_eval (m2); | 
| 210 |   math_force_eval (a2); | 
| 211 |   feclearexcept (FE_INEXACT); | 
| 212 |  | 
| 213 |   /* If the result is an exact zero, ensure it has the correct sign.  */ | 
| 214 |   if (a1 == 0 && m2 == 0) | 
| 215 |     { | 
| 216 |       feupdateenv (&env); | 
| 217 |       /* Ensure that round-to-nearest value of z + m1 is not reused.  */ | 
| 218 |       z = math_opt_barrier (z); | 
| 219 |       return z + m1; | 
| 220 |     } | 
| 221 |  | 
| 222 |   fesetround (FE_TOWARDZERO); | 
| 223 |   /* Perform m2 + a2 addition with round to odd.  */ | 
| 224 |   u.d = a2 + m2; | 
| 225 |  | 
| 226 |   if (__glibc_likely (adjust == 0)) | 
| 227 |     { | 
| 228 |       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff) | 
| 229 | 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
| 230 |       feupdateenv (&env); | 
| 231 |       /* Result is a1 + u.d.  */ | 
| 232 |       return a1 + u.d; | 
| 233 |     } | 
| 234 |   else if (__glibc_likely (adjust > 0)) | 
| 235 |     { | 
| 236 |       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff) | 
| 237 | 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
| 238 |       feupdateenv (&env); | 
| 239 |       /* Result is a1 + u.d, scaled up.  */ | 
| 240 |       return (a1 + u.d) * 0x1p64L; | 
| 241 |     } | 
| 242 |   else | 
| 243 |     { | 
| 244 |       if ((u.ieee.mantissa1 & 1) == 0) | 
| 245 | 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; | 
| 246 |       v.d = a1 + u.d; | 
| 247 |       /* Ensure the addition is not scheduled after fetestexcept call.  */ | 
| 248 |       math_force_eval (v.d); | 
| 249 |       int j = fetestexcept (FE_INEXACT) != 0; | 
| 250 |       feupdateenv (&env); | 
| 251 |       /* Ensure the following computations are performed in default rounding | 
| 252 | 	 mode instead of just reusing the round to zero computation.  */ | 
| 253 |       asm volatile (""  : "=m"  (u) : "m"  (u)); | 
| 254 |       /* If a1 + u.d is exact, the only rounding happens during | 
| 255 | 	 scaling down.  */ | 
| 256 |       if (j == 0) | 
| 257 | 	return v.d * 0x1p-130L; | 
| 258 |       /* If result rounded to zero is not subnormal, no double | 
| 259 | 	 rounding will occur.  */ | 
| 260 |       if (v.ieee.exponent > 130) | 
| 261 | 	return (a1 + u.d) * 0x1p-130L; | 
| 262 |       /* If v.d * 0x1p-130L with round to zero is a subnormal above | 
| 263 | 	 or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa | 
| 264 | 	 down just by 1 bit, which means v.ieee.mantissa1 |= j would | 
| 265 | 	 change the round bit, not sticky or guard bit. | 
| 266 | 	 v.d * 0x1p-130L never normalizes by shifting up, | 
| 267 | 	 so round bit plus sticky bit should be already enough | 
| 268 | 	 for proper rounding.  */ | 
| 269 |       if (v.ieee.exponent == 130) | 
| 270 | 	{ | 
| 271 | 	  /* If the exponent would be in the normal range when | 
| 272 | 	     rounding to normal precision with unbounded exponent | 
| 273 | 	     range, the exact result is known and spurious underflows | 
| 274 | 	     must be avoided on systems detecting tininess after | 
| 275 | 	     rounding.  */ | 
| 276 | 	  if (TININESS_AFTER_ROUNDING) | 
| 277 | 	    { | 
| 278 | 	      w.d = a1 + u.d; | 
| 279 | 	      if (w.ieee.exponent == 131) | 
| 280 | 		return w.d * 0x1p-130L; | 
| 281 | 	    } | 
| 282 | 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, | 
| 283 | 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky | 
| 284 | 	     bit.  */ | 
| 285 | 	  w.d = 0.0L; | 
| 286 | 	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; | 
| 287 | 	  w.ieee.negative = v.ieee.negative; | 
| 288 | 	  v.ieee.mantissa1 &= ~3U; | 
| 289 | 	  v.d *= 0x1p-130L; | 
| 290 | 	  w.d *= 0x1p-2L; | 
| 291 | 	  return v.d + w.d; | 
| 292 | 	} | 
| 293 |       v.ieee.mantissa1 |= j; | 
| 294 |       return v.d * 0x1p-130L; | 
| 295 |     } | 
| 296 | } | 
| 297 | libm_alias_ldouble (__fma, fma) | 
| 298 |  |