This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Fix fma overflow results outside round-to-nearest mode (bug 14797)

From: "Joseph S. Myers" <joseph at codesourcery dot com>
To: <libc-alpha at sourceware dot org>
Date: Sat, 3 Nov 2012 20:19:06 +0000
Subject: Fix fma overflow results outside round-to-nearest mode (bug 14797)
Bug 14797 is the use of x * y + z in various fma implementations, for
cases that, based on the exponents alone, must have the multiplication
overflowing enough that the final result overflows independent of the
(finite) value of z, being incorrect in directed rounding modes: if x
* y rounds to the largest finite value of the relevant sign, because
of the rounding mode, and z has the opposite sign, then the final
result of x * y + z is the second largest value of the relevant sign,
when the final result of fma (x, y, z) should be the largest.

This patch fixes this by making the overflow case use x * y rather
than x * y + z.  Tested x86_64 (multi-arch both enabled and disabled)
and x86.

2012-11-03  Joseph Myers  <joseph@codesourcery.com>

	[BZ #14797]
	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Compute cases that
	definitely overflow as x * y not x * y + z.
	* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
	* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
	* math/libm-test.inc (fma_test): Add more tests.
	(fma_test_towardzero): Likewise.
	(fma_test_downward): Likewise.
	(fma_test_upward): Likewise.

diff --git a/math/libm-test.inc b/math/libm-test.inc
index 48241e0..55892c3 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4617,6 +4617,15 @@ fma_test (void)
   TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
 
+  TEST_fff_f (fma, max_value, max_value, min_value, plus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, max_value, max_value, -min_value, plus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, max_value, -max_value, min_value, minus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, max_value, -max_value, -min_value, minus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -max_value, max_value, min_value, minus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -max_value, max_value, -min_value, minus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -max_value, -max_value, min_value, plus_infty, OVERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -max_value, -max_value, -min_value, plus_infty, OVERFLOW_EXCEPTION);
+
 #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
   TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13);
   TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20);
@@ -4837,6 +4846,15 @@ fma_test_towardzero (void)
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
 
+      TEST_fff_f (fma, max_value, max_value, min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, max_value, -min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, -min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, -min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, -min_value, max_value, OVERFLOW_EXCEPTION);
+
 #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
       TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
@@ -5014,6 +5032,15 @@ fma_test_downward (void)
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
 
+      TEST_fff_f (fma, max_value, max_value, min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, max_value, -min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, min_value, minus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, -min_value, minus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, min_value, minus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, -min_value, minus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, min_value, max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, -min_value, max_value, OVERFLOW_EXCEPTION);
+
 #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
       TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION);
@@ -5191,6 +5218,15 @@ fma_test_upward (void)
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
 
+      TEST_fff_f (fma, max_value, max_value, min_value, plus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, max_value, -min_value, plus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, max_value, -max_value, -min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, max_value, -min_value, -max_value, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, min_value, plus_infty, OVERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -max_value, -max_value, -min_value, plus_infty, OVERFLOW_EXCEPTION);
+
 #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
       TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 07fe715..cd28830 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -55,16 +55,17 @@ __fma (double x, double y, double z)
 	 underflows to 0.  */
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
-      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is zero, compute as x * y + z.  */
+      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
+	 x * y + z.  */
       if (u.ieee.exponent == 0x7ff
 	  || v.ieee.exponent == 0x7ff
 	  || w.ieee.exponent == 0x7ff
-	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7ff + IEEE754_DOUBLE_BIAS
 	  || x == 0
 	  || y == 0)
 	return x * y + z;
+      /* If fma will certainly overflow, compute as x * y.  */
+      if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS)
+	return x * y;
       /* If x * y is less than 1/4 of DBL_DENORM_MIN, neither the
 	 result nor whether there is underflow depends on its exact
 	 value, only on its sign.  */
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index d576403..6fa663a 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -56,16 +56,18 @@ __fmal (long double x, long double y, long double z)
 	 underflows to 0.  */
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
-      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is zero, compute as x * y + z.  */
+      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
+	 x * y + z.  */
       if (u.ieee.exponent == 0x7fff
 	  || v.ieee.exponent == 0x7fff
 	  || w.ieee.exponent == 0x7fff
-	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
 	  || x == 0
 	  || y == 0)
 	return x * y + z;
+      /* If fma will certainly overflow, compute as x * y.  */
+      if (u.ieee.exponent + v.ieee.exponent
+	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
+	return x * y;
       /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
 	 result nor whether there is underflow depends on its exact
 	 value, only on its sign.  */
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index 32e71a1..53098b6 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -56,16 +56,18 @@ __fmal (long double x, long double y, long double z)
 	 underflows to 0.  */
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
-      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is zero, compute as x * y + z.  */
+      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
+	 x * y + z.  */
       if (u.ieee.exponent == 0x7fff
 	  || v.ieee.exponent == 0x7fff
 	  || w.ieee.exponent == 0x7fff
-	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
 	  || x == 0
 	  || y == 0)
 	return x * y + z;
+      /* If fma will certainly overflow, compute as x * y.  */
+      if (u.ieee.exponent + v.ieee.exponent
+	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
+	return x * y;
       /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
 	 result nor whether there is underflow depends on its exact
 	 value, only on its sign.  */

-- 
Joseph S. Myers
joseph@codesourcery.com
Follow-Ups:
- Re: Fix fma overflow results outside round-to-nearest mode (bug 14797)
  - From: Andreas Jaeger
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]