This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.15-432-gbd37f2e


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  bd37f2ee314147c69a39817d590149cf1181ef47 (commit)
       via  d0adc9223031b606c3c7781b4ec41462796ab313 (commit)
       via  0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876 (commit)
       via  eb92c487b35e26aa1e08815c4480d0bc5cc9f370 (commit)
       via  7d2e8012cfb4fa30086ef330ef1b993cfcc3dd99 (commit)
       via  b4dabbb47a174e1075b5b93f29093319eab69f2c (commit)
       via  4851a949b4cd1f280b56a728c784aaa85e51124c (commit)
      from  e79d442ee64ef2426ddd29a1fe1174108e845b69 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bd37f2ee314147c69a39817d590149cf1181ef47

commit bd37f2ee314147c69a39817d590149cf1181ef47
Author: Richard Henderson <rth@twiddle.net>
Date:   Sun Mar 18 15:58:00 2012 -0700

    Optimize private 387 fenv access; share code between i386 and x86_64.

diff --git a/ChangeLog b/ChangeLog
index 7efa13b..88ee593 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/i386/fpu/fenv_private.h: New file.
+	* sysdeps/i386/fpu/math_private.h: Use it.
+	(math_opt_barrier, math_force_eval): Remove.
+	(libc_feholdexcept_setround_53bit): Remove.
+	(libc_feupdateenv_53bit): Remove.
+	* sysdeps/x86_64/fpu/math_private.h: Likewise.
+	(math_opt_barrier, math_force_eval): Remove.
+	(libc_feholdexcept): Remove.
+	(libc_feholdexcept_setround): Remove.
+	(libc_fetestexcept, libc_fesetenv): Remove.
+	(libc_feupdateenv_test): Remove.
+	(libc_feupdateenv, libc_feholdsetround): Remove.
+	(libc_feresetround): Remove.
+
 	* sysdeps/i386/fpu/feholdexcpt.c (feholdexcept): Avoid the fldenv.
 	* sysdeps/x86_64/fpu/feholdexcpt.c (feholdexcept): Likewise.
 
diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h
new file mode 100644
index 0000000..f33f57c
--- /dev/null
+++ b/sysdeps/i386/fpu/fenv_private.h
@@ -0,0 +1,304 @@
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+#ifdef __SSE2_MATH__
+# define math_opt_barrier(x) \
+  ({ __typeof(x) __x;					\
+     if (sizeof (x) <= sizeof (double))			\
+       __asm ("" : "=x" (__x) : "0" (x));		\
+     else						\
+       __asm ("" : "=t" (__x) : "0" (x));		\
+     __x; })
+# define math_force_eval(x) \
+  do {							\
+    if (sizeof (x) <= sizeof (double))			\
+      __asm __volatile ("" : : "x" (x));		\
+    else						\
+      __asm __volatile ("" : : "f" (x));		\
+  } while (0)
+#else
+# define math_opt_barrier(x) \
+  ({ __typeof (x) __x;					\
+     __asm ("" : "=t" (__x) : "0" (x));			\
+     __x; })
+# define math_force_eval(x) \
+  do {							\
+    __typeof (x) __x = (x);				\
+    if (sizeof (x) <= sizeof (double))			\
+      __asm __volatile ("" : : "m" (__x));		\
+    else						\
+      __asm __volatile ("" : : "f" (__x));		\
+  } while (0)
+#endif
+
+/* This file is used by both the 32- and 64-bit ports.  The 64-bit port
+   has a field in the fenv_t for the mxcsr; the 32-bit port does not.
+   Instead, we (ab)use the only 32-bit field extant in the struct.  */
+#ifndef __x86_64__
+# define __mxcsr	__eip
+#endif
+
+
+/* All of these functions are private to libm, and are all used in pairs
+   to save+change the fp state and restore the original state.  Thus we
+   need not care for both the 387 and the sse unit, only the one we're
+   actually using.  */
+
+#if defined __AVX__ || defined SSE2AVX
+# define STMXCSR "vstmxcsr"
+# define LDMXCSR "vldmxcsr"
+#else
+# define STMXCSR "stmxcsr"
+# define LDMXCSR "ldmxcsr"
+#endif
+
+static __always_inline void
+libc_feholdexcept_sse (fenv_t *e)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = (mxcsr | 0x1f80) & ~0x3f;
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feholdexcept_387 (fenv_t *e)
+{
+  /* Recall that fnstenv has a side-effect of masking exceptions.
+     Clobber all of the fp registers so that the TOS field is 0.  */
+  asm volatile ("fnstenv %0; fnclex"
+		: "=m"(*e)
+		: : "st", "st(1)", "st(2)", "st(3)",
+		    "st(4)", "st(5)", "st(6)", "st(7)");
+}
+
+static __always_inline void
+libc_feholdexcept_setround_sse (fenv_t *e, int r)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+/* Set both rounding mode and precision.  A convenience function for use
+   by libc_feholdexcept_setround and libc_feholdexcept_setround_53bit. */
+static __always_inline void
+libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
+{
+  libc_feholdexcept_387 (e);
+
+  fpu_control_t cw = e->__control_word;
+  cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+  cw |= r | 0x3f;
+  _FPU_SETCW (cw);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387 (fenv_t *e, int r)
+{
+  libc_feholdexcept_setround_387_prec (e, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
+{
+  libc_feholdexcept_setround_387_prec (e, r | _FPU_DOUBLE);
+}
+
+static __always_inline int
+libc_fetestexcept_sse (int e)
+{
+  unsigned int mxcsr;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  return mxcsr & e & FE_ALL_EXCEPT;
+}
+
+static __always_inline int
+libc_fetestexcept_387 (int ex)
+{
+  fexcept_t temp;
+  asm volatile ("fnstsw %0" : "=a" (temp));
+  return temp & ex & FE_ALL_EXCEPT;
+}
+
+static __always_inline void
+libc_fesetenv_sse (fenv_t *e)
+{
+  asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
+}
+
+static __always_inline void
+libc_fesetenv_387 (fenv_t *e)
+{
+  /* Clobber all fp registers so that the TOS value we saved earlier is
+     compatible with the current state of the compiler.  */
+  asm volatile ("fldenv %0"
+		: : "m" (*e)
+		: "st", "st(1)", "st(2)", "st(3)",
+		  "st(4)", "st(5)", "st(6)", "st(7)");
+}
+
+static __always_inline int
+libc_feupdateenv_test_sse (fenv_t *e, int ex)
+{
+  unsigned int mxcsr, old_mxcsr, cur_ex;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  cur_ex = mxcsr & FE_ALL_EXCEPT;
+
+  /* Merge current exceptions with the old environment.  */
+  old_mxcsr = e->__mxcsr;
+  mxcsr = old_mxcsr | cur_ex;
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+
+  /* Raise SIGFPE for any new exceptions since the hold.  Expect that
+     the normal environment has all exceptions masked.  */
+  if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
+    __feraiseexcept (cur_ex);
+
+  /* Test for exceptions raised since the hold.  */
+  return cur_ex & ex;
+}
+
+static __always_inline int
+libc_feupdateenv_test_387 (fenv_t *e, int ex)
+{
+  fexcept_t cur_ex;
+
+  /* Save current exceptions.  */
+  asm volatile ("fnstsw %0" : "=a" (cur_ex));
+  cur_ex &= FE_ALL_EXCEPT;
+
+  /* Reload original environment.  */
+  libc_fesetenv_387 (e);
+
+  /* Merge current exceptions.  */
+  __feraiseexcept (cur_ex);
+
+  /* Test for exceptions raised since the hold.  */
+  return cur_ex & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_sse (fenv_t *e)
+{
+  libc_feupdateenv_test_sse (e, 0);
+}
+
+static __always_inline void
+libc_feupdateenv_387 (fenv_t *e)
+{
+  libc_feupdateenv_test_387 (e, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_sse (fenv_t *e, int r)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = (mxcsr & ~0x6000) | (r << 3);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feholdsetround_387_prec (fenv_t *e, int r)
+{
+  fpu_control_t cw;
+
+  _FPU_GETCW (cw);
+  e->__control_word = cw;
+  cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+  cw |= r;
+  _FPU_SETCW (cw);
+}
+
+static __always_inline void
+libc_feholdsetround_387 (fenv_t *e, int r)
+{
+  libc_feholdsetround_387_prec (e, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdsetround_387_53bit (fenv_t *e, int r)
+{
+  libc_feholdsetround_387_prec (e, r | _FPU_DOUBLE);
+}
+
+static __always_inline void
+libc_feresetround_sse (fenv_t *e)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feresetround_387 (fenv_t *e)
+{
+  _FPU_SETCW (e->__control_word);
+}
+
+#ifdef __SSE_MATH__
+# define libc_feholdexceptf		libc_feholdexcept_sse
+# define libc_feholdexcept_setroundf	libc_feholdexcept_setround_sse
+# define libc_fetestexceptf		libc_fetestexcept_sse
+# define libc_fesetenvf			libc_fesetenv_sse
+# define libc_feupdateenv_testf		libc_feupdateenv_test_sse
+# define libc_feupdateenvf		libc_feupdateenv_sse
+# define libc_feholdsetroundf		libc_feholdsetround_sse
+# define libc_feresetroundf		libc_feresetround_sse
+#else
+# define libc_feholdexceptf		libc_feholdexcept_387
+# define libc_feholdexcept_setroundf	libc_feholdexcept_setround_387
+# define libc_fetestexceptf		libc_fetestexcept_387
+# define libc_fesetenvf			libc_fesetenv_387
+# define libc_feupdateenv_testf		libc_feupdateenv_test_387
+# define libc_feupdateenvf		libc_feupdateenv_387
+# define libc_feholdsetroundf		libc_feholdsetround_387
+# define libc_feresetroundf		libc_feresetround_387
+#endif /* __SSE_MATH__ */
+
+#ifdef __SSE2_MATH__
+# define libc_feholdexcept		libc_feholdexcept_sse
+# define libc_feholdexcept_setround	libc_feholdexcept_setround_sse
+# define libc_fetestexcept		libc_fetestexcept_sse
+# define libc_fesetenv			libc_fesetenv_sse
+# define libc_feupdateenv_test		libc_feupdateenv_test_sse
+# define libc_feupdateenv		libc_feupdateenv_sse
+# define libc_feholdsetround		libc_feholdsetround_sse
+# define libc_feresetround		libc_feresetround_sse
+#else
+# define libc_feholdexcept		libc_feholdexcept_387
+# define libc_feholdexcept_setround	libc_feholdexcept_setround_387
+# define libc_fetestexcept		libc_fetestexcept_387
+# define libc_fesetenv			libc_fesetenv_387
+# define libc_feupdateenv_test		libc_feupdateenv_test_387
+# define libc_feupdateenv		libc_feupdateenv_387
+# define libc_feholdsetround		libc_feholdsetround_387
+# define libc_feresetround		libc_feresetround_387
+#endif /* __SSE2_MATH__ */
+
+#define libc_feholdexceptl		libc_feholdexcept_387
+#define libc_feholdexcept_setroundl	libc_feholdexcept_setround_387
+#define libc_fetestexceptl		libc_fetestexcept_387
+#define libc_fesetenvl			libc_fesetenv_387
+#define libc_feupdateenv_testl		libc_feupdateenv_test_387
+#define libc_feupdateenvl		libc_feupdateenv_387
+#define libc_feholdsetroundl		libc_feholdsetround_387
+#define libc_feresetroundl		libc_feresetround_387
+
+#ifndef __SSE2_MATH__
+# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit
+# define libc_feholdsetround_53bit	libc_feholdsetround_387_53bit
+#endif
+
+#undef __mxcsr
+
+#endif /* FENV_PRIVATE_H */
diff --git a/sysdeps/i386/fpu/math_private.h b/sysdeps/i386/fpu/math_private.h
index 9236448..541a7f8 100644
--- a/sysdeps/i386/fpu/math_private.h
+++ b/sysdeps/i386/fpu/math_private.h
@@ -1,48 +1,6 @@
 #ifndef _MATH_PRIVATE_H
 
-#include <fenv.h>
-#include <fpu_control.h>
-
-#define math_opt_barrier(x) \
-({ __typeof (x) __x;					\
-   __asm ("" : "=t" (__x) : "0" (x));			\
-   __x; })
-#define math_force_eval(x) \
-do							\
-  {							\
-    __typeof (x) __x = (x);				\
-    if (sizeof (x) <= sizeof (double))			\
-      __asm __volatile ("" : : "m" (__x));		\
-    else						\
-      __asm __volatile ("" : : "f" (__x));		\
-  }							\
-while (0)
-
-static __always_inline void
-libc_feholdexcept_setround_53bit (fenv_t *e, int r)
-{
-  feholdexcept (e);
-  fesetround (r);
-
-  fpu_control_t cw;
-  _FPU_GETCW (cw);
-  cw &= ~(fpu_control_t) _FPU_EXTENDED;
-  cw |= _FPU_DOUBLE;
-  _FPU_SETCW (cw);
-}
-#define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_53bit
-
-static __always_inline void
-libc_feupdateenv_53bit (fenv_t *e)
-{
-  feupdateenv (e);
-
-  /* Unfortunately, feupdateenv fails to affect the rounding precision.
-     We can get that back by restoring the exact control word we saved.  */
-  _FPU_SETCW (e->__control_word);
-}
-#define libc_feupdateenv_53bit libc_feupdateenv_53bit
-
+#include "fenv_private.h"
 #include_next <math_private.h>
 
-#endif /* _MATH_PRIVATE_H */
+#endif
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index aa208b2..67c5f6a 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -1,33 +1,12 @@
 #ifndef X86_64_MATH_PRIVATE_H
 #define X86_64_MATH_PRIVATE_H 1
 
-#include <fenv.h>
-
-#define math_opt_barrier(x) \
-  ({ __typeof(x) __x;							      \
-     if (sizeof (x) <= sizeof (double))					      \
-       __asm ("" : "=x" (__x) : "0" (x));				      \
-     else								      \
-       __asm ("" : "=t" (__x) : "0" (x));				      \
-     __x; })
-#define math_force_eval(x) \
-  do {									      \
-    if (sizeof (x) <= sizeof (double))					      \
-      __asm __volatile ("" : : "x" (x));				      \
-    else								      \
-      __asm __volatile ("" : : "f" (x));				      \
-  } while (0)
-
 /* We can do a few things better on x86-64.  */
 
 #if defined __AVX__ || defined SSE2AVX
 # define MOVD "vmovd"
-# define STMXCSR "vstmxcsr"
-# define LDMXCSR "vldmxcsr"
 #else
 # define MOVD "movd"
-# define STMXCSR "stmxcsr"
-# define LDMXCSR "ldmxcsr"
 #endif
 
 /* Direct movement of float into integer register.  */
@@ -64,104 +43,7 @@
     f = f__;								      \
   } while (0)
 
-/* Specialized variants of the <fenv.h> interfaces which only handle
-   either the FPU or the SSE unit.  */
-static __always_inline void
-libc_feholdexcept (fenv_t *e)
-{
-  unsigned int mxcsr;
-  asm (STMXCSR " %0" : "=m" (*&mxcsr));
-  e->__mxcsr = mxcsr;
-  mxcsr = (mxcsr | 0x1f80) & ~0x3f;
-  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
-}
-#define libc_feholdexcept  libc_feholdexcept
-#define libc_feholdexceptf libc_feholdexcept
-
-static __always_inline void
-libc_feholdexcept_setround (fenv_t *e, int r)
-{
-  unsigned int mxcsr;
-  asm (STMXCSR " %0" : "=m" (*&mxcsr));
-  e->__mxcsr = mxcsr;
-  mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
-  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
-}
-#define libc_feholdexcept_setround  libc_feholdexcept_setround
-#define libc_feholdexcept_setroundf libc_feholdexcept_setround
-
-static __always_inline int
-libc_fetestexcept (int e)
-{
-  unsigned int mxcsr;
-  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
-  return mxcsr & e & FE_ALL_EXCEPT;
-}
-#define libc_fetestexcept  libc_fetestexcept
-#define libc_fetestexceptf libc_fetestexcept
-
-static __always_inline void
-libc_fesetenv (fenv_t *e)
-{
-  asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
-}
-#define libc_fesetenv  libc_fesetenv
-#define libc_fesetenvf libc_fesetenv
-
-static __always_inline int
-libc_feupdateenv_test (fenv_t *e, int ex)
-{
-  unsigned int mxcsr, old_mxcsr, cur_ex;
-  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
-  cur_ex = mxcsr & FE_ALL_EXCEPT;
-
-  /* Merge current exceptions with the old environment.  */
-  old_mxcsr = e->__mxcsr;
-  mxcsr = old_mxcsr | cur_ex;
-  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
-
-  /* Raise SIGFPE for any new exceptions since the hold.  Expect that
-     the normal environment has all exceptions masked.  */
-  if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
-    __feraiseexcept (cur_ex);
-
-  /* Test for exceptions raised since the hold.  */
-  return cur_ex & ex;
-}
-#define libc_feupdateenv_test  libc_feupdateenv_test
-#define libc_feupdateenv_testf libc_feupdateenv_test
-
-static __always_inline void
-libc_feupdateenv (fenv_t *e)
-{
-  libc_feupdateenv_test (e, 0);
-}
-#define libc_feupdateenv  libc_feupdateenv
-#define libc_feupdateenvf libc_feupdateenv
-
-static __always_inline void
-libc_feholdsetround (fenv_t *e, int r)
-{
-  unsigned int mxcsr;
-  asm (STMXCSR " %0" : "=m" (*&mxcsr));
-  e->__mxcsr = mxcsr;
-  mxcsr = (mxcsr & ~0x6000) | (r << 3);
-  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
-}
-#define libc_feholdsetround  libc_feholdsetround
-#define libc_feholdsetroundf libc_feholdsetround
-
-static __always_inline void
-libc_feresetround (fenv_t *e)
-{
-  unsigned int mxcsr;
-  asm (STMXCSR " %0" : "=m" (*&mxcsr));
-  mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
-  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
-}
-#define libc_feresetround  libc_feresetround
-#define libc_feresetroundf libc_feresetround
-
+#include <sysdeps/i386/fpu/fenv_private.h>
 #include_next <math_private.h>
 
 extern __always_inline double

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d0adc9223031b606c3c7781b4ec41462796ab313

commit d0adc9223031b606c3c7781b4ec41462796ab313
Author: Richard Henderson <rth@twiddle.net>
Date:   Sun Mar 18 15:51:53 2012 -0700

    i386/x86_64: Optimize feholdexcept.

diff --git a/ChangeLog b/ChangeLog
index 12a99d2..7efa13b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/i386/fpu/feholdexcpt.c (feholdexcept): Avoid the fldenv.
+	* sysdeps/x86_64/fpu/feholdexcpt.c (feholdexcept): Likewise.
+
 	* sysdeps/generic/math_private.h (default_libc_feupdateenv_test): New.
 	(libc_feupdateenv_test, libc_feupdateenv_testf): New.
 	(libc_feupdateenv_testl): New.
diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c
index a09d45e..7e10389 100644
--- a/sysdeps/i386/fpu/feholdexcpt.c
+++ b/sysdeps/i386/fpu/feholdexcpt.c
@@ -1,6 +1,5 @@
 /* Store current floating-point environment and clear exceptions.
-   Copyright (C) 1997, 1999, 2003, 2004, 2005, 2007
-   Free Software Foundation, Inc.
+   Copyright (C) 1997-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -26,19 +25,9 @@
 int
 feholdexcept (fenv_t *envp)
 {
-  fenv_t temp;
-
-  /* Store the environment.  */
-  __asm__ ("fnstenv %0" : "=m" (temp));
-  *envp = temp;
-
-  /* Now set all exceptions to non-stop.  */
-  temp.__control_word |= 0x3f;
-
-  /* And clear all exceptions.  */
-  temp.__status_word &= ~0x3f;
-
-  __asm__ ("fldenv %0" : : "m" (temp));
+  /* Store the environment.  Recall that fnstenv has a side effect of
+     masking all exceptions.  Then clear all exceptions.  */
+  __asm__ volatile ("fnstenv %0; fnclex" : "=m" (*envp));
 
   /* If the CPU supports SSE we set the MXCSR as well.  */
   if ((GLRO(dl_hwcap) & HWCAP_I386_XMM) != 0)
diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c
index b547b34..b380479 100644
--- a/sysdeps/x86_64/fpu/feholdexcpt.c
+++ b/sysdeps/x86_64/fpu/feholdexcpt.c
@@ -1,5 +1,5 @@
 /* Store current floating-point environment and clear exceptions.
-   Copyright (C) 2001, 2005, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2001-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,20 +22,13 @@ int
 feholdexcept (fenv_t *envp)
 {
   unsigned int mxcsr;
-  fenv_t temp;
 
-  /* Store the environment.  */
-  __asm__ ("fnstenv %0\n"
-	   "stmxcsr %1" : "=m" (temp), "=m" (temp.__mxcsr));
-  *envp = temp;
-
-  /* Now set all exceptions to non-stop, first the x87 FPU.  */
-  temp.__control_word |= 0x3f;
-
-  /* And clear all exceptions.  */
-  temp.__status_word &= ~0x3f;
-
-  __asm__ ("fldenv %0" : : "m" (temp));
+  /* Store the environment.  Recall that fnstenv has a side effect of
+     masking all exceptions.  Then clear all exceptions.  */
+  __asm__ ("fnstenv %0\n\t"
+	   "stmxcsr %1\n\t"
+	   "fnclex"
+	   : "=m" (*envp), "=m" (envp->__mxcsr));
 
   /* Set the SSE MXCSR register.  */
   mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f;

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876

commit 0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876
Author: Richard Henderson <rth@twiddle.net>
Date:   Sat Mar 10 08:53:05 2012 -0800

    Create and use libc_feupdateenv_test.
    
    We can reduce the number of STMXCSR, and often we can avoid the
    call to __feraiseexcept.

diff --git a/ChangeLog b/ChangeLog
index 277a201..12a99d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/generic/math_private.h (default_libc_feupdateenv_test): New.
+	(libc_feupdateenv_test, libc_feupdateenv_testf): New.
+	(libc_feupdateenv_testl): New.
+	* sysdeps/x86_64/fpu/math_private.h (libc_feupdateenv_test): New.
+	(libc_feupdateenv_testf): New.
+	(libc_feupdateenv): Use libc_feupdateenv_test.
+	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use libc_feupdateenv_test.
+	* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Likewise.
+
 	* sysdeps/generic/math_private.h (libc_feholdsetround): New.
 	(libc_feholdsetroundf, libc_feholdsetroundl): New.
 	(libc_feresetround, libc_feresetroundf, libc_feresetroundl): New.
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 0b945f9..813ad93 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -457,6 +457,24 @@ default_libc_feupdateenv (fenv_t *e)
 # define libc_feupdateenv_53bit libc_feupdateenv
 #endif
 
+static __always_inline int
+default_libc_feupdateenv_test (fenv_t *e, int ex)
+{
+  int ret = fetestexcept (ex);
+  feupdateenv (e);
+  return ret;
+}
+
+#ifndef libc_feupdateenv_test
+# define libc_feupdateenv_test  default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testf
+# define libc_feupdateenv_testf default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testl
+# define libc_feupdateenv_testl default_libc_feupdateenv_test
+#endif
+
 /* Save and set the rounding mode.  The use of fenv_t to store the old mode
    allows a target-specific version of this function to avoid converting the
    rounding mode from the fpu format.  By default we have no choice but to
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index a27e246..ab20a80 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -149,35 +149,36 @@ __fma (double x, double y, double z)
 
   fenv_t env;
   libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;
 
+  if (__builtin_expect (adjust < 0, 0))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0)
+	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+      v.d = a1 + u.d;
+    }
+
+  /* Reset rounding mode and test for inexact simultaneously.  */
+  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
   if (__builtin_expect (adjust == 0, 1))
     {
       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
+	u.ieee.mantissa1 |= j;
       /* Result is a1 + u.d.  */
       return a1 + u.d;
     }
   else if (__builtin_expect (adjust > 0, 1))
     {
       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
+	u.ieee.mantissa1 |= j;
       /* Result is a1 + u.d, scaled up.  */
       return (a1 + u.d) * 0x1p53;
     }
   else
     {
-      if ((u.ieee.mantissa1 & 1) == 0)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      v.d = a1 + u.d;
-      int j = libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
-      /* Ensure the following computations are performed in default rounding
-	 mode instead of just reusing the round to zero computation.  */
-      asm volatile ("" : "=m" (u) : "m" (u));
       /* If a1 + u.d is exact, the only rounding happens during
 	 scaling down.  */
       if (j == 0)
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c
index 00cd382..7a939aa 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/dbl-64/s_fmaf.c
@@ -35,12 +35,18 @@ __fmaf (float x, float y, float z)
   /* Multiplication is always exact.  */
   double temp = (double) x * (double) y;
   union ieee754_double u;
-  libc_feholdexcept_setroundf (&env, FE_TOWARDZERO);
+
+  libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
   /* Perform addition with round to odd.  */
   u.d = temp + (double) z;
+
+  /* Reset rounding mode and test for inexact simultaneously.  */
+  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
   if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-    u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-  libc_feupdateenv (&env);
+    u.ieee.mantissa1 |= j;
+
   /* And finally truncation with round to nearest.  */
   return (float) u.d;
 }
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 3289afc..aa208b2 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -108,13 +108,33 @@ libc_fesetenv (fenv_t *e)
 #define libc_fesetenv  libc_fesetenv
 #define libc_fesetenvf libc_fesetenv
 
+static __always_inline int
+libc_feupdateenv_test (fenv_t *e, int ex)
+{
+  unsigned int mxcsr, old_mxcsr, cur_ex;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  cur_ex = mxcsr & FE_ALL_EXCEPT;
+
+  /* Merge current exceptions with the old environment.  */
+  old_mxcsr = e->__mxcsr;
+  mxcsr = old_mxcsr | cur_ex;
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+
+  /* Raise SIGFPE for any new exceptions since the hold.  Expect that
+     the normal environment has all exceptions masked.  */
+  if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
+    __feraiseexcept (cur_ex);
+
+  /* Test for exceptions raised since the hold.  */
+  return cur_ex & ex;
+}
+#define libc_feupdateenv_test  libc_feupdateenv_test
+#define libc_feupdateenv_testf libc_feupdateenv_test
+
 static __always_inline void
 libc_feupdateenv (fenv_t *e)
 {
-  unsigned int mxcsr;
-  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
-  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));
-  __feraiseexcept (mxcsr & FE_ALL_EXCEPT);
+  libc_feupdateenv_test (e, 0);
 }
 #define libc_feupdateenv  libc_feupdateenv
 #define libc_feupdateenvf libc_feupdateenv

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=eb92c487b35e26aa1e08815c4480d0bc5cc9f370

commit eb92c487b35e26aa1e08815c4480d0bc5cc9f370
Author: Richard Henderson <rth@twiddle.net>
Date:   Sat Mar 10 08:55:53 2012 -0800

    Create and use SET_RESTORE_ROUND{,_NOEX,_53BIT}{,F,L}.

diff --git a/ChangeLog b/ChangeLog
index aace9ef..277a201 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,29 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/generic/math_private.h (libc_feholdsetround): New.
+	(libc_feholdsetroundf, libc_feholdsetroundl): New.
+	(libc_feresetround, libc_feresetroundf, libc_feresetroundl): New.
+	(libc_feresetround_noex): New.
+	(libc_feresetround_noexf): New.
+	(libc_feresetround_noexl): New.
+	(SET_RESTORE_ROUND, SET_RESTORE_ROUNDF, SET_RESTORE_ROUNDL): New.
+	(SET_RESTORE_ROUND_NOEX, SET_RESTORE_ROUND_NOEXF): New.
+	(SET_RESTORE_ROUND_NOEXL, SET_RESTORE_ROUND_53BIT): New.
+	* sysdeps/ieee754/dbl-64/e_exp.c (__ieee754_exp): Use
+	SET_RESTORE_ROUND.
+	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Use SET_RESTORE_ROUND_53BIT.
+	(__cos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_tan.c (__tan): Likewise.
+	* sysdeps/ieee754/dbl-64/e_exp2.c (__ieee754_exp2): Use
+	SET_RESTORE_ROUND_NOEX.
+	* sysdeps/ieee754/dbl-64/e_exp2f.c (__ieee754_exp2f): Use
+	SET_RESTORE_ROUND_NOEXF.
+	* sysdeps/ieee754/flt-32/e_expf.c (__ieee754_expf): Likewise.
+	* sysdeps/x86_64/fpu/math_private.h (libc_feholdsetround): New.
+	(libc_feholdsetroundf): New.
+	(libc_feresetround, libc_feresetroundf): New.
+
 	* sysdeps/i386/fpu/math_private.h: Include <fenv.h>, <fpu_control.h>.
 	(libc_feholdexcept_setround_53bit): Convert from macro to function.
 	(libc_feupdateenv_53bit): Likewise.  Don't force _FPU_EXTENDED.
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index ab4b47b..0b945f9 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -457,6 +457,75 @@ default_libc_feupdateenv (fenv_t *e)
 # define libc_feupdateenv_53bit libc_feupdateenv
 #endif
 
+/* Save and set the rounding mode.  The use of fenv_t to store the old mode
+   allows a target-specific version of this function to avoid converting the
+   rounding mode from the fpu format.  By default we have no choice but to
+   manipulate the entire env.  */
+
+#ifndef libc_feholdsetround
+# define libc_feholdsetround  libc_feholdexcept_setround
+#endif
+#ifndef libc_feholdsetroundf
+# define libc_feholdsetroundf libc_feholdexcept_setroundf
+#endif
+#ifndef libc_feholdsetroundl
+# define libc_feholdsetroundl libc_feholdexcept_setroundl
+#endif
+
+/* ... and the reverse.  */
+
+#ifndef libc_feresetround
+# define libc_feresetround  libc_feupdateenv
+#endif
+#ifndef libc_feresetroundf
+# define libc_feresetroundf libc_feupdateenvf
+#endif
+#ifndef libc_feresetroundl
+# define libc_feresetroundl libc_feupdateenvl
+#endif
+
+/* ... and a version that may also discard exceptions.  */
+
+#ifndef libc_feresetround_noex
+# define libc_feresetround_noex  libc_fesetenv
+#endif
+#ifndef libc_feresetround_noexf
+# define libc_feresetround_noexf libc_fesetenvf
+#endif
+#ifndef libc_feresetround_noexl
+# define libc_feresetround_noexl libc_fesetenvl
+#endif
+
+/* Save and restore the rounding mode within a lexical block.  */
+
+#define SET_RESTORE_ROUND(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround)));	\
+  libc_feholdsetround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDF(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundf)));	\
+  libc_feholdsetroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDL(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundl)));	\
+  libc_feholdsetroundl (&__libc_save_rm, (RM))
+
+/* Save and restore the rounding mode within a lexical block, and also
+   the set of exceptions raised within the block may be discarded.  */
+
+#define SET_RESTORE_ROUND_NOEX(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noex))); \
+  libc_feholdsetround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUND_NOEXF(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexf))); \
+  libc_feholdsetroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUND_NOEXL(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexl))); \
+  libc_feholdsetroundl (&__libc_save_rm, (RM))
+
+/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits.  */
+#define SET_RESTORE_ROUND_53BIT(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feupdateenv_53bit))); \
+  libc_feholdexcept_setround_53bit (&__libc_save_rm, (RM))
+
 #define __nan(str) \
   (__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str))
 #define __nanf(str) \
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index cb8d9e8..5deba5e 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -59,10 +59,9 @@ __ieee754_exp(double x) {
   int4 k;
 #endif
   int4 i,j,m,n,ex;
-  fenv_t env;
   double retval;
 
-  libc_feholdexcept_setround (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND (FE_TONEAREST);
 
   junk1.x = x;
   m = junk1.i[HIGH_HALF];
@@ -157,7 +156,6 @@ __ieee754_exp(double x) {
     else { retval = __slowexp(x); goto ret; }
   }
  ret:
-  libc_feupdateenv (&env);
   return retval;
 }
 #ifndef __ieee754_exp
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index 4cf879b..e57ec92 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -61,57 +61,56 @@ __ieee754_exp2 (double x)
       int tval, unsafe;
       double rx, x22, result;
       union ieee754_double ex2_u, scale_u;
-      fenv_t oldenv;
-
-      libc_feholdexcept_setround (&oldenv, FE_TONEAREST);
-
-      /* 1. Argument reduction.
-	 Choose integers ex, -256 <= t < 256, and some real
-	 -1/1024 <= x1 <= 1024 so that
-	 x = ex + t/512 + x1.
-
-	 First, calculate rx = ex + t/512.  */
-      rx = x + THREEp42;
-      rx -= THREEp42;
-      x -= rx;  /* Compute x=x1. */
-      /* Compute tval = (ex*512 + t)+256.
-	 Now, t = (tval mod 512)-256 and ex=tval/512  [that's mod, NOT %; and
-	 /-round-to-nearest not the usual c integer /].  */
-      tval = (int) (rx * 512.0 + 256.0);
-
-      /* 2. Adjust for accurate table entry.
-	 Find e so that
-	 x = ex + t/512 + e + x2
-	 where -1e6 < e < 1e6, and
-	 (double)(2^(t/512+e))
-	 is accurate to one part in 2^-64.  */
-
-      /* 'tval & 511' is the same as 'tval%512' except that it's always
-	 positive.
-	 Compute x = x2.  */
-      x -= exp2_deltatable[tval & 511];
-
-      /* 3. Compute ex2 = 2^(t/512+e+ex).  */
-      ex2_u.d = exp2_accuratetable[tval & 511];
-      tval >>= 9;
-      unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
-      ex2_u.ieee.exponent += tval >> unsafe;
-      scale_u.d = 1.0;
-      scale_u.ieee.exponent += tval - (tval >> unsafe);
-
-      /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
-	 with maximum error in [-2^-10-2^-30,2^-10+2^-30]
-	 less than 10^-19.  */
-
-      x22 = (((.0096181293647031180
-	       * x + .055504110254308625)
-	      * x + .240226506959100583)
-	     * x + .69314718055994495) * ex2_u.d;
-      math_opt_barrier (x22);
 
-      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
-      libc_fesetenv (&oldenv);
+      {
+	SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
+
+	/* 1. Argument reduction.
+	   Choose integers ex, -256 <= t < 256, and some real
+	   -1/1024 <= x1 <= 1/1024 so that
+	   x = ex + t/512 + x1.
+
+	   First, calculate rx = ex + t/512.  */
+	rx = x + THREEp42;
+	rx -= THREEp42;
+	x -= rx;  /* Compute x=x1. */
+	/* Compute tval = (ex*512 + t)+256.
+	   Now, t = (tval mod 512)-256 and ex=tval/512  [that's mod, NOT %;
+	   and /-round-to-nearest not the usual c integer /].  */
+	tval = (int) (rx * 512.0 + 256.0);
+
+	/* 2. Adjust for accurate table entry.
+	   Find e so that
+	   x = ex + t/512 + e + x2
+	   where -1e6 < e < 1e6, and
+	   (double)(2^(t/512+e))
+	   is accurate to one part in 2^-64.  */
+
+	/* 'tval & 511' is the same as 'tval%512' except that it's always
+	   positive.
+	   Compute x = x2.  */
+	x -= exp2_deltatable[tval & 511];
+
+	/* 3. Compute ex2 = 2^(t/512+e+ex).  */
+	ex2_u.d = exp2_accuratetable[tval & 511];
+	tval >>= 9;
+	unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
+	ex2_u.ieee.exponent += tval >> unsafe;
+	scale_u.d = 1.0;
+	scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+	/* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
+	   with maximum error in [-2^-10-2^-30,2^-10+2^-30]
+	   less than 10^-19.  */
+
+	x22 = (((.0096181293647031180
+		 * x + .055504110254308625)
+		* x + .240226506959100583)
+	       * x + .69314718055994495) * ex2_u.d;
+        math_opt_barrier (x22);
+      }
 
+      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
       result = x22 * x + ex2_u.d;
 
       if (!unsafe)
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 550633c..f936a72 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -85,10 +85,9 @@ __ieee754_pow(double x, double y) {
        (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0))  &&
 				      /*   2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
       (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) {              /* if y<-1 or y>1   */
-    fenv_t env;
     double retval;
 
-    libc_feholdexcept_setround (&env, FE_TONEAREST);
+    SET_RESTORE_ROUND (FE_TONEAREST);
 
     z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
     t = y*134217729.0;
@@ -105,7 +104,6 @@ __ieee754_pow(double x, double y) {
     t = __exp1(a1,a2,1.9e16*error);     /* return -10 or 0 if wasn't computed exactly */
     retval = (t>0)?t:power1(x,y);
 
-    libc_feupdateenv (&env);
     return retval;
   }
 
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 4b4b675..7b9252f 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -108,10 +108,9 @@ __sin(double x){
 #if 0
 	int4 nn;
 #endif
-	fenv_t env;
 	double retval = 0;
 
-	libc_feholdexcept_setround_53bit (&env, FE_TONEAREST);
+	SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
 
 	u.x = x;
 	m = u.i[HIGH_HALF];
@@ -365,7 +364,6 @@ __sin(double x){
 	}
 
  ret:
-	libc_feupdateenv_53bit (&env);
 	return retval;
 }
 
@@ -383,10 +381,9 @@ __cos(double x)
   mynumber u,v;
   int4 k,m,n;
 
-  fenv_t env;
   double retval = 0;
 
-  libc_feholdexcept_setround_53bit (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
 
   u.x = x;
   m = u.i[HIGH_HALF];
@@ -635,7 +632,6 @@ __cos(double x)
   }
 
  ret:
-  libc_feupdateenv_53bit (&env);
   return retval;
 }
 
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index 8eee383..f8507ea 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -68,13 +68,12 @@ tan(double x) {
   mp_no mpy;
 #endif
 
-  fenv_t env;
   double retval;
 
   int __branred(double, double *, double *);
   int __mpranred(double, mp_no *, int);
 
-  libc_feholdexcept_setround_53bit (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
 
   /* x=+-INF, x=NaN */
   num.d = x;  ux = num.i[HIGH_HALF];
@@ -503,7 +502,6 @@ tan(double x) {
   goto ret;
 
  ret:
-  libc_feupdateenv_53bit (&env);
   return retval;
 }
 
diff --git a/sysdeps/ieee754/flt-32/e_exp2f.c b/sysdeps/ieee754/flt-32/e_exp2f.c
index e728e6e..267d81b 100644
--- a/sysdeps/ieee754/flt-32/e_exp2f.c
+++ b/sysdeps/ieee754/flt-32/e_exp2f.c
@@ -54,53 +54,52 @@ __ieee754_exp2f (float x)
       int tval, unsafe;
       float rx, x22, result;
       union ieee754_float ex2_u, scale_u;
-      fenv_t oldenv;
-
-      libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
-
-      /* 1. Argument reduction.
-	 Choose integers ex, -128 <= t < 128, and some real
-	 -1/512 <= x1 <= 1/512 so that
-	 x = ex + t/512 + x1.
-
-	 First, calculate rx = ex + t/256.  */
-      rx = x + THREEp14;
-      rx -= THREEp14;
-      x -= rx;  /* Compute x=x1. */
-      /* Compute tval = (ex*256 + t)+128.
-	 Now, t = (tval mod 256)-128 and ex=tval/256  [that's mod, NOT %; and
-	 /-round-to-nearest not the usual c integer /].  */
-      tval = (int) (rx * 256.0f + 128.0f);
-
-      /* 2. Adjust for accurate table entry.
-	 Find e so that
-	 x = ex + t/256 + e + x2
-	 where -7e-4 < e < 7e-4, and
-	 (float)(2^(t/256+e))
-	 is accurate to one part in 2^-64.  */
-
-      /* 'tval & 255' is the same as 'tval%256' except that it's always
-	 positive.
-	 Compute x = x2.  */
-      x -= __exp2f_deltatable[tval & 255];
-
-      /* 3. Compute ex2 = 2^(t/255+e+ex).  */
-      ex2_u.f = __exp2f_atable[tval & 255];
-      tval >>= 8;
-      unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
-      ex2_u.ieee.exponent += tval >> unsafe;
-      scale_u.f = 1.0;
-      scale_u.ieee.exponent += tval - (tval >> unsafe);
-
-      /* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
-	 with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
-	 less than 1.3e-10.  */
-
-      x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
 
-      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
-      libc_fesetenv (&oldenv);
+      {
+	SET_RESTORE_ROUND_NOEXF (FE_TONEAREST);
+
+	/* 1. Argument reduction.
+	   Choose integers ex, -128 <= t < 128, and some real
+	   -1/512 <= x1 <= 1/512 so that
+	   x = ex + t/256 + x1.
+
+	   First, calculate rx = ex + t/256.  */
+	rx = x + THREEp14;
+	rx -= THREEp14;
+	x -= rx;  /* Compute x=x1. */
+	/* Compute tval = (ex*256 + t)+128.
+	   Now, t = (tval mod 256)-128 and ex=tval/256  [that's mod, NOT %;
+	   and /-round-to-nearest not the usual c integer /].  */
+	tval = (int) (rx * 256.0f + 128.0f);
+
+	/* 2. Adjust for accurate table entry.
+	   Find e so that
+	   x = ex + t/256 + e + x2
+	   where -7e-4 < e < 7e-4, and
+	   (float)(2^(t/256+e))
+	   is accurate to one part in 2^-64.  */
+
+	/* 'tval & 255' is the same as 'tval%256' except that it's always
+	   positive.
+	   Compute x = x2.  */
+	x -= __exp2f_deltatable[tval & 255];
+
+	/* 3. Compute ex2 = 2^(t/256+e+ex).  */
+	ex2_u.f = __exp2f_atable[tval & 255];
+	tval >>= 8;
+	unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
+	ex2_u.ieee.exponent += tval >> unsafe;
+	scale_u.f = 1.0;
+	scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+	/* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
+	   with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
+	   less than 1.3e-10.  */
+
+	x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
+      }
 
+      /* 5. Return (2^x2-1) * 2^(t/256+e+ex) + 2^(t/256+e+ex).  */
       result = x22 * x + ex2_u.f;
 
       if (!unsafe)
diff --git a/sysdeps/ieee754/flt-32/e_expf.c b/sysdeps/ieee754/flt-32/e_expf.c
index e69e7f6..57aff16 100644
--- a/sysdeps/ieee754/flt-32/e_expf.c
+++ b/sysdeps/ieee754/flt-32/e_expf.c
@@ -80,40 +80,39 @@ __ieee754_expf (float x)
       double x22, t, result, dx;
       float n, delta;
       union ieee754_double ex2_u;
-      fenv_t oldenv;
 
-      libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
+      {
+	SET_RESTORE_ROUND_NOEXF (FE_TONEAREST);
 
-      /* Calculate n.  */
-      n = x * M_1_LN2 + THREEp22;
-      n -= THREEp22;
-      dx = x - n*M_LN2;
+	/* Calculate n.  */
+	n = x * M_1_LN2 + THREEp22;
+	n -= THREEp22;
+	dx = x - n*M_LN2;
 
-      /* Calculate t/512.  */
-      t = dx + THREEp42;
-      t -= THREEp42;
-      dx -= t;
+	/* Calculate t/512.  */
+	t = dx + THREEp42;
+	t -= THREEp42;
+	dx -= t;
 
-      /* Compute tval = t.  */
-      tval = (int) (t * 512.0);
+	/* Compute tval = t.  */
+	tval = (int) (t * 512.0);
 
-      if (t >= 0)
-	delta = - __exp_deltatable[tval];
-      else
-	delta = __exp_deltatable[-tval];
+	if (t >= 0)
+	  delta = - __exp_deltatable[tval];
+	else
+	  delta = __exp_deltatable[-tval];
 
-      /* Compute ex2 = 2^n e^(t/512+delta[t]).  */
-      ex2_u.d = __exp_atable[tval+177];
-      ex2_u.ieee.exponent += (int) n;
+	/* Compute ex2 = 2^n e^(t/512+delta[t]).  */
+	ex2_u.d = __exp_atable[tval+177];
+	ex2_u.ieee.exponent += (int) n;
 
-      /* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
-	 with maximum error in [-2^-10-2^-28,2^-10+2^-28]
-	 less than 5e-11.  */
-      x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+	/* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
+	   with maximum error in [-2^-10-2^-28,2^-10+2^-28]
+	   less than 5e-11.  */
+	x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+      }
 
       /* Return result.  */
-      libc_fesetenvf (&oldenv);
-
       result = x22 * ex2_u.d + ex2_u.d;
       return (float) result;
     }
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 8b1fe70..3289afc 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -119,6 +119,29 @@ libc_feupdateenv (fenv_t *e)
 #define libc_feupdateenv  libc_feupdateenv
 #define libc_feupdateenvf libc_feupdateenv
 
+static __always_inline void
+libc_feholdsetround (fenv_t *e, int r)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = (mxcsr & ~0x6000) | (r << 3);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feholdsetround  libc_feholdsetround
+#define libc_feholdsetroundf libc_feholdsetround
+
+static __always_inline void
+libc_feresetround (fenv_t *e)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feresetround  libc_feresetround
+#define libc_feresetroundf libc_feresetround
+
 #include_next <math_private.h>
 
 extern __always_inline double

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7d2e8012cfb4fa30086ef330ef1b993cfcc3dd99

commit 7d2e8012cfb4fa30086ef330ef1b993cfcc3dd99
Author: Richard Henderson <rth@twiddle.net>
Date:   Sun Mar 18 10:12:16 2012 -0700

    i386: Convert libc_{feholdexcept_setround,feupdateenv}_53bit to functions.
    
    Also fix a bug in libc_feupdateenv_53bit: don't force the rounding
    precision back to _FPU_EXTENDED, instead restore the precision that
    the user had in effect beforehand.

diff --git a/ChangeLog b/ChangeLog
index 2d6d574..aace9ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/i386/fpu/math_private.h: Include <fenv.h>, <fpu_control.h>.
+	(libc_feholdexcept_setround_53bit): Convert from macro to function.
+	(libc_feupdateenv_53bit): Likewise.  Don't force _FPU_EXTENDED.
+
 	* sysdeps/generic/math_private.h: Include <fenv.h>.
 	(default_libc_feholdexcept): New.
 	(default_libc_feholdexcept_setround): New.
diff --git a/sysdeps/i386/fpu/math_private.h b/sysdeps/i386/fpu/math_private.h
index 6520226..9236448 100644
--- a/sysdeps/i386/fpu/math_private.h
+++ b/sysdeps/i386/fpu/math_private.h
@@ -1,5 +1,8 @@
 #ifndef _MATH_PRIVATE_H
 
+#include <fenv.h>
+#include <fpu_control.h>
+
 #define math_opt_barrier(x) \
 ({ __typeof (x) __x;					\
    __asm ("" : "=t" (__x) : "0" (x));			\
@@ -15,34 +18,31 @@ do							\
   }							\
 while (0)
 
-#include_next <math_private.h>
+static __always_inline void
+libc_feholdexcept_setround_53bit (fenv_t *e, int r)
+{
+  feholdexcept (e);
+  fesetround (r);
 
-#include <fpu_control.h>
+  fpu_control_t cw;
+  _FPU_GETCW (cw);
+  cw &= ~(fpu_control_t) _FPU_EXTENDED;
+  cw |= _FPU_DOUBLE;
+  _FPU_SETCW (cw);
+}
+#define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_53bit
+
+static __always_inline void
+libc_feupdateenv_53bit (fenv_t *e)
+{
+  feupdateenv (e);
+
+  /* Unfortunately, feupdateenv fails to affect the rounding precision.
+     We can get that back by restoring the exact control word we saved.  */
+  _FPU_SETCW (e->__control_word);
+}
+#define libc_feupdateenv_53bit libc_feupdateenv_53bit
+
+#include_next <math_private.h>
 
-#undef libc_feholdexcept_setround_53bit
-#define libc_feholdexcept_setround_53bit(e, r)	\
-  do						\
-    {						\
-      fpu_control_t cw;				\
-      libc_feholdexcept_setround (e, r);	\
-      _FPU_GETCW (cw);				\
-      cw &= ~(fpu_control_t) _FPU_EXTENDED;	\
-      cw |= _FPU_DOUBLE;			\
-      _FPU_SETCW (cw);				\
-    }						\
-  while (0)
-
-#undef libc_feupdateenv_53bit
-#define libc_feupdateenv_53bit(e)		\
-  do						\
-    {						\
-      fpu_control_t cw;				\
-      libc_feupdateenv (e);			\
-      _FPU_GETCW (cw);				\
-      cw &= ~(fpu_control_t) _FPU_EXTENDED;	\
-      cw |= _FPU_EXTENDED;			\
-      _FPU_SETCW (cw);				\
-    }						\
-  while (0)
-
-#endif
+#endif /* _MATH_PRIVATE_H */

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b4dabbb47a174e1075b5b93f29093319eab69f2c

commit b4dabbb47a174e1075b5b93f29093319eab69f2c
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Mar 9 12:51:27 2012 -0800

    Convert libc_feholdexcept et al from macros to inline functions.

diff --git a/ChangeLog b/ChangeLog
index 43e2415..2d6d574 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2012-03-19  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/generic/math_private.h: Include <fenv.h>.
+	(default_libc_feholdexcept): New.
+	(default_libc_feholdexcept_setround): New.
+	(default_libc_fesetenv, default_libc_feupdateenv): New.
+	(libc_feholdexcept): Only define if undefined.
+	(libc_feholdexceptf, libc_feholdexceptl): Likewise.
+	(libc_feholdexcept_setround, libc_feholdexcept_setroundf): Likewise.
+	(libc_feholdexcept_setroundl): Likewise.
+	(libc_feholdexcept_setround_53bit): Likewise.
+	(libc_fetestexcept, libc_fetestexceptf, libc_fetestexceptl): Likewise.
+	(libc_fesetenv, libc_fesetenvf, libc_fesetenvl): Likewise.
+	(libc_feupdateenv, libc_feupdateenvf, libc_feupdateenvl): Likewise.
+	(libc_feupdateenv_53bit): Likewise.
+	* sysdeps/x86_64/fpu/math_private.h: Include <fenv.h>.
+	(libc_feholdexcept): Convert from macro to inline function.
+	(libc_feholdexcept_setround, libc_fetestexcept): Likewise.
+	(libc_fesetenv, libc_feupdateenv): Likewise.
+
 	* sysdeps/generic/math_private.h (GET_HIGH_WORD): Define only if
 	not previously defined.
 	(GET_LOW_WORD, EXTRACT_WORDS64, INSERT_WORDS): Likewise.
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 908e6b7..ab4b47b 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -19,6 +19,7 @@
 #include <endian.h>
 #include <stdint.h>
 #include <sys/types.h>
+#include <fenv.h>
 
 /* The original fdlibm code used statements like:
 	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
@@ -372,33 +373,89 @@ extern void __docos (double __x, double __dx, double __v[]);
    know what operations are going to be performed.  Therefore we
    define additional interfaces.  By default they refer to the normal
    interfaces.  */
-#define libc_feholdexcept(e) (void) feholdexcept (e)
-#define libc_feholdexceptf(e) (void) feholdexcept (e)
-#define libc_feholdexceptl(e) (void) feholdexcept (e)
 
-#define libc_feholdexcept_setround(e, r) \
-  do { feholdexcept (e); fesetround (r); } while (0)
-#define libc_feholdexcept_setroundf(e, r) \
-  do { feholdexcept (e); fesetround (r); } while (0)
-#define libc_feholdexcept_setroundl(e, r) \
-  do { feholdexcept (e); fesetround (r); } while (0)
+static __always_inline void
+default_libc_feholdexcept (fenv_t *e)
+{
+  (void) feholdexcept (e);
+}
+
+#ifndef libc_feholdexcept
+# define libc_feholdexcept  default_libc_feholdexcept
+#endif
+#ifndef libc_feholdexceptf
+# define libc_feholdexceptf default_libc_feholdexcept
+#endif
+#ifndef libc_feholdexceptl
+# define libc_feholdexceptl default_libc_feholdexcept
+#endif
+
+static __always_inline void
+default_libc_feholdexcept_setround (fenv_t *e, int r)
+{
+  feholdexcept (e);
+  fesetround (r);
+}
+
+#ifndef libc_feholdexcept_setround
+# define libc_feholdexcept_setround  default_libc_feholdexcept_setround
+#endif
+#ifndef libc_feholdexcept_setroundf
+# define libc_feholdexcept_setroundf default_libc_feholdexcept_setround
+#endif
+#ifndef libc_feholdexcept_setroundl
+# define libc_feholdexcept_setroundl default_libc_feholdexcept_setround
+#endif
+
+#ifndef libc_feholdexcept_setround_53bit
+# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround
+#endif
 
-#define libc_feholdexcept_setround_53bit(e, r) \
-  libc_feholdexcept_setround (e, r)
+#ifndef libc_fetestexcept
+# define libc_fetestexcept  fetestexcept
+#endif
+#ifndef libc_fetestexceptf
+# define libc_fetestexceptf fetestexcept
+#endif
+#ifndef libc_fetestexceptl
+# define libc_fetestexceptl fetestexcept
+#endif
 
-#define libc_fetestexcept(e) fetestexcept (e)
-#define libc_fetestexceptf(e) fetestexcept (e)
-#define libc_fetestexceptl(e) fetestexcept (e)
+static __always_inline void
+default_libc_fesetenv (fenv_t *e)
+{
+  (void) fesetenv (e);
+}
 
-#define libc_fesetenv(e) (void) fesetenv (e)
-#define libc_fesetenvf(e) (void) fesetenv (e)
-#define libc_fesetenvl(e) (void) fesetenv (e)
+#ifndef libc_fesetenv
+# define libc_fesetenv  default_libc_fesetenv
+#endif
+#ifndef libc_fesetenvf
+# define libc_fesetenvf default_libc_fesetenv
+#endif
+#ifndef libc_fesetenvl
+# define libc_fesetenvl default_libc_fesetenv
+#endif
 
-#define libc_feupdateenv(e) (void) feupdateenv (e)
-#define libc_feupdateenvf(e) (void) feupdateenv (e)
-#define libc_feupdateenvl(e) (void) feupdateenv (e)
+static __always_inline void
+default_libc_feupdateenv (fenv_t *e)
+{
+  (void) feupdateenv (e);
+}
+
+#ifndef libc_feupdateenv
+# define libc_feupdateenv  default_libc_feupdateenv
+#endif
+#ifndef libc_feupdateenvf
+# define libc_feupdateenvf default_libc_feupdateenv
+#endif
+#ifndef libc_feupdateenvl
+# define libc_feupdateenvl default_libc_feupdateenv
+#endif
 
-#define libc_feupdateenv_53bit(e) libc_feupdateenv (e)
+#ifndef libc_feupdateenv_53bit
+# define libc_feupdateenv_53bit libc_feupdateenv
+#endif
 
 #define __nan(str) \
   (__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str))
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index dae9ccc..8b1fe70 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -1,6 +1,8 @@
 #ifndef X86_64_MATH_PRIVATE_H
 #define X86_64_MATH_PRIVATE_H 1
 
+#include <fenv.h>
+
 #define math_opt_barrier(x) \
   ({ __typeof(x) __x;							      \
      if (sizeof (x) <= sizeof (double))					      \
@@ -62,6 +64,61 @@
     f = f__;								      \
   } while (0)
 
+/* Specialized variants of the <fenv.h> interfaces which only handle
+   either the FPU or the SSE unit.  */
+static __always_inline void
+libc_feholdexcept (fenv_t *e)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = (mxcsr | 0x1f80) & ~0x3f;
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feholdexcept  libc_feholdexcept
+#define libc_feholdexceptf libc_feholdexcept
+
+static __always_inline void
+libc_feholdexcept_setround (fenv_t *e, int r)
+{
+  unsigned int mxcsr;
+  asm (STMXCSR " %0" : "=m" (*&mxcsr));
+  e->__mxcsr = mxcsr;
+  mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feholdexcept_setround  libc_feholdexcept_setround
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround
+
+static __always_inline int
+libc_fetestexcept (int e)
+{
+  unsigned int mxcsr;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  return mxcsr & e & FE_ALL_EXCEPT;
+}
+#define libc_fetestexcept  libc_fetestexcept
+#define libc_fetestexceptf libc_fetestexcept
+
+static __always_inline void
+libc_fesetenv (fenv_t *e)
+{
+  asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
+}
+#define libc_fesetenv  libc_fesetenv
+#define libc_fesetenvf libc_fesetenv
+
+static __always_inline void
+libc_feupdateenv (fenv_t *e)
+{
+  unsigned int mxcsr;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));
+  __feraiseexcept (mxcsr & FE_ALL_EXCEPT);
+}
+#define libc_feupdateenv  libc_feupdateenv
+#define libc_feupdateenvf libc_feupdateenv
+
 #include_next <math_private.h>
 
 extern __always_inline double
@@ -146,61 +203,4 @@ __floorf (float d)
 }
 #endif /* __SSE4_1__ */
 
-
-/* Specialized variants of the <fenv.h> interfaces which only handle
-   either the FPU or the SSE unit.  */
-#undef libc_feholdexcept
-#define libc_feholdexcept(e) \
-  do {									      \
-     unsigned int mxcsr;						      \
-     asm (STMXCSR " %0" : "=m" (*&mxcsr));				      \
-     (e)->__mxcsr = mxcsr;						      \
-     mxcsr = (mxcsr | 0x1f80) & ~0x3f;					      \
-     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));			      \
-  } while (0)
-#undef libc_feholdexceptf
-#define libc_feholdexceptf(e) libc_feholdexcept (e)
-// #define libc_feholdexceptl(e) (void) feholdexcept (e)
-
-#undef libc_feholdexcept_setround
-#define libc_feholdexcept_setround(e, r) \
-  do {									      \
-     unsigned int mxcsr;						      \
-     asm (STMXCSR " %0" : "=m" (*&mxcsr));				      \
-     (e)->__mxcsr = mxcsr;						      \
-     mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3);			      \
-     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));			      \
-  } while (0)
-#undef libc_feholdexcept_setroundf
-#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
-// #define libc_feholdexcept_setroundl(e, r) ...
-
-#undef libc_fetestexcept
-#define libc_fetestexcept(e) \
-  ({ unsigned int mxcsr;						      \
-     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
-     mxcsr & (e) & FE_ALL_EXCEPT; })
-#undef libc_fetestexceptf
-#define libc_fetestexceptf(e) libc_fetestexcept (e)
-// #define libc_fetestexceptl(e) fetestexcept (e)
-
-#undef libc_fesetenv
-#define libc_fesetenv(e) \
-  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
-#undef libc_fesetenvf
-#define libc_fesetenvf(e) libc_fesetenv (e)
-// #define libc_fesetenvl(e) (void) fesetenv (e)
-
-#undef libc_feupdateenv
-#define libc_feupdateenv(e) \
-  do {									      \
-    unsigned int mxcsr;							      \
-    asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
-    asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));		      \
-    __feraiseexcept (mxcsr & FE_ALL_EXCEPT);				      \
-  } while (0)
-#undef libc_feupdateenvf
-#define libc_feupdateenvf(e) libc_feupdateenv (e)
-// #define libc_feupdateenvl(e) (void) feupdateenv (e)
-
 #endif /* X86_64_MATH_PRIVATE_H */

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4851a949b4cd1f280b56a728c784aaa85e51124c

commit 4851a949b4cd1f280b56a728c784aaa85e51124c
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Mar 9 12:38:23 2012 -0800

    Make inline __isnan, __isinf_ns, __finite generic.
    
    For code generation to stay identical on x86_64, this requires that
    we define the fp word manipulation macros before including the
    generic header.

diff --git a/ChangeLog b/ChangeLog
index f60edac..43e2415 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2012-03-19  Richard Henderson  <rth@twiddle.net>
+
+	* sysdeps/generic/math_private.h (GET_HIGH_WORD): Define only if
+	not previously defined.
+	(GET_LOW_WORD, EXTRACT_WORDS64, INSERT_WORDS): Likewise.
+	(INSERT_WORDS64, SET_HIGH_WORD, SET_LOW_WORD): Likewise.
+	(GET_FLOAT_WORD, SET_FLOAT_WORD): Likewise.
+	* sysdeps/ieee754/dbl-64/wordsize-64/math_private.h: New file.
+	* sysdeps/ieee754/flt-32/math_private.h: New file.
+	* sysdeps/x86_64/fpu/math_private.h: Move the include_next of
+	math_private.h below SET_FLOAT_WORD.
+	(__isnan, __isinf_ns, __finite): Remove.
+	(__isnanf, __isinf_nsf, __finitef): Remove.
+
 2012-03-18  Andreas Schwab  <schwab@linux-m68k.org>
 
 	* sysdeps/powerpc/fpu/libm-test-ulps: Update.
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index be1e4d2..908e6b7 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -76,50 +76,59 @@ do {								\
 
 /* Get the more significant 32 bit int from a double.  */
 
-#define GET_HIGH_WORD(i,d)					\
+#ifndef GET_HIGH_WORD
+# define GET_HIGH_WORD(i,d)					\
 do {								\
   ieee_double_shape_type gh_u;					\
   gh_u.value = (d);						\
   (i) = gh_u.parts.msw;						\
 } while (0)
+#endif
 
 /* Get the less significant 32 bit int from a double.  */
 
-#define GET_LOW_WORD(i,d)					\
+#ifndef GET_LOW_WORD
+# define GET_LOW_WORD(i,d)					\
 do {								\
   ieee_double_shape_type gl_u;					\
   gl_u.value = (d);						\
   (i) = gl_u.parts.lsw;						\
 } while (0)
+#endif
 
 /* Get all in one, efficient on 64-bit machines.  */
-#define EXTRACT_WORDS64(i,d)					\
+#ifndef EXTRACT_WORDS64
+# define EXTRACT_WORDS64(i,d)					\
 do {								\
   ieee_double_shape_type gh_u;					\
   gh_u.value = (d);						\
   (i) = gh_u.word;						\
 } while (0)
+#endif
 
 /* Set a double from two 32 bit ints.  */
-
-#define INSERT_WORDS(d,ix0,ix1)					\
+#ifndef INSERT_WORDS
+# define INSERT_WORDS(d,ix0,ix1)				\
 do {								\
   ieee_double_shape_type iw_u;					\
   iw_u.parts.msw = (ix0);					\
   iw_u.parts.lsw = (ix1);					\
   (d) = iw_u.value;						\
 } while (0)
+#endif
 
 /* Get all in one, efficient on 64-bit machines.  */
-#define INSERT_WORDS64(d,i)					\
+#ifndef INSERT_WORDS64
+# define INSERT_WORDS64(d,i)					\
 do {								\
   ieee_double_shape_type iw_u;					\
   iw_u.word = (i);						\
   (d) = iw_u.value;						\
 } while (0)
+#endif
 
 /* Set the more significant 32 bits of a double from an int.  */
-
+#ifndef SET_HIGH_WORD
 #define SET_HIGH_WORD(d,v)					\
 do {								\
   ieee_double_shape_type sh_u;					\
@@ -127,16 +136,18 @@ do {								\
   sh_u.parts.msw = (v);						\
   (d) = sh_u.value;						\
 } while (0)
+#endif
 
 /* Set the less significant 32 bits of a double from an int.  */
-
-#define SET_LOW_WORD(d,v)					\
+#ifndef SET_LOW_WORD
+# define SET_LOW_WORD(d,v)					\
 do {								\
   ieee_double_shape_type sl_u;					\
   sl_u.value = (d);						\
   sl_u.parts.lsw = (v);						\
   (d) = sl_u.value;						\
 } while (0)
+#endif
 
 /* A union which permits us to convert between a float and a 32 bit
    int.  */
@@ -148,22 +159,24 @@ typedef union
 } ieee_float_shape_type;
 
 /* Get a 32 bit int from a float.  */
-
-#define GET_FLOAT_WORD(i,d)					\
+#ifndef GET_FLOAT_WORD
+# define GET_FLOAT_WORD(i,d)					\
 do {								\
   ieee_float_shape_type gf_u;					\
   gf_u.value = (d);						\
   (i) = gf_u.word;						\
 } while (0)
+#endif
 
 /* Set a float from a 32 bit int.  */
-
-#define SET_FLOAT_WORD(d,i)					\
+#ifndef SET_FLOAT_WORD
+# define SET_FLOAT_WORD(d,i)					\
 do {								\
   ieee_float_shape_type sf_u;					\
   sf_u.word = (i);						\
   (d) = sf_u.value;						\
 } while (0)
+#endif
 
 /* Get long double macros from a separate header.  */
 #include <math_ldbl.h>
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/math_private.h b/sysdeps/ieee754/dbl-64/wordsize-64/math_private.h
new file mode 100644
index 0000000..b66085e
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/math_private.h
@@ -0,0 +1,35 @@
+#ifndef _MATH_PRIVATE_H_
+
+#include_next <math_private.h>
+
+#ifndef __isnan
+extern __always_inline int
+__isnan (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
+}
+#endif
+
+#ifndef __isinf_ns
+extern __always_inline int
+__isinf_ns (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
+}
+#endif
+
+#ifndef __finite
+extern __always_inline int
+__finite (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
+}
+#endif
+
+#endif /* _MATH_PRIVATE_H_ */
diff --git a/sysdeps/ieee754/flt-32/math_private.h b/sysdeps/ieee754/flt-32/math_private.h
new file mode 100644
index 0000000..e33db02
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/math_private.h
@@ -0,0 +1,35 @@
+#ifndef _MATH_PRIVATE_H_
+
+#include_next <math_private.h>
+
+#ifndef __isnanf
+extern __always_inline int
+__isnanf (float d)
+{
+  u_int32_t di;
+  GET_FLOAT_WORD (di, d);
+  return (di & 0x7fffffff) > 0x7f800000;
+}
+#endif
+
+#ifndef __isinf_nsf
+extern __always_inline int
+__isinf_nsf (float d)
+{
+  u_int32_t di;
+  GET_FLOAT_WORD (di, d);
+  return (di & 0x7fffffff) == 0x7f800000;
+}
+#endif
+
+#ifndef __finitef
+extern __always_inline int
+__finitef (float d)
+{
+  u_int32_t di;
+  GET_FLOAT_WORD (di, d);
+  return (di & 0x7fffffff) < 0x7f800000;
+}
+#endif
+
+#endif /* _MATH_PRIVATE_H_ */
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index c8616f6..dae9ccc 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -16,8 +16,6 @@
       __asm __volatile ("" : : "f" (x));				      \
   } while (0)
 
-#include_next <math_private.h>
-
 /* We can do a few things better on x86-64.  */
 
 #if defined __AVX__ || defined SSE2AVX
@@ -31,7 +29,6 @@
 #endif
 
 /* Direct movement of float into integer register.  */
-#undef EXTRACT_WORDS64
 #define EXTRACT_WORDS64(i, d)						      \
   do {									      \
     long int i_;							      \
@@ -40,7 +37,6 @@
   } while (0)
 
 /* And the reverse.  */
-#undef INSERT_WORDS64
 #define INSERT_WORDS64(d, i) \
   do {									      \
     long int i_ = i;							      \
@@ -50,7 +46,6 @@
   } while (0)
 
 /* Direct movement of float into integer register.  */
-#undef GET_FLOAT_WORD
 #define GET_FLOAT_WORD(i, d) \
   do {									      \
     int i_;								      \
@@ -59,7 +54,6 @@
   } while (0)
 
 /* And the reverse.  */
-#undef SET_FLOAT_WORD
 #define SET_FLOAT_WORD(f, i) \
   do {									      \
     int i_ = i;								      \
@@ -68,27 +62,7 @@
     f = f__;								      \
   } while (0)
 
-
-#define __isnan(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
-     (__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; })
-#define __isnanf(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
-     (__di & 0x7fffffff) > 0x7f800000; })
-
-#define __isinf_ns(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
-     (__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; })
-#define __isinf_nsf(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
-     (__di & 0x7fffffff) == 0x7f800000; })
-
-#define __finite(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
-     (__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; })
-#define __finitef(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
-     (__di & 0x7fffffff) < 0x7f800000; })
+#include_next <math_private.h>
 
 extern __always_inline double
 __ieee754_sqrt (double d)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                         |   86 ++++++
 sysdeps/generic/math_private.h                    |  225 +++++++++++++---
 sysdeps/i386/fpu/feholdexcpt.c                    |   19 +-
 sysdeps/i386/fpu/fenv_private.h                   |  304 +++++++++++++++++++++
 sysdeps/i386/fpu/math_private.h                   |   44 +---
 sysdeps/ieee754/dbl-64/e_exp.c                    |    4 +-
 sysdeps/ieee754/dbl-64/e_exp2.c                   |   97 ++++----
 sysdeps/ieee754/dbl-64/e_pow.c                    |    4 +-
 sysdeps/ieee754/dbl-64/s_fma.c                    |   25 +-
 sysdeps/ieee754/dbl-64/s_fmaf.c                   |   12 +-
 sysdeps/ieee754/dbl-64/s_sin.c                    |    8 +-
 sysdeps/ieee754/dbl-64/s_tan.c                    |    4 +-
 sysdeps/ieee754/dbl-64/wordsize-64/math_private.h |   35 +++
 sysdeps/ieee754/flt-32/e_exp2f.c                  |   89 +++---
 sysdeps/ieee754/flt-32/e_expf.c                   |   49 ++--
 sysdeps/ieee754/flt-32/math_private.h             |   35 +++
 sysdeps/x86_64/fpu/feholdexcpt.c                  |   21 +-
 sysdeps/x86_64/fpu/math_private.h                 |  105 +-------
 18 files changed, 808 insertions(+), 358 deletions(-)
 create mode 100644 sysdeps/i386/fpu/fenv_private.h
 create mode 100644 sysdeps/ieee754/dbl-64/wordsize-64/math_private.h
 create mode 100644 sysdeps/ieee754/flt-32/math_private.h


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]