This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Speed up libm on MIPS


On 09/19/2013 06:56 PM, Steve Ellcey wrote:
> This patch defines various inline routines and macros used by the math
> library in order to speed up libm on MIPS.  It does not affect
> soft-float builds but for hard-float builds 'make bench' shows a
> speed-up.  With an o32 little-endian glibc, sin() went from 27792.6
> iter/s to 31293.6 iter/s.  On n32 it went from 32955.2 to 36179.7 and on
> n64 from 33074.7 to 36242. exp() went from 45742.4 to 56511.2 on o32 and
> pow() went from 19008.8 to 20508.7.  I have attached the original and
> new bench.out files for o32, n32, and n64 ABIs in case you want to see
> more of the data.  These are all little-endian hard-float runs.
> 
> I ran 'make check' and 'make bench' using the o32, n32, and n64 ABIs
> with big and little endian and with hard and soft float to verify there
> were no failures.  I did run into an unrelated problem that is being
> fixed (https://sourceware.org/ml/libc-alpha/2013-09/msg00601.html) but
> there were no other failures except the expected ones for MIPS.

Thank you for running `make bench' and posting the results. I appreciate
you going out of your way to make the measurements and post them.

> OK for checkin?

Does MIPS have a slow fpu save/restore?

Does using HAVE_RM_CTX speed anything up for you?

For example see 2506109403de69bd454de27835d42e6eb6ec3abc

Two nits below.

> Steve Ellcey
> sellcey@mips.com
> 
> 
> 2013-09-18  Steve Ellcey  <sellcey@mips.com>
> 
> 	* sysdeps/mips/math_private.h (libc_feholdexcept_mips): New function.
> 	(libc_feholdexcept): New macro.
> 	(libc_feholdexceptf): New macro.
> 	(libc_feholdexceptl): New macro.
> 	(libc_fesetround_mips): New function.
> 	(libc_fesetround): New macro.
> 	(libc_fesetroundf): New macro.
> 	(libc_fesetroundl): New macro.
> 	(libc_feholdexcept_setround_mips): New function.
> 	(libc_feholdexcept_setround): New macro.
> 	(libc_feholdexcept_setroundf): New macro.
> 	(libc_feholdexcept_setroundl): New macro.
> 	(libc_fesetenv_mips): New function.
> 	(libc_fesetenv): New macro.
> 	(libc_fesetenvf): New macro.
> 	(libc_fesetenvl): New macro.
> 	(libc_feupdateenv_mips): New function.
> 	(libc_feupdateenv): New macro.
> 	(libc_feupdateenvf): New macro.
> 	(libc_feupdateenvl): New macro.
> 
> 
> mips-libm.patch
> 
> 
> diff --git a/ports/sysdeps/mips/math_private.h b/ports/sysdeps/mips/math_private.h
> index 6b99957..0ac18fd 100644
> --- a/ports/sysdeps/mips/math_private.h
> +++ b/ports/sysdeps/mips/math_private.h
> @@ -26,6 +26,119 @@
>  # define HIGH_ORDER_BIT_IS_SET_FOR_SNAN
>  #endif
>  
> +/* Inline functions to speed up the math library implementation.  The
> +   default versions of these routines are in generic/math_private.h
> +   and call fesetround, feholdexcept, etc.  These routines use inlined
> +   code instead.  */
> +
> +#ifdef __mips_hard_float
> +
> +#include <fenv.h>
> +#include <fenv_libc.h>
> +#include <fpu_control.h>
> +
> +static __always_inline void
> +libc_feholdexcept_mips (fenv_t *envp)
> +{
> +  fpu_control_t cw;
> +
> +  /* Save the current state.  */
> +  _FPU_GETCW (cw);
> +  envp->__fp_control_register = cw;
> +
> +  /* Clear all exception enable bits and flags.  */
> +  cw &= ~(_FPU_MASK_V|_FPU_MASK_Z|_FPU_MASK_O|_FPU_MASK_U|_FPU_MASK_I|FE_ALL_EXCEPT);
> +  _FPU_SETCW (cw);
> +}
> +#define libc_feholdexcept libc_feholdexcept_mips
> +#define libc_feholdexceptf libc_feholdexcept_mips
> +#define libc_feholdexceptl libc_feholdexcept_mips
> +
> +static __always_inline void
> +libc_fesetround_mips (int round)
> +{
> +  fpu_control_t cw;
> +
> +  /* Get current state.  */
> +  _FPU_GETCW (cw);
> +
> +  /* Set rounding bits.  */
> +  cw &= ~0x3;

What's the magic ~0x3? Should it be a new macro?

> +  cw |= round;
> +
> +  /* Set new state.  */
> +  _FPU_SETCW (cw);
> +}
> +#define libc_fesetround libc_fesetround_mips
> +#define libc_fesetroundf libc_fesetround_mips
> +#define libc_fesetroundl libc_fesetround_mips
> +
> +static __always_inline void
> +libc_feholdexcept_setround_mips (fenv_t *envp, int round)
> +{
> +  fpu_control_t cw;
> +
> +  /* Save the current state.  */
> +  _FPU_GETCW (cw);
> +  envp->__fp_control_register = cw;
> +
> +  /* Clear all exception enable bits and flags.  */
> +  cw &= ~(_FPU_MASK_V|_FPU_MASK_Z|_FPU_MASK_O|_FPU_MASK_U|_FPU_MASK_I|FE_ALL_EXCEPT);
> +
> +  /* Set rounding bits.  */
> +  cw &= ~0x3;

Likewise?

> +  cw |= round;
> +
> +  /* Set new state.  */
> +  _FPU_SETCW (cw);
> +}
> +#define libc_feholdexcept_setround libc_feholdexcept_setround_mips
> +#define libc_feholdexcept_setroundf libc_feholdexcept_setround_mips
> +#define libc_feholdexcept_setroundl libc_feholdexcept_setround_mips
> +
> +static __always_inline void
> +libc_fesetenv_mips (fenv_t *envp)
> +{
> +  fpu_control_t cw;
> +
> +  /* Read first current state to flush fpu pipeline.  */
> +  _FPU_GETCW (cw);
> +
> +  if (envp == FE_DFL_ENV)
> +    _FPU_SETCW (_FPU_DEFAULT);
> +  else if (envp == FE_NOMASK_ENV)
> +    _FPU_SETCW (_FPU_IEEE);
> +  else
> +    _FPU_SETCW (envp->__fp_control_register);
> +}
> +#define libc_fesetenv libc_fesetenv_mips
> +#define libc_fesetenvf libc_fesetenv_mips
> +#define libc_fesetenvl libc_fesetenv_mips
> +
> +static __always_inline void
> +libc_feupdateenv_mips (fenv_t *envp)
> +{
> +  int temp;
> +
> +  /* Save current exceptions.  */
> +  _FPU_GETCW (temp);
> +
> +  /* Set flag bits (which are accumulative), and *also* set the
> +     cause bits. The setting of the cause bits is what actually causes

Two spaces after period.

> +     the hardware to generate the exception, if the corresponding enable
> +     bit is set as well.  */
> +  temp &= FE_ALL_EXCEPT;
> +  temp |= envp->__fp_control_register | (temp << CAUSE_SHIFT);
> +
> +  /* Set new state.  */
> +  _FPU_SETCW (temp);
> +}
> +#define libc_feupdateenv libc_feupdateenv_mips
> +#define libc_feupdateenvf libc_feupdateenv_mips
> +#define libc_feupdateenvl libc_feupdateenv_mips
> +
> +#endif
> +
>  #include_next <math_private.h>
>  
>  #endif

Otherwise looks good to me.

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]