This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 4/8] powerpc: Convert __ieee754_sqrt{,f} from macros to inlines.
On Wed, 2012-03-07 at 14:10 -0800, Richard Henderson wrote:
> * sysdeps/powerpc/fpu/math_private.h (__ieee754_sqrt): Convert
> from macro to inline function; merge with the
> !__LIBC_INTERNAL_MATH_INLINES version.
> (__ieee754_sqrtf): Likewise.
>
> Cc: Ryan S. Arnold <rsa@us.ibm.com>
> ---
> sysdeps/powerpc/fpu/math_private.h | 106 ++++++++++++------------------------
> 1 files changed, 36 insertions(+), 70 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 7bacecb..6174bf8 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -23,35 +23,49 @@
> #include <sysdep.h>
> #include <ldsodefs.h>
> #include <dl-procinfo.h>
> -
> #include <math/math_private.h>
>
> # if __WORDSIZE == 64 || defined _ARCH_PWR4
> # define __CPU_HAS_FSQRT 1
> -
> -#ifndef __ieee754_sqrt
> -# define __ieee754_sqrt(x) \
> - ({ double __z; \
> - __asm __volatile ( \
> - " fsqrt %0,%1\n" \
> - : "=f" (__z) \
> - : "f"(x)); \
> - __z; })
> -#endif
> -#ifndef __ieee754_sqrtf
> -# define __ieee754_sqrtf(x) \
> - ({ float __z; \
> - __asm __volatile ( \
> - " fsqrts %0,%1\n" \
> - : "=f" (__z) \
> - : "f"(x)); \
> - __z; })
> -#endif
> -
> # else
> # define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
> -# endif // __WORDSIZE == 64 || defined _ARCH_PWR4
> +# endif
> +
> +extern double __slow_ieee754_sqrt (double);
> +extern inline double
> +__ieee754_sqrt (double __x)
> +{
> + double __z;
>
> + if (__CPU_HAS_FSQRT)
> + {
> + /* Volatile is required to prevent the compiler from moving the
> + fsqrt instruction above the branch. */
> + __asm __volatile ("fsqrt %0,%1" : "=f" (__z) : "f" (__x));
> + }
> + else
> + __z = __slow_ieee754_sqrt(__x);
> +
> + return __z;
> +}
> +
> +extern float __slow_ieee754_sqrtf (float);
> +extern inline float
> +__ieee754_sqrtf (float __x)
> +{
> + float __z;
> +
> + if (__CPU_HAS_FSQRT)
> + {
> + /* Volatile is required to prevent the compiler from moving the
> + fsqrts instruction above the branch. */
> + __asm __volatile ("fsqrts %0,%1" : "=f" (__z) : "f" (__x));
> + }
> + else
> + __z = __slow_ieee754_sqrtf(__x);
> +
> + return __z;
> +}
>
> #if defined _ARCH_PWR5X
>
> @@ -162,52 +176,4 @@
>
> #endif /* defined _ARCH_PWR6 */
>
> -
> -# ifndef __LIBC_INTERNAL_MATH_INLINES
> -extern double __slow_ieee754_sqrt (double);
> -__inline double
> -__ieee754_sqrt (double __x)
> -{
> - double __z;
> -
> - /* If the CPU is 64-bit we can use the optional FP instructions. */
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrt instruction above the branch. */
> - __asm __volatile (
> - " fsqrt %0,%1\n"
> - : "=f" (__z)
> - : "f" (__x));
> - }
> - else
> - __z = __slow_ieee754_sqrt(__x);
> -
> - return __z;
> -}
> -
> -extern float __slow_ieee754_sqrtf (float);
> -
> -__inline float
> -__ieee754_sqrtf (float __x)
> -{
> - float __z;
> -
> - /* If the CPU is 64-bit we can use the optional FP instructions. */
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrts instruction above the branch. */
> - __asm __volatile (
> - " fsqrts %0,%1\n"
> - : "=f" (__z)
> - : "f" (__x));
> - }
> - else
> - __z = __slow_ieee754_sqrtf(__x);
> -
> - return __z;
> -}
> -#endif /* __LIBC_INTERNAL_MATH_INLINES */
> -
> #endif /* _PPC_MATH_PRIVATE_H_ */
Hi Richard,
I'm fine with these changes. Adhemerval looked at the code generated for
these on Power7, and the fsqrt was emitted without a branch test, so we're
satisfied.
Thanks,
Ryan