This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PowerPC's math_ldbl.h


Ping.

On 02/22/2013 04:11 PM, Adhemerval Zanella wrote:
> On 02/22/2013 12:46 PM, Joseph S. Myers wrote:
>> On Fri, 22 Feb 2013, Adhemerval Zanella wrote:
>>
>>> 	* sysdeps/unix/sysv/linux/powerpc/Implies: Add powerpc/fpu prior to
>>> 	ieee754/ldbl-128ibm so sysdeps/powerpc/fpu/math_ldbl.h will be included
>>> 	first.
>> That's wrong - powerpc --without-fp will be using 
>> sysdeps/unix/sysv/linux/powerpc but can't use sysdeps/powerpc/fpu.
>>
> Indeed, I didn't take nofpu into consideration. This patch now removes
> sysdeps/unix/sysv/linux/powerpc/Implies and adds ieee754/ldbl-opt after
> ieee754/ldbl-128ibm in sysdeps/powerpc/Implies. This keeps
> sysdeps/powerpc/fpu out of nofpu builds while still making it appear before
> sysdeps/ieee754/ldbl-128ibm, so the PPC-specific optimization is still used.
>
>
> 2013-02-22  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
>
> 	* sysdeps/ieee754/ldbl-128ibm/math_ldbl.h (ldbl_pack): Rename to
> 	default_ldbl_pack and use as the default implementation.
> 	(ldbl_unpack): Rename to default_ldbl_unpack and use as the default
> 	implementation.
> 	* sysdeps/powerpc/fpu/math_ldbl.h (ldbl_extract_mantissa): Remove
> 	redundant definition.
> 	(ldbl_insert_mantissa): Likewise.
> 	(ldbl_canonicalize): Likewise.
> 	(ldbl_nearbyint): Likewise.
> 	(ldbl_pack): Renamed to ldbl_pack_ppc.
> 	(ldbl_unpack): Renamed to ldbl_unpack_ppc.
> 	* sysdeps/unix/sysv/linux/powerpc/Implies: Remove file.
> 	* sysdeps/powerpc/Implies: Add ieee754/ldbl-opt after ieee754/ldbl-128ibm.
>
> --
>
> diff --git a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
> index be9ac71..1cce1fc 100644
> --- a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
> +++ b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
> @@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
>  /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
>     of long double implemented as double double.  */
>  static inline long double
> -ldbl_pack (double a, double aa)
> +default_ldbl_pack (double a, double aa)
>  {
>    union ibm_extended_long_double u;
>    u.dd[0] = a;
> @@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
>  }
>
>  static inline void
> -ldbl_unpack (long double l, double *a, double *aa)
> +default_ldbl_unpack (long double l, double *a, double *aa)
>  {
>    union ibm_extended_long_double u;
>    u.d = l;
> @@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
>    *aa = u.dd[1];
>  }
>
> +#ifndef ldbl_pack
> +# define ldbl_pack   default_ldbl_pack
> +#endif
> +#ifndef ldbl_unpack
> +# define ldbl_unpack default_ldbl_unpack
> +#endif
>
>  /* Convert a finite long double to canonical form.
>     Does not handle +/-Inf properly.  */
> diff --git a/sysdeps/powerpc/Implies b/sysdeps/powerpc/Implies
> index 7ccf9a7..78dba95 100644
> --- a/sysdeps/powerpc/Implies
> +++ b/sysdeps/powerpc/Implies
> @@ -1,4 +1,5 @@
>  # On PowerPC we use the IBM extended long double format.
>  ieee754/ldbl-128ibm
> +ieee754/ldbl-opt
>  ieee754/dbl-64
>  ieee754/flt-32
> diff --git a/sysdeps/powerpc/fpu/math_ldbl.h b/sysdeps/powerpc/fpu/math_ldbl.h
> index 20224e6..36378c0 100644
> --- a/sysdeps/powerpc/fpu/math_ldbl.h
> +++ b/sysdeps/powerpc/fpu/math_ldbl.h
> @@ -2,132 +2,12 @@
>  #error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
>  #endif
>
> -#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
> -#include <ieee754.h>
> -  
> -static inline void
> -ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x)
> -{
> -  /* We have 105 bits of mantissa plus one implicit digit.  Since
> -     106 bits are representable we use the first implicit digit for
> -     the number before the decimal point and the second implicit bit
> -     as bit 53 of the mantissa.  */
> -  unsigned long long hi, lo;
> -  int ediff;
> -  union ibm_extended_long_double eldbl;
> -  eldbl.d = x;
> -  *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
> -
> -  lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
> -  hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
> -  /* If the lower double is not a denomal or zero then set the hidden
> -     53rd bit.  */
> -  if (eldbl.ieee.exponent2 > 0x001)
> -    {
> -      lo |= (1ULL << 52);
> -      lo = lo << 7; /* pre-shift lo to match ieee854.  */
> -      /* The lower double is normalized separately from the upper.  We
> -	 may need to adjust the lower mantissa to reflect this.  */
> -      ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
> -      if (ediff > 53)
> -	lo = lo >> (ediff-53);
> -    }
> -  hi |= (1ULL << 52);
> -  
> -  if ((eldbl.ieee.negative != eldbl.ieee.negative2)
> -      && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
> -    {
> -      hi--;
> -      lo = (1ULL << 60) - lo;
> -      if (hi < (1ULL << 52))
> -	{
> -	  /* we have a borrow from the hidden bit, so shift left 1.  */
> -	  hi = (hi << 1) | (lo >> 59);
> -	  lo = 0xfffffffffffffffLL & (lo << 1);
> -	  *exp = *exp - 1;
> -	}
> -    }
> -  *lo64 = (hi << 60) | lo;
> -  *hi64 = hi >> 4;
> -}
> -
> -static inline long double
> -ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
> -{
> -  union ibm_extended_long_double u;
> -  unsigned long hidden2, lzcount;
> -  unsigned long long hi, lo;
> -
> -  u.ieee.negative = sign;
> -  u.ieee.negative2 = sign;
> -  u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
> -  u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
> -  /* Expect 113 bits (112 bits + hidden) right justified in two longs.
> -     The low order 53 bits (52 + hidden) go into the lower double */ 
> -  lo = (lo64 >> 7)& ((1ULL << 53) - 1);
> -  hidden2 = (lo64 >> 59) &  1ULL;
> -  /* The high order 53 bits (52 + hidden) go into the upper double */
> -  hi = (lo64 >> 60) & ((1ULL << 11) - 1);
> -  hi |= (hi64 << 4);
> -
> -  if (lo != 0LL)
> -    {
> -      /* hidden2 bit of low double controls rounding of the high double.
> -	 If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
> -	 plus change the sign of the low double to compensate.  */
> -      if (hidden2)
> -	{
> -	  hi++;
> -	  u.ieee.negative2 = !sign;
> -	  lo = (1ULL << 53) - lo;
> -	}
> -      /* The hidden bit of the lo mantissa is zero so we need to
> -	 normalize the it for the low double.  Shift it left until the
> -	 hidden bit is '1' then adjust the 2nd exponent accordingly.  */ 
> -
> -      if (sizeof (lo) == sizeof (long))
> -	lzcount = __builtin_clzl (lo);
> -      else if ((lo >> 32) != 0)
> -	lzcount = __builtin_clzl ((long) (lo >> 32));
> -      else
> -	lzcount = __builtin_clzl ((long) lo) + 32;
> -      lzcount = lzcount - 11;
> -      if (lzcount > 0)
> -	{
> -	  int expnt2 = u.ieee.exponent2 - lzcount;
> -	  if (expnt2 >= 1)
> -	    {
> -	      /* Not denormal.  Normalize and set low exponent.  */
> -	      lo = lo << lzcount;
> -	      u.ieee.exponent2 = expnt2;
> -	    }
> -	  else
> -	    {
> -	      /* Is denormal.  */
> -	      lo = lo << (lzcount + expnt2);
> -	      u.ieee.exponent2 = 0;
> -	    }
> -	}
> -    }
> -  else
> -    {
> -      u.ieee.negative2 = 0;
> -      u.ieee.exponent2 = 0;
> -    }
> -
> -  u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
> -  u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
> -  u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
> -  u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
> -  return u.d;
> -}
> -  
> -/* gcc generates disgusting code to pack and unpack long doubles.
> -   This tells gcc that pack/unpack is really a nop.  We use fr1/fr2
> -   because those are the regs used to pass/return a single
> -   long double arg.  */
> +/* GCC does not optimize the default ldbl_pack code well and spills
> +   registers to the stack.  The following optimization tells GCC that
> +   pack/unpack is really a nop.  We use fr1/fr2 because those are the
> +   regs used to pass/return a single long double arg.  */
>  static inline long double
> -ldbl_pack (double a, double aa)
> +ldbl_pack_ppc (double a, double aa)
>  {
>    register long double x __asm__ ("fr1");
>    register double xh __asm__ ("fr1");
> @@ -139,7 +19,7 @@ ldbl_pack (double a, double aa)
>  }
>
>  static inline void
> -ldbl_unpack (long double l, double *a, double *aa)
> +ldbl_unpack_ppc (long double l, double *a, double *aa)
>  {
>    register long double x __asm__ ("fr1");
>    register double xh __asm__ ("fr1");
> @@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa)
>    *aa = xl;
>  }
>
> +#define ldbl_pack   ldbl_pack_ppc
> +#define ldbl_unpack ldbl_unpack_ppc
>
> -/* Convert a finite long double to canonical form.
> -   Does not handle +/-Inf properly.  */
> -static inline void
> -ldbl_canonicalize (double *a, double *aa)
> -{
> -  double xh, xl;
> -
> -  xh = *a + *aa;
> -  xl = (*a - xh) + *aa;
> -  *a = xh;
> -  *aa = xl;
> -}
> -
> -/* Simple inline nearbyint (double) function .
> -   Only works in the default rounding mode
> -   but is useful in long double rounding functions.  */
> -static inline double
> -ldbl_nearbyint (double a)
> -{
> -  double two52 = 0x10000000000000LL;
> -
> -  if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
> -    {
> -      if (__builtin_expect ((a > 0.0), 1))
> -	{
> -	  a += two52;
> -	  a -= two52;
> -	}
> -      else if (__builtin_expect ((a < 0.0), 1))
> -	{
> -	  a = two52 - a;
> -	  a = -(a - two52);
> -	}
> -    }
> -  return a;
> -}
> +#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h>
> diff --git a/sysdeps/unix/sysv/linux/powerpc/Implies b/sysdeps/unix/sysv/linux/powerpc/Implies
> deleted file mode 100644
> index ff27cdb..0000000
> --- a/sysdeps/unix/sysv/linux/powerpc/Implies
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -# Make sure these routines come before ldbl-opt.
> -ieee754/ldbl-128ibm
> -# These supply the ABI compatibility for when long double was double.
> -ieee754/ldbl-opt


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]