This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Bug report on pow() for i386 targets

From: Takaki Makino <t at snowelm dot com>
To: newlib at sources dot redhat dot com
Date: Fri, 28 Aug 2009 14:45:13 +0900
Subject: Bug report on pow() for i386 targets

Dear newlib maintainers,

I found a bug on pow() function in cygwin environment, and I think it is
caused by a problem in _f_pow.c specialized for i386 in newlib.
The following is the description and a proposed patch.
I hope this report helps you.
Thank you for maintaining such an important project!

Best regards,
-- 
Takaki Makino <t@snowelm.com>


############################
# How to reproduce the bug #
############################

Install the most recent cygwin-1.7 environment, and compile
the following test code with -ffast-math option.
------------------------------------------------------------------------
#include <math.h>
#include <stdio.h>

void test( int ) __attribute__((noinline));

int main()
{
    test(3);
}

void test( int x ) 
{
    int i;
    for( i=-x; i<=x; i++ ) {
        printf(" pow(%d,%d) = %g\n", i, i, pow(i, i) );
    }
}
------------------------------------------------------------------------

The following is the result on my PC:
------------------------------------------------------------------------
$ uname -a
CYGWIN_NT-6.1 pocahontas 1.7.0(0.212/5/3) 2009-08-20 10:56 i686 Cygwin
$ gcc -o test -ffast-math test.c
$ ./test
 pow(-3,-3) = -0.037037
 pow(-2,-2) = 0.25
 pow(-1,-1) = -1
 pow(0,0) = 1
 pow(1,1) = 9.97338e-313
 pow(2,2) = 0
 pow(3,3) = 0
------------------------------------------------------------------------
As you can see, pow() produce incorrect results for i >= 1.
Without -ffast-math, no problem happens.

##########################
#    Cause of the bug    #
##########################

I found the bug is in _f_pow function specialized for i386, which is
stored in cygwin1.dll. From the following disassembly of _f_pow in the
latest cygwin-1.7, you can see that "leave" at 0x610e95f9
precedes "fldl 0x8(%ebp)" at 0x610e95fa, which means that
the function uses %ebp after destroying %ebp.  
Presumably,  _f_pow in newlib is not compatible to some additional
optimization in new gcc (but it seems that new gcc is used to compile
cygwin1.dll...)
------------------------------------------------------------------------
Dump of assembler code for function _f_pow:
0x610e95c0 <_f_pow+0>:  push   %ebp
0x610e95c1 <_f_pow+1>:  mov    %esp,%ebp
0x610e95c3 <_f_pow+3>:  sub    $0x8,%esp
0x610e95c6 <_f_pow+6>:  fldl   0x8(%ebp)
0x610e95c9 <_f_pow+9>:  fldl   0x10(%ebp)
0x610e95cc <_f_pow+12>: fldz   
0x610e95ce <_f_pow+14>: fxch   %st(2)
0x610e95d0 <_f_pow+16>: fucom  %st(2)
0x610e95d2 <_f_pow+18>: fnstsw %ax
0x610e95d4 <_f_pow+20>: fstp   %st(2)
0x610e95d6 <_f_pow+22>: sahf   
0x610e95d7 <_f_pow+23>: jbe    0x610e95eb <_f_pow+43>
0x610e95d9 <_f_pow+25>: fstl   -0x8(%ebp)
0x610e95dc <_f_pow+28>: mov    -0x4(%ebp),%eax
0x610e95df <_f_pow+31>: and    $0x7fffffff,%eax
0x610e95e4 <_f_pow+36>: sub    $0x7ff00000,%eax
0x610e95e9 <_f_pow+41>: js     0x610e95f7 <_f_pow+55>
0x610e95eb <_f_pow+43>: fstpl  0x10(%ebp)
0x610e95ee <_f_pow+46>: fstpl  0x8(%ebp)
0x610e95f1 <_f_pow+49>: leave  
0x610e95f2 <_f_pow+50>: jmp    0x610ecd60 <pow>
0x610e95f7 <_f_pow+55>: fstp   %st(1)
0x610e95f9 <_f_pow+57>: leave  
0x610e95fa <_f_pow+58>: fldl   0x8(%ebp)
0x610e95fd <_f_pow+61>: fyl2x  
0x610e95ff <_f_pow+63>: fld    %st(0)
0x610e9601 <_f_pow+65>: frndint 
0x610e9603 <_f_pow+67>: fsub   %st,%st(1)
0x610e9605 <_f_pow+69>: fxch   %st(1)
0x610e9607 <_f_pow+71>: fchs   
0x610e9609 <_f_pow+73>: f2xm1  
0x610e960b <_f_pow+75>: fld1   
0x610e960d <_f_pow+77>: faddp  %st,%st(1)
0x610e960f <_f_pow+79>: fxch   %st(1)
0x610e9611 <_f_pow+81>: fld1   
0x610e9613 <_f_pow+83>: fscale 
0x610e9615 <_f_pow+85>: fstp   %st(1)
0x610e9617 <_f_pow+87>: fmulp  %st,%st(1)
0x610e9619 <_f_pow+89>: ret    
------------------------------------------------------------------------

##########################
#   Possible solution    #
##########################
The following patch prevents the new gcc from moving "leave" before 
accessing stack frame.  It seems to work on gcc-4.3 but I'm not sure
whether it works on older versions of gcc as well.
------------------------------------------------------------------------
diff -c -r src/newlib/libm/machine/i386/f_pow.c src-new/newlib/libm/machine/i386/f_pow.c
*** src/newlib/libm/machine/i386/f_pow.c	Sat Dec 21 06:31:20 2002
--- src-new/newlib/libm/machine/i386/f_pow.c	Fri Aug 28 14:27:35 2009
***************
*** 35,43 ****
        /* calculate x ** y as 2 ** (y log2(x)).  On Intel, can only
           raise 2 to an integer or a small fraction, thus, we have
           to perform two steps 2**integer portion * 2**fraction. */
!       asm ("fldl 8(%%ebp); fyl2x; fld %%st; frndint; fsub %%st,%%st(1);" \
             "fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
!            "fmulp" : "=t" (result) : "0" (y));
        return result;
      }
    else /* all other strange cases, defer to normal pow() */
--- 35,43 ----
        /* calculate x ** y as 2 ** (y log2(x)).  On Intel, can only
           raise 2 to an integer or a small fraction, thus, we have
           to perform two steps 2**integer portion * 2**fraction. */
!       asm ("fyl2x; fld %%st; frndint; fsub %%st,%%st(1);"\
             "fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
!            "fmulp" : "=t" (result) : "0" (x), "u" (y) : "st(1)" );
        return result;
      }
    else /* all other strange cases, defer to normal pow() */
diff -c -r src/newlib/libm/machine/i386/f_powf.c src-new/newlib/libm/machine/i386/f_powf.c
*** src/newlib/libm/machine/i386/f_powf.c	Sat Dec 21 06:31:20 2002
--- src-new/newlib/libm/machine/i386/f_powf.c	Fri Aug 28 14:27:35 2009
***************
*** 35,43 ****
        /* calculate x ** y as 2 ** (y log2(x)).  On Intel, can only
           raise 2 to an integer or a small fraction, thus, we have
           to perform two steps 2**integer portion * 2**fraction. */
!       asm ("flds 8(%%ebp); fyl2x; fld %%st; frndint; fsub %%st,%%st(1);" \
             "fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
!            "fmulp" : "=t" (result) : "0" (y));
        return result;
      }
    else /* all other strange cases, defer to normal pow() */
--- 35,43 ----
        /* calculate x ** y as 2 ** (y log2(x)).  On Intel, can only
           raise 2 to an integer or a small fraction, thus, we have
           to perform two steps 2**integer portion * 2**fraction. */
!       asm ("fyl2x; fld %%st; frndint; fsub %%st,%%st(1);"\
             "fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
!            "fmulp" : "=t" (result) : "0" (x), "u" (y) : "st(1)" );
        return result;
      }
    else /* all other strange cases, defer to normal pow() */
------------------------------------------------------------------------

Follow-Ups:
- RE: Bug report on pow() for i386 targets
  - From: Howland Craig D (Craig)
- Re: Bug report on pow() for i386 targets
  - From: Jeff Johnston

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]