This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
Bug report on pow() for i386 targets
- From: Takaki Makino <t at snowelm dot com>
- To: newlib at sources dot redhat dot com
- Date: Fri, 28 Aug 2009 14:45:13 +0900
- Subject: Bug report on pow() for i386 targets
Dear newlib maintainers,
I found a bug on pow() function in cygwin environment, and I think it is
caused by a problem in _f_pow.c specialized for i386 in newlib.
The following is the description and a proposed patch.
I hope this report helps you.
Thank you for maintaining such an important project!
Best regards,
--
Takaki Makino <t@snowelm.com>
############################
# How to reproduce the bug #
############################
Install the most recent cygwin-1.7 environment, and compile
the following test code with -ffast-math option.
------------------------------------------------------------------------
#include <math.h>
#include <stdio.h>
void test( int ) __attribute__((noinline));
int main()
{
test(3);
}
void test( int x )
{
int i;
for( i=-x; i<=x; i++ ) {
printf(" pow(%d,%d) = %g\n", i, i, pow(i, i) );
}
}
------------------------------------------------------------------------
The following is the result on my PC:
------------------------------------------------------------------------
$ uname -a
CYGWIN_NT-6.1 pocahontas 1.7.0(0.212/5/3) 2009-08-20 10:56 i686 Cygwin
$ gcc -o test -ffast-math test.c
$ ./test
pow(-3,-3) = -0.037037
pow(-2,-2) = 0.25
pow(-1,-1) = -1
pow(0,0) = 1
pow(1,1) = 9.97338e-313
pow(2,2) = 0
pow(3,3) = 0
------------------------------------------------------------------------
As you can see, pow() produce incorrect results for i >= 1.
Without -ffast-math, no problem happens.
##########################
# Cause of the bug #
##########################
I found the bug is in _f_pow function specialized for i386, which is
stored in cygwin1.dll. From the following disassembly of _f_pow in the
latest cygwin-1.7, you can see that "leave" at 0x610e95f9
precedes "fldl 0x8(%ebp)" at 0x610e95fa, which means that
the function uses %ebp after destroying %ebp.
Presumably, _f_pow in newlib is not compatible to some additional
optimization in new gcc (but it seems that new gcc is used to compile
cygwin1.dll...)
------------------------------------------------------------------------
Dump of assembler code for function _f_pow:
0x610e95c0 <_f_pow+0>: push %ebp
0x610e95c1 <_f_pow+1>: mov %esp,%ebp
0x610e95c3 <_f_pow+3>: sub $0x8,%esp
0x610e95c6 <_f_pow+6>: fldl 0x8(%ebp)
0x610e95c9 <_f_pow+9>: fldl 0x10(%ebp)
0x610e95cc <_f_pow+12>: fldz
0x610e95ce <_f_pow+14>: fxch %st(2)
0x610e95d0 <_f_pow+16>: fucom %st(2)
0x610e95d2 <_f_pow+18>: fnstsw %ax
0x610e95d4 <_f_pow+20>: fstp %st(2)
0x610e95d6 <_f_pow+22>: sahf
0x610e95d7 <_f_pow+23>: jbe 0x610e95eb <_f_pow+43>
0x610e95d9 <_f_pow+25>: fstl -0x8(%ebp)
0x610e95dc <_f_pow+28>: mov -0x4(%ebp),%eax
0x610e95df <_f_pow+31>: and $0x7fffffff,%eax
0x610e95e4 <_f_pow+36>: sub $0x7ff00000,%eax
0x610e95e9 <_f_pow+41>: js 0x610e95f7 <_f_pow+55>
0x610e95eb <_f_pow+43>: fstpl 0x10(%ebp)
0x610e95ee <_f_pow+46>: fstpl 0x8(%ebp)
0x610e95f1 <_f_pow+49>: leave
0x610e95f2 <_f_pow+50>: jmp 0x610ecd60 <pow>
0x610e95f7 <_f_pow+55>: fstp %st(1)
0x610e95f9 <_f_pow+57>: leave
0x610e95fa <_f_pow+58>: fldl 0x8(%ebp)
0x610e95fd <_f_pow+61>: fyl2x
0x610e95ff <_f_pow+63>: fld %st(0)
0x610e9601 <_f_pow+65>: frndint
0x610e9603 <_f_pow+67>: fsub %st,%st(1)
0x610e9605 <_f_pow+69>: fxch %st(1)
0x610e9607 <_f_pow+71>: fchs
0x610e9609 <_f_pow+73>: f2xm1
0x610e960b <_f_pow+75>: fld1
0x610e960d <_f_pow+77>: faddp %st,%st(1)
0x610e960f <_f_pow+79>: fxch %st(1)
0x610e9611 <_f_pow+81>: fld1
0x610e9613 <_f_pow+83>: fscale
0x610e9615 <_f_pow+85>: fstp %st(1)
0x610e9617 <_f_pow+87>: fmulp %st,%st(1)
0x610e9619 <_f_pow+89>: ret
------------------------------------------------------------------------
##########################
# Possible solution #
##########################
The following patch prevents the new gcc from moving "leave" before
accessing stack frame. It seems to work on gcc-4.3 but I'm not sure
whether it works on older versions of gcc as well.
------------------------------------------------------------------------
diff -c -r src/newlib/libm/machine/i386/f_pow.c src-new/newlib/libm/machine/i386/f_pow.c
*** src/newlib/libm/machine/i386/f_pow.c Sat Dec 21 06:31:20 2002
--- src-new/newlib/libm/machine/i386/f_pow.c Fri Aug 28 14:27:35 2009
***************
*** 35,43 ****
/* calculate x ** y as 2 ** (y log2(x)). On Intel, can only
raise 2 to an integer or a small fraction, thus, we have
to perform two steps 2**integer portion * 2**fraction. */
! asm ("fldl 8(%%ebp); fyl2x; fld %%st; frndint; fsub %%st,%%st(1);" \
"fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
! "fmulp" : "=t" (result) : "0" (y));
return result;
}
else /* all other strange cases, defer to normal pow() */
--- 35,43 ----
/* calculate x ** y as 2 ** (y log2(x)). On Intel, can only
raise 2 to an integer or a small fraction, thus, we have
to perform two steps 2**integer portion * 2**fraction. */
! asm ("fyl2x; fld %%st; frndint; fsub %%st,%%st(1);"\
"fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
! "fmulp" : "=t" (result) : "0" (x), "u" (y) : "st(1)" );
return result;
}
else /* all other strange cases, defer to normal pow() */
diff -c -r src/newlib/libm/machine/i386/f_powf.c src-new/newlib/libm/machine/i386/f_powf.c
*** src/newlib/libm/machine/i386/f_powf.c Sat Dec 21 06:31:20 2002
--- src-new/newlib/libm/machine/i386/f_powf.c Fri Aug 28 14:27:35 2009
***************
*** 35,43 ****
/* calculate x ** y as 2 ** (y log2(x)). On Intel, can only
raise 2 to an integer or a small fraction, thus, we have
to perform two steps 2**integer portion * 2**fraction. */
! asm ("flds 8(%%ebp); fyl2x; fld %%st; frndint; fsub %%st,%%st(1);" \
"fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
! "fmulp" : "=t" (result) : "0" (y));
return result;
}
else /* all other strange cases, defer to normal pow() */
--- 35,43 ----
/* calculate x ** y as 2 ** (y log2(x)). On Intel, can only
raise 2 to an integer or a small fraction, thus, we have
to perform two steps 2**integer portion * 2**fraction. */
! asm ("fyl2x; fld %%st; frndint; fsub %%st,%%st(1);"\
"fxch; fchs; f2xm1; fld1; faddp; fxch; fld1; fscale; fstp %%st(1);"\
! "fmulp" : "=t" (result) : "0" (x), "u" (y) : "st(1)" );
return result;
}
else /* all other strange cases, defer to normal pow() */
------------------------------------------------------------------------