This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] PPC clean from powerpc-cpu merge
- From: Steven Munroe <munroesj at us dot ibm dot com>
- To: libc-alpha at sources dot redhat dot com, Ryan Arnold <rsa at us dot ibm dot com>
- Date: Tue, 10 Jul 2007 15:19:57 -0500
- Subject: [PATCH] PPC clean from powerpc-cpu merge
Realized that gcc defaults to -mcpu=common/powerpc64, which assumes
ISA V1.0 and the old-style branch hints. The ISA V2.0+ introduced the
new style branch hints.
(http://www.power.org/resources/downloads/PowerISA_Public.pdf page 28)
This can be a problem as the --with-cpu= configure only asserts -mcpu=
to gcc for "c" files (not "*.S" assembler files). The encoding of the ISA
V1.0 branch hints when used on ISA V2.0+ hardware results in "No branch
hint given" (effectively ignored). To make the branch hints effective we
need to either pass -mcpu=power4 to as or insert the appropriate
".machine power#" statement in the source. The attached patch takes the
.machine approach.
Also found that ./sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S was
missing an include for math_ldbl_opt.h which hosed the version symbol
generation.
2007-07-07 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/powerpc/powerpc32/power4/memcmp.S: Specify .machine power4
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc32/power4/memcpy.S: Specify .machine power4
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc32/power4/memset.S: Specify .machine power4
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc32/power6/memcpy.S: Specify .machine power6
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc64/power4/memcmp.S: Specify .machine power4
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc64/power4/memcpy.S: Specify .machine power4
to get ISA-V2.0 branch hints.
* sysdeps/powerpc/powerpc64/power4/memset.S: Specify .machine power4
to get ISA-V2.0 branch hints. Remove toc ref to __cache_line_size.
* sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S:
Include math_ldbl_opt.h
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcmp.S libc25/sysdeps/powerpc/powerpc32/power4/memcmp.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcmp.S 2007-06-03 15:49:26.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memcmp.S 2007-07-10 14:42:31.508087560 -0500
@@ -23,6 +23,7 @@
/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+ .machine power4
EALIGN (BP_SYM(memcmp), 4, 0)
CALL_MCOUNT
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcpy.S libc25/sysdeps/powerpc/powerpc32/power4/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcpy.S 2007-06-03 15:49:42.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memcpy.S 2007-07-10 14:40:48.066969976 -0500
@@ -34,6 +34,7 @@
possible when both source and destination are word aligned.
Each case has an optimized unrolled loop. */
+ .machine power4
EALIGN (BP_SYM (memcpy), 5, 0)
CALL_MCOUNT
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memset.S libc25/sysdeps/powerpc/powerpc32/power4/memset.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memset.S 2007-06-03 15:49:48.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memset.S 2007-06-18 14:49:20.000000000 -0500
@@ -28,6 +28,7 @@
cache line (1024 bits). There is a special case for setting cache lines
to 0, to take advantage of the dcbz instruction. */
+ .machine power4
EALIGN (BP_SYM (memset), 5, 0)
CALL_MCOUNT
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S libc25/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2007-06-03 15:54:19.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2007-06-21 16:33:55.000000000 -0500
@@ -18,6 +18,7 @@
02110-1301 USA. */
#include <sysdep.h>
+#include <math_ldbl_opt.h>
/* long long int[r3, r4] __llrint (double x[fp1]) */
ENTRY (__llrint)
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/memcpy.S libc25/sysdeps/powerpc/powerpc32/power6/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/memcpy.S 2007-06-03 15:53:47.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power6/memcpy.S 2007-07-10 14:40:14.676989000 -0500
@@ -34,6 +34,7 @@
possible when both source and destination are word aligned.
Each case has an optimized unrolled loop. */
+ .machine power6
EALIGN (BP_SYM (memcpy), 5, 0)
CALL_MCOUNT
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcmp.S libc25/sysdeps/powerpc/powerpc64/power4/memcmp.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcmp.S 2007-06-03 16:14:29.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memcmp.S 2007-07-10 14:42:47.355074944 -0500
@@ -23,6 +23,7 @@
/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+ .machine power4
EALIGN (BP_SYM(memcmp), 4, 0)
CALL_MCOUNT 3
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcpy.S libc25/sysdeps/powerpc/powerpc64/power4/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcpy.S 2007-06-03 16:14:43.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memcpy.S 2007-07-10 14:41:21.642057832 -0500
@@ -36,6 +36,7 @@
posible when both source and destination are doubleword aligned.
Each case has a optimized unrolled loop. */
+ .machine power4
EALIGN (BP_SYM (memcpy), 5, 0)
CALL_MCOUNT 3
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memset.S libc25/sysdeps/powerpc/powerpc64/power4/memset.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memset.S 2007-06-03 16:14:50.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memset.S 2007-07-09 11:17:33.449056568 -0500
@@ -22,12 +22,6 @@
#include <bp-sym.h>
#include <bp-asm.h>
- .section ".toc","aw"
-.LC0:
- .tc __cache_line_size[TC],__cache_line_size
- .section ".text"
- .align 2
-
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
Returns 's'.
@@ -35,6 +29,7 @@
cache line (256 bits). There is a special case for setting cache lines
to 0, to take advantage of the dcbz instruction. */
+ .machine power4
EALIGN (BP_SYM (memset), 5, 0)
CALL_MCOUNT 3