This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.14-209-gad69cc2
- From: davem at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 24 Aug 2011 08:35:13 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.14-209-gad69cc2
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via ad69cc2652c0422ebac3296d914c25e470498ce1 (commit)
from 39dd69dfb9b8948720f05455df6775b70238803f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ad69cc2652c0422ebac3296d914c25e470498ce1
commit ad69cc2652c0422ebac3296d914c25e470498ce1
Author: David S. Miller <davem@davemloft.net>
Date: Wed Aug 24 01:32:24 2011 -0700
Rewritten v9/64-bit sparc strcmp.
diff --git a/ChangeLog b/ChangeLog
index 4fde8c2..ab754e5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2011-08-24 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc64/strcmp.S: Rewrite.
+
2011-08-24 Andreas Schwab <schwab@redhat.com>
* elf/Makefile: Add rules to build and run unload8 test.
diff --git a/sysdeps/sparc/sparc64/strcmp.S b/sysdeps/sparc/sparc64/strcmp.S
index fade4c4..263bb40 100644
--- a/sysdeps/sparc/sparc64/strcmp.S
+++ b/sysdeps/sparc/sparc64/strcmp.S
@@ -1,9 +1,8 @@
/* Compare two strings for differences.
For SPARC v9.
- Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
+ Contributed by David S. Miller <davem@davemloft.net>
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -22,259 +21,182 @@
#include <sysdep.h>
#include <asm/asi.h>
+
#ifndef XCC
.register %g2, #scratch
.register %g3, #scratch
.register %g6, #scratch
#endif
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
+#define rSTR1 %o0
+#define rSTR2 %o1
+#define r0101 %o2 /* 0x0101010101010101 */
+#define r8080 %o3 /* 0x8080808080808080 */
+#define rSTRXOR %o4
+#define rWORD1 %o5
+#define rTMP1 %g1
+#define rTMP2 %g2
+#define rWORD2 %g3
+#define rSLL %g4
+#define rSRL %g5
+#define rBARREL %g6
+
+ /* There are two cases: either the two pointers are aligned
+ * identically or they are not. If they have the same
+ * alignment we can use the normal full speed loop. Otherwise
+ * we have to use the barrel-shifter version.
*/
.text
- .align 32
+ .align 32
ENTRY(strcmp)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- bne,pn %icc, 7f /* CTI */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
-
- andcc %o1, 7, %g3 /* IEU1 */
- bne,pn %icc, 9f /* CTI */
- sllx %g1, 32, %g2 /* IEU0 Group */
- ldx [%o0], %o2 /* Load */
-
- or %g1, %g2, %g1 /* IEU0 Group */
-1: ldx [%o1], %o3 /* Load */
- sub %o1, %o0, %o1 /* IEU1 */
- sllx %g1, 7, %g2 /* IEU0 Group */
-
-2: add %o0, 8, %o0 /* IEU1 */
- sub %o2, %g1, %g3 /* IEU0 Group */
- subcc %o2, %o3, %g0 /* IEU1 */
- bne,pn %xcc, 13f /* CTI */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %g3, %o2, %g4 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o2 /* Load */
- andcc %g4, %g2, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o2 /* Load Group */
- andcc %g3, %g2, %g0 /* IEU1 */
-#endif
- be,a,pt %xcc, 2b /* CTI */
- ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */
-
- addcc %g3, %g1, %o4 /* IEU1 */
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 3f /* CTI */
-
- srlx %o4, 56, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 48, %o5 /* IEU0 */
-
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 40, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4f /* CTI */
- srlx %o4, 32, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
-
-3: srlx %o4, 24, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 16, %o5 /* IEU0 */
-
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 8, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4f /* CTI */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- bne,a,pn %icc, 2b /* CTI */
- ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */
-
-4: retl /* CTI+IEU1 Group */
- clr %o0 /* IEU0 */
-
- .align 32
-13: mov 0xff, %g6 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andcc %g4, %g2, %g0 /* IEU1 */
-#else
- andcc %g3, %g2, %g0 /* IEU1 */
-#endif
- be,pt %xcc, 25f /* CTI */
- addcc %g3, %g1, %o4 /* IEU1 Group */
-
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 23f /* CTI */
- sllx %g6, 56, %o5 /* IEU0 */
-
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
- sllx %g6, 48, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
-
- be,pn %xcc, 24f /* CTI */
- sllx %g6, 40, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
-
- sllx %g6, 32, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
-23: sllx %g6, 24, %o5 /* IEU0 */
-
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
- sllx %g6, 16, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
-
- be,pn %icc, 24f /* CTI */
- sllx %g6, 8, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
-
- mov %g6, %o5 /* IEU0 */
-25: cmp %o4, %o3 /* IEU1 Group */
-5: mov -1, %o0 /* IEU0 */
- retl /* CTI+IEU1 Group */
-
- movgu %xcc, 1, %o0 /* Single Group */
-
- .align 16
-24: sub %o5, 1, %g6 /* IEU0 Group */
- clr %o0 /* IEU1 */
- or %o5, %g6, %o5 /* IEU0 Group */
- andn %o4, %o5, %o4 /* IEU0 Group */
-
- andn %o3, %o5, %o3 /* IEU1 */
- cmp %o4, %o3 /* IEU1 Group */
- movgu %xcc, 1, %o0 /* Single Group */
- retl /* CTI+IEU1 Group */
-
- movlu %xcc, -1, %o0 /* Single Group */
-6: retl /* CTI+IEU1 Group */
- mov %o4, %o0 /* IEU0 */
-
- .align 16
-7: ldub [%o0], %o2 /* Load */
- add %o0, 1, %o0 /* IEU1 */
- ldub [%o1], %o3 /* Load Group */
- sllx %g1, 32, %g2 /* IEU0 */
-
-8: add %o1, 1, %o1 /* IEU1 */
- subcc %o2, %o3, %o4 /* IEU1 Group */
- bne,pn %xcc, 6b /* CTI */
- lduba [%o0] ASI_PNF, %o2 /* Load */
-
- brz,pn %o3, 4b /* CTI+IEU1 Group */
- lduba [%o1] ASI_PNF, %o3 /* Load */
- andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pn %icc, 8b /* CTI */
-
- add %o0, 1, %o0 /* IEU0 */
- or %g1, %g2, %g1 /* IEU0 Group */
- andcc %o1, 7, %g3 /* IEU1 */
- be,a,pn %icc, 1b /* CTI */
-
- ldxa [%o0] ASI_PNF, %o2 /* Load Group */
-9: sllx %g3, 3, %g5 /* IEU0 */
- mov 64, %o5 /* IEU1 */
- sub %o1, %g3, %o1 /* IEU0 Group */
-
- sub %o5, %g5, %o5 /* IEU1 */
- ldxa [%o1] ASI_PNF, %g6 /* Load Group */
- or %g1, %g2, %g1 /* IEU0 */
- sub %o1, %o0, %o1 /* IEU1 */
-
- sllx %g1, 7, %g2 /* IEU0 Group */
- add %o1, 8, %o1 /* IEU1 */
- /* %g1 = 0101010101010101
- * %g2 = 8080808080800880
- * %g5 = number of bits to shift left
- * %o5 = number of bits to shift right */
-10: sllx %g6, %g5, %o3 /* IEU0 Group */
- ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
-
-11: srlx %g6, %o5, %o4 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o2 /* Load */
- or %o3, %o4, %o3 /* IEU1 */
- add %o0, 8, %o0 /* IEU0 Group */
-
- subcc %o2, %o3, %g0 /* IEU1 */
-#ifdef EIGHTBIT_NOT_RARE
- sub %o2, %g1, %g3 /* IEU0 Group */
- bne,pn %xcc, 13b /* CTI */
- andn %g3, %o2, %g4 /* IEU0 Group */
-
- andcc %g4, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 10b /* CTI */
- srlx %g4, 32, %g4 /* IEU0 */
- andcc %g4, %g2, %g0 /* IEU1 Group */
-#else
- bne,pn %xcc, 13b /* CTI */
- sub %o2, %g1, %g3 /* IEU0 Group */
- andcc %g3, %g2, %g0 /* IEU1 Group */
-
- be,pt %xcc, 10b /* CTI */
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
-#endif
- be,pt %xcc, 12f /* CTI */
-
- srlx %o2, 56, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 48, %g3 /* IEU0 */
-
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 40, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4b /* CTI */
- srlx %o2, 32, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
-
-12: srlx %o2, 24, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 16, %g3 /* IEU0 */
-
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 8, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4b /* CTI */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- sllx %g6, %g5, %o3 /* IEU0 */
-
- ba,pt %xcc, 11b /* CTI Group */
- ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
+ or rSTR2, rSTR1, rTMP1
+ sethi %hi(0x80808080), r8080
+
+ andcc rTMP1, 0x7, %g0
+ bne,pn %icc, .Lmaybe_barrel_shift
+ or r8080, %lo(0x80808080), r8080
+ ldx [rSTR1], rWORD1
+
+ sub rSTR2, rSTR1, rSTR2
+ sllx r8080, 32, rTMP1
+
+ ldx [rSTR1 + rSTR2], rWORD2
+ or r8080, rTMP1, r8080
+
+ ba,pt %xcc, .Laligned_loop_entry
+ srlx r8080, 7, r0101
+
+ .align 32
+.Laligned_loop_entry:
+.Laligned_loop:
+ add rSTR1, 8, rSTR1
+
+ sub rWORD1, r0101, rTMP2
+ xorcc rWORD1, rWORD2, rSTRXOR
+ bne,pn %xcc, .Lcommon_endstring
+
+ andn r8080, rWORD1, rTMP1
+
+ ldxa [rSTR1] ASI_PNF, rWORD1
+ andcc rTMP1, rTMP2, %g0
+ be,a,pt %xcc, .Laligned_loop
+
+ ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2
+
+.Lcommon_equal:
+ retl
+ mov 0, %o0
+
+ /* All loops terminate here once they find an unequal word.
+ * If a zero byte appears in the word before the first unequal
+ * byte, we must report zero. Otherwise we report '1' or '-1'
+ * depending upon whether the first mis-matching byte is larger
+ * in the first string or the second, respectively.
+ *
+ * First we compute a 64-bit mask value that has "0x01" in
+ * each byte where a zero exists in rWORD1. rSTRXOR holds the
+ * value (rWORD1 ^ rWORD2). Therefore, if our "0x01" mask
+ * value, considered as an unsigned quantity, is "greater than"
+ * rSTRXOR, then a zero terminating byte comes first and we
+ * report '0'.
+ *
+ * The formula for this mask is:
+ *
+ * mask_tmp1 = ~rWORD1 & 0x8080808080808080;
+ * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
+ * 0x7f7f7f7f7f7f7f7f);
+ *
+ * mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
+ */
+.Lcommon_endstring:
+ andn rWORD1, r8080, rTMP2
+ or r8080, 1, %o1
+
+ mov 1, %o0
+ sub rTMP2, %o1, rTMP2
+
+ cmp rWORD1, rWORD2
+ andn rTMP1, rTMP2, rTMP1
+
+ movleu %xcc, -1, %o0
+ srlx rTMP1, 7, rTMP1
+
+ cmp rTMP1, rSTRXOR
+ retl
+ movgu %xcc, 0, %o0
+
+.Lmaybe_barrel_shift:
+ sub rSTR2, rSTR1, rSTR2
+ sllx r8080, 32, rTMP1
+
+ or r8080, rTMP1, r8080
+ and rSTR1, 0x7, rTMP2
+
+ srlx r8080, 7, r0101
+ andn rSTR1, 0x7, rSTR1
+
+ ldxa [rSTR1] ASI_PNF, rWORD1
+ andcc rSTR2, 0x7, rSLL
+ sll rTMP2, 3, rSTRXOR
+
+ bne,pn %icc, .Lneed_barrel_shift
+ mov -1, rTMP1
+ ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+ srlx rTMP1, rSTRXOR, rTMP2
+
+ orn rWORD1, rTMP2, rWORD1
+ ba,pt %xcc, .Laligned_loop_entry
+ orn rBARREL, rTMP2, rWORD2
+
+.Lneed_barrel_shift:
+ sllx rSLL, 3, rSLL
+ andn rSTR2, 0x7, rSTR2
+
+ ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
+ mov 64, rTMP2
+ sub rTMP2, rSLL, rSRL
+
+ srlx rTMP1, rSTRXOR, rTMP1
+ add rSTR2, 8, rSTR2
+
+ orn rWORD1, rTMP1, rWORD1
+ sllx rBARREL, rSLL, rWORD2
+ ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+ add rSTR1, 8, rSTR1
+ sub rWORD1, r0101, rTMP2
+
+ srlx rBARREL, rSRL, rSTRXOR
+
+ or rWORD2, rSTRXOR, rWORD2
+
+ orn rWORD2, rTMP1, rWORD2
+ ba,pt %xcc, .Lbarrel_shift_loop_entry
+ andn r8080, rWORD1, rTMP1
+
+.Lbarrel_shift_loop:
+ sllx rBARREL, rSLL, rWORD2
+ ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+ add rSTR1, 8, rSTR1
+ sub rWORD1, r0101, rTMP2
+
+ srlx rBARREL, rSRL, rSTRXOR
+ andn r8080, rWORD1, rTMP1
+
+ or rWORD2, rSTRXOR, rWORD2
+
+.Lbarrel_shift_loop_entry:
+ xorcc rWORD1, rWORD2, rSTRXOR
+ bne,pn %xcc, .Lcommon_endstring
+
+ andcc rTMP1, rTMP2, %g0
+ be,a,pt %xcc, .Lbarrel_shift_loop
+ ldxa [rSTR1] ASI_PNF, rWORD1
+
+ retl
+ mov 0, %o0
END(strcmp)
libc_hidden_builtin_def (strcmp)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 4 +
sysdeps/sparc/sparc64/strcmp.S | 416 ++++++++++++++++------------------------
2 files changed, 173 insertions(+), 247 deletions(-)
hooks/post-receive
--
GNU C Library master sources