This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.23-162-g25dba0a
- From: murphyp at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 7 Apr 2016 20:53:45 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.23-162-g25dba0a
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 25dba0ad054723196fb633ba5d8a463ef5cb775c (commit)
from 1d2a8245ff7bcc1d8be54f01e26c49297e446aba (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=25dba0ad054723196fb633ba5d8a463ef5cb775c
commit 25dba0ad054723196fb633ba5d8a463ef5cb775c
Author: Paul E. Murphy <murphyp@linux.vnet.ibm.com>
Date: Mon Mar 14 17:40:46 2016 -0400
powerpc: Add optimized P8 strspn
This utilizes vectors and bitmasks. For small needle, large
haystack, the performance improvement is upto 8x. For short
strings (0-4B), the cost of computing the bitmask dominates,
and is a tad slower.
diff --git a/ChangeLog b/ChangeLog
index c3b2053..9d48c8b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2016-04-07 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/multiarch/Makefile:
+ (sysdep_routines): Add new strspn targets.
+ * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c:
+ (__libc_ifunc_impl_list): Add strspn.
+ * sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S:
+ New file.
+ * sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c:
+ Likewise.
+ * sysdeps/powerpc/powerpc64/multiarch/strspn.c:
+ Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strspn.S:
+ Likewise.
+
2016-04-07 Florian Weimer <fweimer@redhat.com>
* misc/hsearch_r.c: Include <limits.h>.
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 3b0e3a0..7ed56bf 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -19,7 +19,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
strcmp-power8 strcmp-power7 strcmp-ppc64 \
strcat-power8 strcat-power7 strcat-ppc64 \
memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \
- strncpy-power8 strstr-power7 strstr-ppc64
+ strncpy-power8 strstr-power7 strstr-ppc64 \
+ strspn-power8 strspn-ppc64
CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 11a8215..3e1f099 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -322,6 +322,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcat, 1,
__strcat_ppc))
+ /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */
+ IFUNC_IMPL (i, name, strspn,
+ IFUNC_IMPL_ADD (array, i, strspn,
+ hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __strspn_power8)
+ IFUNC_IMPL_ADD (array, i, strspn, 1,
+ __strspn_ppc))
+
/* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
IFUNC_IMPL_ADD (array, i, strstr,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S
new file mode 100644
index 0000000..86a4e09
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S
@@ -0,0 +1,40 @@
+/* Optimized strspn implementation for POWER8.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef EALIGN
+#define EALIGN(name, alignt, words) \
+ .section ".text"; \
+ ENTRY_2(__strspn_power8) \
+ .align ALIGNARG(alignt); \
+ EALIGN_W_##words; \
+ BODY_LABEL(__strspn_power8): \
+ cfi_startproc; \
+ LOCALENTRY(__strspn_power8)
+
+#undef END
+#define END(name) \
+ cfi_endproc; \
+ TRACEBACK(__strspn_power8) \
+ END_2(__strspn_power8)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strspn.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c
new file mode 100644
index 0000000..4c63665
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c
@@ -0,0 +1,25 @@
+/* Default strspn implementation for PowerPC64.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRSPN __strspn_ppc
+#ifdef SHARED
+#undef libc_hidden_def
+#define libc_hidden_def(name)
+#endif
+
+#include <string/strspn.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/sysdeps/powerpc/powerpc64/multiarch/strspn.c
new file mode 100644
index 0000000..0e653f3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strspn.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strspn. PowerPC64 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+# include <string.h>
+# include <shlib-compat.h>
+# include "init-arch.h"
+
+#undef strspn
+extern __typeof (strspn) __libc_strspn;
+
+extern __typeof (strspn) __strspn_ppc attribute_hidden;
+extern __typeof (strspn) __strspn_power8 attribute_hidden;
+
+libc_ifunc (__libc_strspn,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __strspn_power8
+ : __strspn_ppc);
+
+weak_alias (__libc_strspn, strspn)
+libc_hidden_builtin_def (strspn)
diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S
new file mode 100644
index 0000000..0dda437
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strspn.S
@@ -0,0 +1,179 @@
+/* Optimized strspn implementation for Power8.
+
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* size_t [r3] strspn (const char *string [r3],
+ const char *needleAccept [r4]) */
+
+/* This takes a novel approach by computing a 256 bit mask whereby
+ each set bit implies the byte is "accepted". P8 vector hardware
+ has extremely efficient hardware for selecting bits from a mask.
+
+ One might ask "why not use bpermd for short strings"? It is
+ so slow that its performance about matches the generic PPC64
+ variant without any fancy masking, with the added expense of
+ making the mask. That was the first variant of this. */
+
+
+
+#include "sysdep.h"
+
+/* Simple macro to use VSX instructions in overlapping VR's. */
+#define XXVR(insn, vrt, vra, vrb) \
+ insn 32+vrt, 32+vra, 32+vrb
+
+/* ISA 2.07B instructions are not all defined for older binutils.
+ Macros are defined below for these newer instructions in order
+ to maintain compatibility. */
+
+/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. */
+#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
+#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
+
+#define VBPERMQ(t,a,b) .long (0x1000054c \
+ | ((t)<<(32-11)) \
+ | ((a)<<(32-16)) \
+ | ((b)<<(32-21)) )
+
+ /* This can be updated to power8 once the minimum version of
+ binutils supports power8 and the above instructions. */
+ .machine power7
+EALIGN(strspn, 4, 0)
+ CALL_MCOUNT 2
+
+ /* Generate useful constants for later on. */
+ vspltisb v1, 7
+ vspltisb v2, -1
+ vslb v1, v1, v1 /* 0x80 to swap high bit for vbpermq. */
+ vspltisb v10, 0
+ vsldoi v4, v10, v2, 2 /* 0xFFFF into vr4. */
+ XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches. */
+
+ /* Prepare to compute 256b mask. */
+ addi r4, r4, -1
+ li r5, 0
+ li r6, 0
+ li r7, 0
+ li r8, 0
+ li r11, 1
+ sldi r11, r11, 63
+
+ /* Start interleaved Mask computation.
+ This will eventually or 1's into ignored bits from vbpermq. */
+ lvsr v11, 0, r3
+ vspltb v11, v11, 0 /* Splat shift constant. */
+
+ /* Build a 256b mask in r5-r8. */
+ .align 4
+L(next_needle):
+ lbzu r9, 1(r4)
+
+ cmpldi cr0, r9, 0
+ cmpldi cr1, r9, 128
+
+ /* This is a little tricky. srd only uses the first 7 bits,
+ and if bit 7 is set, value is always 0. So, we can
+ effectively shift 128b in this case. */
+ xori r12, r9, 0x40 /* Invert bit 6. */
+ srd r10, r11, r9 /* Mask for bits 0-63. */
+ srd r12, r11, r12 /* Mask for bits 64-127. */
+
+ beq cr0, L(start_cmp)
+
+ /* Now, or the value into the correct GPR. */
+ bge cr1,L(needle_gt128)
+ or r5, r5, r10 /* 0 - 63. */
+ or r6, r6, r12 /* 64 - 127. */
+ b L(next_needle)
+
+ .align 4
+L(needle_gt128):
+ or r7, r7, r10 /* 128 - 191. */
+ or r8, r8, r12 /* 192 - 255. */
+ b L(next_needle)
+
+
+ .align 4
+L(start_cmp):
+ /* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */
+ mr r0, r3 /* Save r3 for final length computation. */
+ MTVRD (v5, r5)
+ MTVRD (v6, r6)
+ MTVRD (v7, r7)
+ MTVRD (v8, r8)
+
+ /* Continue interleaved mask generation. */
+#ifdef __LITTLE_ENDIAN__
+ vsrw v11, v2, v11 /* Note, shift ignores higher order bits. */
+ vsplth v11, v11, 0 /* Only care about the high 16 bits of v10. */
+#else
+ vslw v11, v2, v11 /* Note, shift ignores higher order bits. */
+ vsplth v11, v11, 1 /* Only care about the low 16 bits of v10. */
+#endif
+ lvx v0, 0, r3 /* Note, unaligned load ignores lower bits. */
+
+ /* Do the merging of the bitmask. */
+ XXVR(xxmrghd, v5, v5, v6)
+ XXVR(xxmrghd, v6, v7, v8)
+
+ /* Finish mask generation. */
+ vand v11, v11, v4 /* Throwaway bits not in the mask. */
+
+ /* Compare the first 1-16B, while masking unwanted bytes. */
+ clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */
+ vxor v9, v0, v1 /* Swap high bit. */
+ VBPERMQ (v8, v5, v0)
+ VBPERMQ (v7, v6, v9)
+ vor v7, v7, v8
+ vor v7, v7, v11 /* Ignore non-participating bytes. */
+ vcmpequh. v8, v7, v4
+ bnl cr6, L(done)
+
+ addi r3, r3, 16
+
+ .align 4
+L(vec):
+ lvx v0, 0, r3
+ addi r3, r3, 16
+ vxor v9, v0, v1 /* Swap high bit. */
+ VBPERMQ (v8, v5, v0)
+ VBPERMQ (v7, v6, v9)
+ vor v7, v7, v8
+ vcmpequh. v8, v7, v4
+ blt cr6, L(vec)
+
+ addi r3, r3, -16
+L(done):
+ subf r3, r0, r3
+ MFVRD (r10, v7)
+
+#ifdef __LITTLE_ENDIAN__
+ addi r0, r10, 1 /* Count the trailing 1's. */
+ andc r10, r10, r0
+ popcntd r10, r10
+#else
+ xori r10, r10, 0xffff /* Count leading 1's by inverting. */
+ addi r3, r3, -48 /* Account for the extra leading zeros. */
+ cntlzd r10, r10
+#endif
+
+ add r3, r3, r10
+ blr
+
+END(strspn)
+libc_hidden_builtin_def (strspn)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 15 ++
sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +-
.../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 8 +
.../powerpc/powerpc64/multiarch/strspn-power8.S | 40 +++++
sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c | 25 +++
sysdeps/powerpc/powerpc64/multiarch/strspn.c | 35 ++++
sysdeps/powerpc/powerpc64/power8/strspn.S | 179 ++++++++++++++++++++
7 files changed, 304 insertions(+), 1 deletions(-)
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn.c
create mode 100644 sysdeps/powerpc/powerpc64/power8/strspn.S
hooks/post-receive
--
GNU C Library master sources