This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] powerpc: 405/440/464/476 support and optimizations


> Since Todd doesn't have copyright assignment these changes are
> contributed to the FSF by IBM without author/contributor attribution.
> 
> You can simply attribute the changes to him in the email leaving his
> name out of the sources per FSF policy and submit them on IBM's
> behalf.
> 
> Ryan

Thanks.

Follows the updated patch without Todd's name on the sources.

Luis


2010-09-03  Luis Machado  <luisgpm@br.ibm.com>

	* sysdeps/powerpc/dl-procinfo.c: New file.
	* sysdeps/powerpc/dl-procinfo.h: New file.
	* sysdeps/powerpc/powerpc32/405/memcmp.S: New file.
	* sysdeps/powerpc/powerpc32/405/memcpy.S: New file.
	* sysdeps/powerpc/powerpc32/405/memset.S: New file.
	* sysdeps/powerpc/powerpc32/405/strcmp.S: New file.
	* sysdeps/powerpc/powerpc32/405/strcpy.S: New file.
	* sysdeps/powerpc/powerpc32/405/strlen.S: New file.
	* sysdeps/powerpc/powerpc32/405/strncmp.S: New file.
	* sysdeps/powerpc/powerpc32/440/Implies: New file.
	* sysdeps/powerpc/powerpc32/464/Implies: New file.
	* sysdeps/powerpc/powerpc32/476/Implies: New file.
	* sysdeps/powerpc/powerpc32/Makefile: New file.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies: New file.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies: New file.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies: New file.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies: New file.

diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
new file mode 100644
index 0000000..60fb465
--- /dev/null
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -0,0 +1,96 @@
+/* Data for processor capability information.  PowerPC version.
+   Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This information must be kept in sync with the _DL_HWCAP_COUNT and
+   _DL_PLATFORMS_COUNT definitions in dl-procinfo.h.
+
+   If anything should be added here check whether the size of each string
+   is still ok with the given array size.
+
+   All the #ifdefs in the definitions are quite irritating but
+   necessary if we want to avoid duplicating the information.  There
+   are three different modes:
+
+   - PROCINFO_DECL is defined.  This means we are only interested in
+     declarations.
+
+   - PROCINFO_DECL is not defined:
+
+     + if SHARED is defined the file is included in an array
+       initializer.  The .element = { ... } syntax is needed.
+
+     + if SHARED is not defined a normal array initialization is
+       needed.
+  */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED  /* Included inside an initializer: emit a designated member.  */
+  ._dl_powerpc_cap_flags
+#else  /* Declaration, or a stand-alone array definition.  */
+PROCINFO_CLASS const char _dl_powerpc_cap_flags[25][10]  /* 25 names; the 9-char ones plus NUL fill all 10 bytes.  */
+#endif
+#ifndef PROCINFO_DECL  /* The initializer is omitted when only declaring.  */
+= {
+    "vsx",  /* Entry 0.  */
+    "arch_2_06", "power6x", "dfp", "pa6t",
+    "arch_2_05", "ic_snoop", "smt", "booke",
+    "cellbe", "power5+", "power5", "power4",
+    "notb", "efpdouble", "efpsingle", "spe",
+    "ucache", "4xxmac", "mmu", "fpu",
+    "altivec", "ppc601", "ppc64", "ppc32",  /* Entry 24, the last one.  */
+  }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL  /* Stand-alone statement ends with ';'.  */
+;
+#else  /* Member of an enclosing initializer ends with ','.  */
+,
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED  /* Included inside an initializer: emit a designated member.  */
+  ._dl_powerpc_platforms
+#else  /* Declaration, or a stand-alone array definition.  */
+PROCINFO_CLASS const char _dl_powerpc_platforms[12][12]  /* 12 names; "ppc-cell-be" plus NUL fills all 12 bytes.  */
+#endif
+#ifndef PROCINFO_DECL  /* The initializer is omitted when only declaring.  */
+= {
+    [PPC_PLATFORM_POWER4] = "power4",  /* Rows are indexed by the PPC_PLATFORM_* constants.  */
+    [PPC_PLATFORM_PPC970] = "ppc970",
+    [PPC_PLATFORM_POWER5] = "power5",
+    [PPC_PLATFORM_POWER5_PLUS] = "power5+",
+    [PPC_PLATFORM_POWER6] = "power6",
+    [PPC_PLATFORM_CELL_BE] = "ppc-cell-be",
+    [PPC_PLATFORM_POWER6X] = "power6x",
+    [PPC_PLATFORM_POWER7] = "power7",
+    [PPC_PLATFORM_PPC405] = "ppc405",
+    [PPC_PLATFORM_PPC440] = "ppc440",
+    [PPC_PLATFORM_PPC464] = "ppc464",
+    [PPC_PLATFORM_PPC476] = "ppc476"
+  }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL  /* Stand-alone statement ends with ';'.  */
+;
+#else  /* Member of an enclosing initializer ends with ','.  */
+,
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
new file mode 100644
index 0000000..87279de
--- /dev/null
+++ b/sysdeps/powerpc/dl-procinfo.h
@@ -0,0 +1,168 @@
+/* Processor capability information handling macros.  PowerPC version.
+   Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _DL_PROCINFO_H
+#define _DL_PROCINFO_H	1
+
+#include <ldsodefs.h>
+#include <sysdep.h>		/* This defines the PPC_FEATURE_* macros.  */
+
+/* There are 25 bits used, but they are bits 7..31.  */
+#define _DL_HWCAP_FIRST		7
+#define _DL_HWCAP_COUNT		32
+
+/* These bits influence library search.  */
+#define HWCAP_IMPORTANT		(PPC_FEATURE_HAS_ALTIVEC \
+				+ PPC_FEATURE_HAS_DFP)
+
+#define _DL_PLATFORMS_COUNT	12
+
+#define _DL_FIRST_PLATFORM	32
+/* Mask to filter out platforms.  */
+#define _DL_HWCAP_PLATFORM      (((1ULL << _DL_PLATFORMS_COUNT) - 1) \
+				<< _DL_FIRST_PLATFORM)
+
+/* Platform bits (relative to _DL_FIRST_PLATFORM).  */
+#define PPC_PLATFORM_POWER4	      0
+#define PPC_PLATFORM_PPC970	      1
+#define PPC_PLATFORM_POWER5	      2
+#define PPC_PLATFORM_POWER5_PLUS      3
+#define PPC_PLATFORM_POWER6	      4
+#define PPC_PLATFORM_CELL_BE	      5
+#define PPC_PLATFORM_POWER6X	      6
+#define PPC_PLATFORM_POWER7	      7
+#define PPC_PLATFORM_PPC405	      8
+#define PPC_PLATFORM_PPC440	      9
+#define PPC_PLATFORM_PPC464	      10
+#define PPC_PLATFORM_PPC476	      11
+
+static inline const char *
+__attribute__ ((unused))
+_dl_hwcap_string (int idx)  /* Name of hwcap bit IDX; IDX is not range checked.  */
+{
+  return GLRO(dl_powerpc_cap_flags)[idx - _DL_HWCAP_FIRST];  /* Row 0 is bit _DL_HWCAP_FIRST.  */
+}
+
+static inline const char *
+__attribute__ ((unused))
+_dl_platform_string (int idx)  /* Name of platform bit IDX; IDX is not range checked.  */
+{
+  return GLRO(dl_powerpc_platforms)[idx - _DL_FIRST_PLATFORM];  /* Row 0 is bit _DL_FIRST_PLATFORM.  */
+}
+
+static inline int
+__attribute__ ((unused))
+_dl_string_hwcap (const char *str)  /* Bit number of the capability named STR, or -1.  */
+{
+  for (int i = _DL_HWCAP_FIRST; i < _DL_HWCAP_COUNT; ++i)  /* Linear scan over every named bit.  */
+    if (strcmp (str, _dl_hwcap_string (i)) == 0)
+      return i;
+  return -1;  /* No capability has this name.  */
+}
+
+static inline int
+__attribute__ ((unused, always_inline))
+_dl_string_platform (const char *str)  /* Platform bit number (_DL_FIRST_PLATFORM + index) for name STR, or -1.  */
+{
+  if (str == NULL)
+    return -1;
+
+  if (strncmp (str, GLRO(dl_powerpc_platforms)[PPC_PLATFORM_POWER4], 5) == 0)
+    {  /* STR starts with "power"; the next character selects the model.  */
+      int ret;
+      str += 5;  /* Now points at the model digit.  */
+      switch (*str)
+	{
+	case '4':
+	  ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER4;
+	  break;
+	case '5':
+	  ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5;
+	  if (str[1] == '+')
+	    {
+	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5_PLUS;
+	      ++str;  /* Step onto the '+' so the terminator test below looks past it.  */
+	    }
+	  break;
+	case '6':
+	  ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6;
+	  if (str[1] == 'x')
+	    {
+	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6X;
+	      ++str;  /* Step onto the 'x' so the terminator test below looks past it.  */
+	    }
+	  break;
+	case '7':
+	  ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER7;
+	  break;
+	default:
+	  return -1;
+	}
+      if (str[1] == '\0')  /* Accept only when nothing follows the model suffix.  */
+	return ret;
+    }
+  else if (strncmp (str, GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC970],
+		    3) == 0)
+    {  /* STR starts with "ppc"; match the rest against each ppc* table name.  */
+      if (strcmp (str + 3, GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC970]
+			   + 3) == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC970;
+      else if (strcmp (str + 3,
+		       GLRO(dl_powerpc_platforms)[PPC_PLATFORM_CELL_BE] + 3)
+	       == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_CELL_BE;
+      else if (strcmp (str + 3,
+		       GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC405] + 3)
+	       == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC405;
+      else if (strcmp (str + 3,
+		       GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC440] + 3)
+	       == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC440;
+      else if (strcmp (str + 3,
+		       GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC464] + 3)
+	       == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC464;
+      else if (strcmp (str + 3,
+		       GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC476] + 3)
+	       == 0)
+	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC476;
+    }
+
+  return -1;  /* Unknown prefix, or trailing characters after a "power" name.  */
+}
+
+#ifdef IS_IN_rtld
+static inline int
+__attribute__ ((unused))
+_dl_procinfo (int word)  /* Print the names of the hwcap bits set in WORD; always returns 0.  */
+{
+  _dl_printf ("AT_HWCAP:       ");
+
+  for (int i = _DL_HWCAP_FIRST; i < _DL_HWCAP_COUNT; ++i)
+    if (word & (1U << i))  /* 1U: I reaches 31, and 1 << 31 overflows a signed int.  */
+      _dl_printf (" %s", _dl_hwcap_string (i));
+
+  _dl_printf ("\n");
+
+  return 0;
+}
+#endif
+
+#endif /* dl-procinfo.h */
diff --git a/sysdeps/powerpc/powerpc32/405/memcmp.S b/sysdeps/powerpc/powerpc32/405/memcmp.S
new file mode 100644
index 0000000..653d3b5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/memcmp.S
@@ -0,0 +1,131 @@
+/* Optimized memcmp implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* memcmp
+
+	r3:source1 address, return equality
+	r4:source2 address
+	r5:byte count
+
+	Check 2 words from src1 and src2. If unequal jump to end and
+	return src1 > src2 or src1 < src2.
+	If count = zero check bytes before zero counter and then jump to end and
+	return src1 > src2, src1 < src2 or src1 = src2.
+	If src1 = src2 and bytes remain, repeat. */
+
+EALIGN (BP_SYM (memcmp), 5, 0)
+	srwi.	r6,r5,5
+	beq	L(preword2_count_loop)
+	mtctr	r6
+	clrlwi	r5,r5,27
+
+L(word8_compare_loop):
+	lwz	r10,0(r3)
+	lwz	r6,4(r3)
+	lwz	r8,0(r4)
+	lwz	r9,4(r4)
+	cmplw	cr5,r8,r10
+	cmplw	cr1,r9,r6
+	bne	cr5,L(st2)
+	bne	cr1,L(st1)
+	lwz	r10,8(r3)
+	lwz	r6,12(r3)
+	lwz	r8,8(r4)
+	lwz	r9,12(r4)
+	cmplw	cr5,r8,r10
+	cmplw	cr1,r9,r6
+	bne	cr5,L(st2)
+	bne	cr1,L(st1)
+	lwz	r10,16(r3)
+	lwz	r6,20(r3)
+	lwz	r8,16(r4)
+	lwz	r9,20(r4)
+	cmplw	cr5,r8,r10
+	cmplw	cr1,r9,r6
+	bne	cr5,L(st2)
+	bne	cr1,L(st1)
+	lwz	r10,24(r3)
+	lwz	r6,28(r3)
+	addi	r3,r3,0x20
+	lwz	r8,24(r4)
+	lwz	r9,28(r4)
+	addi	r4,r4,0x20
+	cmplw	cr5,r8,r10
+	cmplw	cr1,r9,r6
+	bne	cr5,L(st2)
+	bne	cr1,L(st1)
+	bdnz	L(word8_compare_loop)
+
+L(preword2_count_loop):
+	srwi.	r6,r5,3
+	beq	L(prebyte_count_loop)
+	mtctr	r6
+	clrlwi  r5,r5,29
+
+L(word2_count_loop):
+	lwz	r10,0(r3)
+	lwz	r6,4(r3)
+	addi	r3,r3,0x08
+	lwz	r8,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,0x08
+	cmplw	cr5,r8,r10
+	cmplw	cr1,r9,r6
+	bne	cr5,L(st2)
+	bne	cr1,L(st1)
+	bdnz	L(word2_count_loop)
+
+L(prebyte_count_loop):
+	addi	r5,r5,1
+	mtctr	r5
+	bdz	L(end_memcmp)
+
+L(byte_count_loop):
+	lbz	r6,0(r3)
+	addi	r3,r3,0x01
+	lbz	r8,0(r4)
+	addi	r4,r4,0x01
+	cmplw	cr5,r8,r6
+	bne	cr5,L(st2)
+	bdnz	L(byte_count_loop)
+
+L(end_memcmp):
+	addi	r3,r0,0
+	blr
+
+L(l_r):
+	addi	r3,r0,1
+	blr
+
+L(st1):
+	blt	cr1,L(l_r)
+	addi	r3,r0,-1
+	blr
+
+L(st2):
+	blt	cr5,L(l_r)
+	addi	r3,r0,-1
+	blr
+END (BP_SYM (memcmp))
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
diff --git a/sysdeps/powerpc/powerpc32/405/memcpy.S b/sysdeps/powerpc/powerpc32/405/memcpy.S
new file mode 100644
index 0000000..a654c73
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/memcpy.S
@@ -0,0 +1,133 @@
+/* Optimized memcpy implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* memcpy
+
+	r0:saved destination address (the return value)
+	r3:destination address
+	r4:source address
+	r5:byte count
+
+	Save the destination address in r0 so it can be returned.
+	If destination and source are both unaligned and the copy count is greater
+	than 256, copy 1-3 bytes to word-align the source, use the 32 byte copy
+	loop while 32 or more bytes remain, then copy the 0-31 extra bytes.
+	Otherwise copy backwards from the end: 0-15 odd bytes, then 16 byte chunks. */
+
+EALIGN (BP_SYM (memcpy), 5, 0)
+/* Check if bytes to copy are greater than 256 and if
+	source and destination are unaligned */
+	cmpwi	r5,0x0100
+	addi	r0,r3,0
+	ble	L(string_count_loop)
+	neg	r6,r3
+	clrlwi. r6,r6,30
+	beq	L(string_count_loop)
+	neg	r6,r4
+	clrlwi. r6,r6,30
+	beq	L(string_count_loop)
+	mtctr	r6
+	subf	r5,r6,r5
+
+L(unaligned_bytecopy_loop): /* Copy 1-3 bytes; the count comes from the source address */
+	lbz	r8,0x0(r4)
+	addi	r4,r4,1
+	stb	r8,0x0(r3)
+	addi	r3,r3,1
+	bdnz	L(unaligned_bytecopy_loop)
+	srwi.	r7,r5,5
+	beq	L(preword2_count_loop)
+	mtctr	r7
+
+L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+	lwz	r6,0(r4)
+	lwz	r7,4(r4)
+	lwz	r8,8(r4)
+	lwz	r9,12(r4)
+	subi	r5,r5,0x20
+	stw	r6,0(r3)
+	stw	r7,4(r3)
+	stw	r8,8(r3)
+	stw	r9,12(r3)
+	lwz	r6,16(r4)
+	lwz	r7,20(r4)
+	lwz	r8,24(r4)
+	lwz	r9,28(r4)
+	addi	r4,r4,0x20
+	stw	r6,16(r3)
+	stw	r7,20(r3)
+	stw	r8,24(r3)
+	stw	r9,28(r3)
+	addi	r3,r3,0x20
+	bdnz	L(word8_count_loop_no_dcbt)
+
+L(preword2_count_loop): /* Copy remaining 0-31 bytes */
+	clrlwi. r12,r5,27
+	beq	L(end_memcpy)
+	mtxer	r12
+	lswx	r5,0,r4
+	stswx	r5,0,r3
+	mr	 r3,r0
+	blr
+
+L(string_count_loop): /* Copy odd 0-15 bytes from the end of the buffers */
+	clrlwi. r12,r5,28
+	add	r3,r3,r5
+	add	r4,r4,r5
+	beq	L(pre_string_copy)
+	mtxer	r12
+	subf	r4,r12,r4
+	subf	r3,r12,r3
+	lswx	r6,0,r4
+	stswx	r6,0,r3
+
+L(pre_string_copy): /* Check how many 16 byte chunks to copy */
+	srwi.	r7,r5,4
+	beq	L(end_memcpy)
+	mtctr	r7
+
+L(word4_count_loop_no_dcbt): /* Copy 16 bytes per count, backwards, unrolled twice */
+	lwz	r6,-4(r4)
+	lwz	r7,-8(r4)
+	lwz	r8,-12(r4)
+	lwzu	r9,-16(r4)
+	stw	r6,-4(r3)
+	stw	r7,-8(r3)
+	stw	r8,-12(r3)
+	stwu	r9,-16(r3)
+	bdz	L(end_memcpy)
+	lwz	r6,-4(r4)
+	lwz	r7,-8(r4)
+	lwz	r8,-12(r4)
+	lwzu	r9,-16(r4)
+	stw	r6,-4(r3)
+	stw	r7,-8(r3)
+	stw	r8,-12(r3)
+	stwu	r9,-16(r3)
+	bdnz	L(word4_count_loop_no_dcbt)
+
+L(end_memcpy):
+	mr	 r3,r0
+	blr
+END (BP_SYM (memcpy))
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc32/405/memset.S b/sysdeps/powerpc/powerpc32/405/memset.S
new file mode 100644
index 0000000..69d5d4c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/memset.S
@@ -0,0 +1,155 @@
+/* Optimized memset implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* memset
+
+	r3:destination address and return address
+	r4:source integer to copy
+	r5:byte count
+	r11:sources integer to copy in all 32 bits of reg
+	r12:temp return address
+
+	Save the address to return in r12.
+	If destination is unaligned and count is greater than 255 bytes
+	set 0-3 bytes to make destination aligned.
+	If count is greater than 255 bytes and setting zero to memory
+	use dcbz (stepping 0x80 bytes each; NOTE(review): confirm on 405/440),
+	otherwise do the following:
+	If 16 or more bytes to set we use the 16 byte store loop.
+	Finally we set 0-15 extra bytes with string store. */
+
+EALIGN (BP_SYM (memset), 5, 0)
+	rlwinm	r11,r4,0,24,31
+	rlwimi	r11,r4,8,16,23
+	rlwimi	r11,r11,16,0,15
+	addi	r12,r3,0
+	cmpwi	r5,0x00FF
+	ble	L(preword8_count_loop)
+	cmpwi	r4,0x00
+	beq	L(use_dcbz)
+	neg	r6,r3
+	clrlwi.	r6,r6,30
+	beq	L(preword8_count_loop)
+	addi	r8,0,1
+	mtctr	r6
+	subi	r3,r3,1
+
+L(unaligned_bytecopy_loop):
+	stbu	r11,0x1(r3)
+	subf.	r5,r8,r5
+	beq	L(end_memset)
+	bdnz	L(unaligned_bytecopy_loop)
+	addi	r3,r3,1
+
+L(preword8_count_loop):
+	srwi.	r6,r5,4
+	beq	L(preword2_count_loop)
+	mtctr	r6
+	addi	r3,r3,-4
+	mr	r8,r11
+	mr	r9,r11
+	mr	r10,r11
+
+L(word8_count_loop_no_dcbt):
+	stwu	r8,4(r3)
+	stwu	r9,4(r3)
+	subi	r5,r5,0x10
+	stwu	r10,4(r3)
+	stwu	r11,4(r3)
+	bdnz	L(word8_count_loop_no_dcbt)
+	addi	r3,r3,4
+
+L(preword2_count_loop):
+	clrlwi.	r7,r5,28
+	beq	L(end_memset)
+	mr	r8,r11
+	mr	r9,r11
+	mr	r10,r11
+	mtxer	r7
+	stswx	r8,0,r3
+
+L(end_memset):
+	addi	r3,r12,0
+	blr
+
+L(use_dcbz):
+	neg	r6,r3
+	clrlwi.	r7,r6,28
+	beq	L(skip_string_loop)
+	mr	r8,r11
+	mr	r9,r11
+	mr	r10,r11
+	subf	r5,r7,r5
+	mtxer	r7
+	stswx	r8,0,r3
+	add	r3,r3,r7
+
+L(skip_string_loop):
+	clrlwi	r8,r6,25
+	srwi.	r8,r8,4
+	beq	L(dcbz_pre_loop)
+	mtctr	r8
+
+L(word_loop):
+	stw	r11,0(r3)
+	subi	r5,r5,0x10
+	stw	r11,4(r3)
+	stw	r11,8(r3)
+	stw	r11,12(r3)
+	addi	r3,r3,0x10
+	bdnz	L(word_loop)
+
+L(dcbz_pre_loop):
+	srwi	r6,r5,7
+	mtctr	r6
+	addi	r7,0,0
+
+L(dcbz_loop):
+	dcbz	r3,r7
+	addi	r3,r3,0x80
+	subi	r5,r5,0x80
+	bdnz	L(dcbz_loop)
+	srwi.	r6,r5,4
+	beq	L(postword2_count_loop)
+	mtctr	r6
+
+L(postword8_count_loop):
+	stw	r11,0(r3)
+	subi	r5,r5,0x10
+	stw	r11,4(r3)
+	stw	r11,8(r3)
+	stw	r11,12(r3)
+	addi	r3,r3,0x10
+	bdnz	L(postword8_count_loop)
+
+L(postword2_count_loop):
+	clrlwi.	r7,r5,28
+	beq	L(end_memset)
+	mr	r8,r11
+	mr	r9,r11
+	mr	r10,r11
+	mtxer	r7
+	stswx	r8,0,r3
+	b	L(end_memset)
+END (BP_SYM (memset))
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/powerpc/powerpc32/405/strcmp.S b/sysdeps/powerpc/powerpc32/405/strcmp.S
new file mode 100644
index 0000000..6eb5b5a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/strcmp.S
@@ -0,0 +1,137 @@
+/* Optimized strcmp implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* strcmp
+
+	Register Use
+	r0:temp return equality
+	r3:source1 address, return equality
+	r4:source2 address
+
+	Implementation description
+	Check 2 words from src1 and src2. If unequal jump to end and
+	return src1 > src2 or src1 < src2.
+	If null check bytes before null and then jump to end and
+	return src1 > src2, src1 < src2 or src1 = src2.
+	If src1 = src2 and no null, repeat. */
+
+EALIGN (BP_SYM(strcmp),5,0)
+	neg	r7,r3
+	clrlwi	r7,r7,20
+	neg	r8,r4
+	clrlwi	r8,r8,20
+	srwi.	r7,r7,5
+	beq	L(byte_loop)
+	srwi.	r8,r8,5
+	beq	L(byte_loop)
+	cmplw	r7,r8
+	mtctr	r7
+	ble	L(big_loop)
+	mtctr	r8
+
+L(big_loop):
+	lwz	r5,0(r3)
+	lwz	r6,4(r3)
+	lwz	r8,0(r4)
+	lwz	r9,4(r4)
+	dlmzb.	r12,r5,r6
+	bne	L(end_check)
+	cmplw	r5,r8
+	bne	L(st1)
+	cmplw	r6,r9
+	bne	L(st1)
+	lwz	r5,8(r3)
+	lwz	r6,12(r3)
+	lwz	r8,8(r4)
+	lwz	r9,12(r4)
+	dlmzb.	r12,r5,r6
+	bne	L(end_check)
+	cmplw	r5,r8
+	bne	L(st1)
+	cmplw	r6,r9
+	bne	L(st1)
+	lwz	r5,16(r3)
+	lwz	r6,20(r3)
+	lwz	r8,16(r4)
+	lwz	r9,20(r4)
+	dlmzb.	r12,r5,r6
+	bne	L(end_check)
+	cmplw	r5,r8
+	bne	L(st1)
+	cmplw	r6,r9
+	bne	L(st1)
+	lwz	r5,24(r3)
+	lwz	r6,28(r3)
+	addi	r3,r3,0x20
+	lwz	r8,24(r4)
+	lwz	r9,28(r4)
+	addi	r4,r4,0x20
+	dlmzb.	r12,r5,r6
+	bne	L(end_check)
+	cmplw	r5,r8
+	bne	L(st1)
+	cmplw	r6,r9
+	bne	L(st1)
+	bdnz	L(big_loop)
+	b	L(byte_loop)
+
+L(end_check):
+	subfic	r12,r12,4
+	blt	L(end_check2)
+	rlwinm	r12,r12,3,0,31
+	srw	r5,r5,r12
+	srw	r8,r8,r12
+	cmplw	r5,r8
+	bne	L(st1)
+	b	L(end_strcmp)
+
+L(end_check2):
+	addi	r12,r12,4
+	cmplw	r5,r8
+	rlwinm	r12,r12,3,0,31
+	bne	L(st1)
+	srw	r6,r6,r12
+	srw	r9,r9,r12
+	cmplw	r6,r9
+	bne	L(st1)
+
+L(end_strcmp):
+	addi	r3,r0,0
+	blr
+
+L(st1):
+	mfcr	r3
+	blr
+
+L(byte_loop):
+	lbz	r5,0(r3)
+	addi	r3,r3,1
+	lbz	r6,0(r4)
+	addi	r4,r4,1
+	cmplw	r5,r6
+	bne	L(st1)
+	cmpwi	r5,0
+	beq	L(end_strcmp)
+	b	L(byte_loop)
+END (BP_SYM (strcmp))
+libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc32/405/strcpy.S b/sysdeps/powerpc/powerpc32/405/strcpy.S
new file mode 100644
index 0000000..025ac16
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/strcpy.S
@@ -0,0 +1,110 @@
+/* Optimized strcpy implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* strcpy
+
+	Register Use
+	r3:destination and return address
+	r4:source address
+	r10:temp destination address
+
+	Implementation description
+	Loop by checking 2 words at a time, with dlmzb. Check if there is a null
+	in the 2 words. If there is a null jump to end checking to determine
+	where in the last 8 bytes it is. Copy the appropriate bytes of the last
+	8 according to the null position. */
+
+EALIGN (BP_SYM (strcpy), 5, 0)
+	neg	r7,r4
+	subi	r4,r4,1
+	clrlwi.	r8,r7,29
+	subi	r10,r3,1
+	beq	L(pre_word8_loop)
+	mtctr	r8
+
+L(loop):
+	lbzu	r5,0x01(r4)
+	cmpi	cr5,r5,0x0
+	stbu	r5,0x01(r10)
+	beq	cr5,L(end_strcpy)
+	bdnz	L(loop)
+
+L(pre_word8_loop):
+	subi	r4,r4,3
+	subi	r10,r10,3
+
+L(word8_loop):
+	lwzu	r5,0x04(r4)
+	lwzu	r6,0x04(r4)
+	dlmzb.	r11,r5,r6
+	bne	L(byte_copy)
+	stwu	r5,0x04(r10)
+	stwu	r6,0x04(r10)
+	lwzu	r5,0x04(r4)
+	lwzu	r6,0x04(r4)
+	dlmzb.	r11,r5,r6
+	bne	L(byte_copy)
+	stwu	r5,0x04(r10)
+	stwu	r6,0x04(r10)
+	lwzu	r5,0x04(r4)
+	lwzu	r6,0x04(r4)
+	dlmzb.	r11,r5,r6
+	bne	L(byte_copy)
+	stwu	r5,0x04(r10)
+	stwu	r6,0x04(r10)
+	lwzu	r5,0x04(r4)
+	lwzu	r6,0x04(r4)
+	dlmzb.	r11,r5,r6
+	bne	L(byte_copy)
+	stwu	r5,0x04(r10)
+	stwu	r6,0x04(r10)
+	b	L(word8_loop)
+
+L(last_bytes_copy):
+	stwu	r5,0x04(r10)
+	subi	r11,r11,4
+	mtctr	r11
+	addi	r10,r10,3
+	subi	r4,r4,1
+
+L(last_bytes_copy_loop):
+	lbzu	r5,0x01(r4)
+	stbu	r5,0x01(r10)
+	bdnz	L(last_bytes_copy_loop)
+	blr
+
+L(byte_copy):
+	blt	L(last_bytes_copy)
+	mtctr	r11
+	addi	r10,r10,3
+	subi	r4,r4,5
+
+L(last_bytes_copy_loop2):
+	lbzu	r5,0x01(r4)
+	stbu	r5,0x01(r10)
+	bdnz	L(last_bytes_copy_loop2)
+
+L(end_strcpy):
+	blr
+END (BP_SYM (strcpy))
+libc_hidden_builtin_def (strcpy)
diff --git a/sysdeps/powerpc/powerpc32/405/strlen.S b/sysdeps/powerpc/powerpc32/405/strlen.S
new file mode 100644
index 0000000..146b582
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/strlen.S
@@ -0,0 +1,78 @@
+/* Optimized strlen implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* strlen
+
+	Register Use
+	r3:source address and return length of string
+	r4:byte counter
+
+	Implementation description
+	Load 2 words at a time and count bytes, if we find null we subtract one from
+	the count and return the count value. We need to subtract one because
+	we don't count the null character as a byte. */
+
+EALIGN (BP_SYM (strlen),5,0)
+	neg	r7,r3
+	clrlwi.	r8,r7,29
+	addi	r4,0,0
+	beq	L(byte_count_loop)
+	mtctr	r8
+
+L(loop):
+	lbz	r5,0(r3)
+	cmpi	cr5,r5,0x0
+	addi	r3,r3,0x1
+	addi	r4,r4,0x1
+	beq	cr5,L(end_strlen)
+	bdnz	L(loop)
+
+L(byte_count_loop):
+	lwz	r5,0(r3)
+	lwz	r6,4(r3)
+	dlmzb.	r12,r5,r6
+	add	r4,r4,r12
+	bne	L(end_strlen)
+	lwz	r5,8(r3)
+	lwz	r6,12(r3)
+	dlmzb.	r12,r5,r6
+	add	r4,r4,r12
+	bne	L(end_strlen)
+	lwz	r5,16(r3)
+	lwz	r6,20(r3)
+	dlmzb.	r12,r5,r6
+	add	r4,r4,r12
+	bne	L(end_strlen)
+	lwz	r5,24(r3)
+	lwz	r6,28(r3)
+	addi	r3,r3,0x20
+	dlmzb.	r12,r5,r6
+	add	r4,r4,r12
+	bne	L(end_strlen)
+	b	L(byte_count_loop)
+
+L(end_strlen):
+	addi	r3,r4,-1
+	blr
+END (BP_SYM (strlen))
+libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/powerpc/powerpc32/405/strncmp.S b/sysdeps/powerpc/powerpc32/405/strncmp.S
new file mode 100644
index 0000000..c1beb23
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/405/strncmp.S
@@ -0,0 +1,131 @@
+/* Optimized strncmp implementation for PowerPC476.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* strncmp
+
+	Register Use
+	r0:temp return equality
+	r3:source1 address, return equality
+	r4:source2 address
+	r5:byte count
+
+	Implementation description
+	Limit the 2 word loop to the whole doublewords left before either source
+	reaches a 4096 byte boundary; remaining bytes are compared one at a time.
+	Check 2 words from src1 and src2. If unequal jump to end and
+	return src1 > src2 or src1 < src2.
+	If null check bytes before null and then jump to end and
+	return src1 > src2, src1 < src2 or src1 = src2.
+	If count = zero check bytes before zero counter and then jump to end and
+	return src1 > src2, src1 < src2 or src1 = src2.
+	If src1 = src2 and no null, repeat. */
+
+EALIGN (BP_SYM(strncmp),5,0)
+	neg	r7,r3
+	clrlwi	r7,r7,20
+	neg	r8,r4
+	clrlwi	r8,r8,20
+	srwi.	r7,r7,3
+	beq	L(prebyte_count_loop)
+	srwi.	r8,r8,3
+	beq	L(prebyte_count_loop)
+	cmplw	r7,r8
+	mtctr	r7
+	ble	L(preword2_count_loop)
+	mtctr	r8
+
+L(preword2_count_loop):
+	srwi.	r6,r5,3
+	beq	L(prebyte_count_loop)
+	mfctr	r7
+	cmplw	r6,r7
+	bgt	L(set_count_loop)
+	mtctr	r6
+	clrlwi	r5,r5,29
+
+L(word2_count_loop):
+	lwz	r10,0(r3)
+	lwz	r6,4(r3)
+	addi	r3,r3,0x08
+	lwz	r8,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,0x08
+	dlmzb.	r12,r10,r6
+	bne	L(end_check)
+	cmplw	r10,r8
+	bne	L(st1)
+	cmplw	r6,r9
+	bne	L(st1)
+	bdnz	L(word2_count_loop)
+
+L(prebyte_count_loop):
+	addi	r5,r5,1
+	mtctr	r5
+	bdz	L(end_strncmp)
+
+L(byte_count_loop):
+	lbz	r6,0(r3)
+	addi	r3,r3,1
+	lbz	r7,0(r4)
+	addi	r4,r4,1
+	cmplw	r6,r7
+	bne	L(st1)
+	cmpwi	r6,0
+	beq	L(end_strncmp)
+	bdnz	L(byte_count_loop)
+	b	L(end_strncmp)
+
+L(set_count_loop):
+	slwi	r7,r7,3
+	subf	r5,r7,r5
+	b	L(word2_count_loop)
+
+L(end_check):
+	subfic	r12,r12,4
+	blt	L(end_check2)
+	rlwinm	r12,r12,3,0,31
+	srw	r10,r10,r12
+	srw	r8,r8,r12
+	cmplw	r10,r8
+	bne	L(st1)
+	b	L(end_strncmp)
+
+L(end_check2):
+	addi	r12,r12,4
+	cmplw	r10,r8
+	rlwinm	r12,r12,3,0,31
+	bne	L(st1)
+	srw	r6,r6,r12
+	srw	r9,r9,r12
+	cmplw	r6,r9
+	bne	L(st1)
+
+L(end_strncmp):
+	addi	r3,r0,0
+	blr
+
+L(st1):
+	mfcr	r3
+	blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
diff --git a/sysdeps/powerpc/powerpc32/440/Implies b/sysdeps/powerpc/powerpc32/440/Implies
new file mode 100644
index 0000000..70c0d2e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/sysdeps/powerpc/powerpc32/464/Implies b/sysdeps/powerpc/powerpc32/464/Implies
new file mode 100644
index 0000000..c3e52c5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/sysdeps/powerpc/powerpc32/476/Implies b/sysdeps/powerpc/powerpc32/476/Implies
new file mode 100644
index 0000000..2829f9c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/sysdeps/powerpc/powerpc32/Makefile b/sysdeps/powerpc/powerpc32/Makefile
new file mode 100644
index 0000000..3d235de
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/Makefile
@@ -0,0 +1,8 @@
+# Some Powerpc32 variants assume soft-fp is the default even though there is
+# an fp variant so provide -mhard-float if --with-fp is explicitly passed.
+
+ifeq ($(with-fp),yes)
++cflags += -mhard-float
+ASFLAGS += -mhard-float
+sysdep-LDFLAGS += -mhard-float
+endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
new file mode 100644
index 0000000..70c0d2e
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
new file mode 100644
index 0000000..c3e52c5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
new file mode 100644
index 0000000..2829f9c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
new file mode 100644
index 0000000..80f9170
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/476/fpu
+powerpc/powerpc32/476



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]