This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PowerPC - A2 memcpy cache line size initialization


The PPC A2 memcpy implementation relies on a correct __cache_line_size value,
which is currently initialized in '__libc_start_main'
(sysdeps/unix/sysv/linux/powerpc/libc-start.c).

In some conditions (a DSO constructor, for instance) the cache line
size is not yet initialized and calling memcpy may result in a
SEGFAULT. This patch checks whether the cache line size is initialized
and, if not, uses a simple memcpy algorithm without any prefetch (dcbz)
instructions.

---

2012-01-19  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc32/a2/memcpy.S: Fix for when cache line size is
	not set.
	* sysdeps/powerpc/powerpc64/a2/memcpy.S: Likewise.


diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
index 472f7a3..1aba988 100644
--- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
@@ -1,5 +1,5 @@
 /* Optimized memcpy implementation for PowerPC A2.
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2012 Free Software Foundation, Inc.
    Contributed by Michael Brutman <brutman@us.ibm.com>.
    This file is part of the GNU C Library.
 
@@ -128,8 +128,29 @@ L(dst_aligned):
 
 	cmplwi  cr5, r9, 0
 	bne+    cr5,L(cachelineset)
-	li      r9,64
 
+/* __cache_line_size not set: generic byte copy without much optimization */
+	andi.	r0,r5,1		/* If length is odd copy one byte.  */
+	beq	L(cachelinenotset_align)
+	lbz	r7,0(r4)	/* Read one byte from source.  */
+	addi	r5,r5,-1	/* Update length.  */
+	addi	r4,r4,1		/* Update source pointer address.  */
+	stb	r7,0(r6)	/* Store one byte on dest.  */
+	addi	r6,r6,1		/* Update dest pointer address.  */
+L(cachelinenotset_align):
+	cmpwi   cr7,r5,0	/* If length is 0 return.  */
+	beqlr	cr7
+	ori	r2,r2,0		/* Force a new dispatch group.  */
+L(cachelinenotset_loop):
+	addic.	r5,r5,-2	/* Update length.  */
+	lbz	r7,0(r4)	/* Load 2 bytes from source.  */
+	lbz	r8,1(r4)
+	addi	r4,r4,2		/* Update source pointer address.  */
+	stb	r7,0(r6)	/* Store 2 bytes on dest.  */
+	stb	r8,1(r6)
+	addi	r6,r6,2		/* Update dest pointer address.  */
+	bne	L(cachelinenotset_loop)
+	blr
 
 
 L(cachelineset):
diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
index ac95171..f0c17ce 100644
--- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
@@ -1,5 +1,5 @@
 /* Optimized memcpy implementation for PowerPC A2.
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2010,2012 Free Software Foundation, Inc.
    Contributed by Michael Brutman <brutman@us.ibm.com>.
    This file is part of the GNU C Library.
 
@@ -117,9 +117,35 @@ EALIGN (BP_SYM (memcpy), 5, 0)
 
 	.align 4
 L(dst_aligned):
+	cmpdi	cr0,r9,0	/* Cache line size set? */
+	bne+	cr0,L(cachelineset)
+
+/* __cache_line_size not set: generic byte copy without much optimization */
+	clrldi.	r0,r5,63	/* If length is odd copy one byte */
+	beq	L(cachelinenotset_align)
+	lbz	r7,0(r4)	/* Read one byte from source */
+	addi	r5,r5,-1	/* Update length */
+	addi	r4,r4,1		/* Update source pointer address */
+	stb	r7,0(r6)	/* Store one byte at dest */
+	addi	r6,r6,1		/* Update dest pointer address */
+L(cachelinenotset_align):
+	cmpdi	cr7,r5,0	/* If length is 0 return */
+	beqlr	cr7
+	ori	r2,r2,0		/* Force a new dispatch group */
+L(cachelinenotset_loop):
+	addic.	r5,r5,-2	/* Update length */
+	lbz	r7,0(r4)	/* Load 2 bytes from source */
+	lbz	r8,1(r4)
+	addi	r4,r4,2		/* Update source pointer address */
+	stb	r7,0(r6)	/* Store 2 bytes on dest */
+	stb	r8,1(r6)
+	addi	r6,r6,2		/* Update dest pointer address */
+	bne	L(cachelinenotset_loop)
+	blr
 
 
-	cmpd   cr5,r5,r10       /* Less than a cacheline to go? */
+L(cachelineset):
+	cmpd	cr5,r5,r10       /* Less than a cacheline to go? */
 
 	neg     r7,r6           /* How far to next cacheline bdy? */
 
-- 
1.7.1



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]