This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] libc/machine/m68k: Incorporate memcpy and memset.


Hi,

Attached is a patch to incorporate memcpy and memset optimized for
m68k.

Tested on fido-none-elf and m68k-elf configured with --with-arch=cf.
OK to apply?

Kazu Hirata

2007-04-27  Kazu Hirata  <kazu@codesourcery.com>

	* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S
	and memset.S.
	* libc/machine/m68k/Makefile.in: Regenerate.
	* libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New.

Index: newlib/libc/machine/m68k/Makefile.am
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.am,v
retrieving revision 1.3
diff -u -d -p -r1.3 Makefile.am
--- newlib/libc/machine/m68k/Makefile.am	1 May 2006 22:01:01 -0000	1.3
+++ newlib/libc/machine/m68k/Makefile.am	23 Apr 2007 16:51:16 -0000
@@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
 
 noinst_LIBRARIES = lib.a
 
-lib_a_SOURCES = setjmp.S strcpy.c strlen.c
+lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
 lib_a_CCASFLAGS=$(AM_CCASFLAGS)
 lib_a_CFLAGS=$(AM_CFLAGS)
 
Index: newlib/libc/machine/m68k/Makefile.in
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.in,v
retrieving revision 1.11
diff -u -d -p -r1.11 Makefile.in
--- newlib/libc/machine/m68k/Makefile.in	18 Dec 2006 20:32:45 -0000	1.11
+++ newlib/libc/machine/m68k/Makefile.in	23 Apr 2007 16:51:16 -0000
@@ -56,7 +56,8 @@ ARFLAGS = cru
 lib_a_AR = $(AR) $(ARFLAGS)
 lib_a_LIBADD =
 am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
-	lib_a-strlen.$(OBJEXT)
+	lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
+	lib_a-memset.$(OBJEXT)
 lib_a_OBJECTS = $(am_lib_a_OBJECTS)
 DEFAULT_INCLUDES = -I. -I$(srcdir)
 depcomp =
@@ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus
 INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
 AM_CCASFLAGS = $(INCLUDES)
 noinst_LIBRARIES = lib.a
-lib_a_SOURCES = setjmp.S strcpy.c strlen.c
+lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
 lib_a_CCASFLAGS = $(AM_CCASFLAGS)
 lib_a_CFLAGS = $(AM_CFLAGS)
 ACLOCAL_AMFLAGS = -I ../../..
@@ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S
 lib_a-setjmp.obj: setjmp.S
 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $(CYGPATH_W) '$(srcdir)/setjmp.S'; fi`
 
+lib_a-memcpy.o: memcpy.S
+	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S
+
+lib_a-memcpy.obj: memcpy.S
+	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi`
+
+lib_a-memset.o: memset.S
+	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S
+
+lib_a-memset.obj: memset.S
+	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $(CYGPATH_W) '$(srcdir)/memset.S'; fi`
+
 .c.o:
 	$(COMPILE) -c $<
 
Index: newlib/libc/machine/m68k/memcpy.S
===================================================================
RCS file: newlib/libc/machine/m68k/memcpy.S
diff -N newlib/libc/machine/m68k/memcpy.S
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ newlib/libc/machine/m68k/memcpy.S	23 Apr 2007 16:51:16 -0000
@@ -0,0 +1,93 @@
+/* a-memcpy.s -- memcpy, optimised for m68k asm
+ *
+ * Copyright (c) 2007 mocom software GmbH & Co KG)
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+	.text
+	.align	4
+
+	.globl	memcpy
+	.type	memcpy, @function
+
+/*   void *memcpy (void *dest, const void *src, size_t len)
+ *
+ *   Arguments are read from 4(%sp), 8(%sp) and 12(%sp); the original dest
+ *   pointer is returned in %d0.  No argument checking is done.
+ *
+ *   strategy:
+ *       - fewer than 8 bytes: plain byte loop.
+ *       - otherwise copy 1-3 leading bytes so that the destination (write)
+ *         pointer is long word aligned, move the bulk in unrolled 16-byte
+ *         steps, then finish the 0-3 residue bytes in the byte loop.
+ */
+
+memcpy:
+	move.l	4(%sp),%a0	| dest ptr
+	move.l	8(%sp),%a1	| src ptr
+	move.l	12(%sp),%d1	| len
+	cmp.l	#8,%d1		| if fewer than 8 bytes to transfer,
+	blo	.Lresidue	| do not optimise
+
+	/* align dest */
+	move.l	%a0,%d0		| copy of dest
+	neg.l	%d0
+	and.l	#3,%d0		| bytes needed to reach long alignment (0-3)
+	beq	2f		| is aligned?
+	sub.l	%d0,%d1		| they no longer count towards len
+	lsr.l	#1,%d0		| word align needed?
+	bcc	1f
+	move.b	(%a1)+,(%a0)+
+1:
+	lsr.l	#1,%d0		| long align needed?
+	bcc	2f
+	move.w	(%a1)+,(%a0)+
+2:
+
+	/* long word transfers */
+	move.l	%d1,%d0
+	and.l	#3,%d1		| byte residue
+	lsr.l	#3,%d0
+	bcc	1f		| carry set for 4-byte residue
+	move.l	(%a1)+,(%a0)+
+1:
+	lsr.l	#1,%d0		| number of 16-byte transfers
+	bcc	.Lcopy		| carry set for 8-byte residue
+	bra	.Lcopy8
+
+1:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy8:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy:
+#if !defined (__mcoldfire__)
+	dbra	%d0,1b		| counts down the low word of %d0 only
+	sub.l	#0x10000,%d0	| low word is now 0xffff: borrow from high word
+#else
+	subq.l	#1,%d0
+#endif
+	bpl	1b		| more 16-byte transfers left
+	bra	.Lresidue
+
+1:
+	move.b	(%a1)+,(%a0)+	| move residue bytes
+.Lresidue:
+#if !defined (__mcoldfire__)
+	dbra	%d1,1b		| loop until done (%d1 is below 8 here)
+#else
+	subq.l	#1,%d1
+	bpl	1b
+#endif
+	move.l	4(%sp),%d0	| return value
+	rts
Index: newlib/libc/machine/m68k/memset.S
===================================================================
RCS file: newlib/libc/machine/m68k/memset.S
diff -N newlib/libc/machine/m68k/memset.S
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ newlib/libc/machine/m68k/memset.S	23 Apr 2007 16:51:16 -0000
@@ -0,0 +1,97 @@
+/* a-memset.s -- memset, optimised for fido asm
+ *
+ * Copyright (c) 2007 mocom software GmbH & Co KG)
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+	.text
+	.align	4
+
+	.globl	memset
+	.type	memset, @function
+
+|   void *memset (void *dest, int value, size_t len)
+|
+|   Arguments are read from 4(%sp), 8(%sp) and 12(%sp); the original dest
+|   pointer is returned in %d0.  No argument checking is done.
+|
+|   strategy:
+|       - fewer than 16 bytes: plain byte loop.
+|       - otherwise replicate the low byte of the value into all four bytes
+|         of %d0, store 1-3 leading bytes so that the destination (write)
+|         pointer is long word aligned, fill long word wise, then finish
+|         the 0-3 residue bytes in the byte loop.
+|
+|   VG, 2006
+|
+|	bugfixes (VG, April 2007):
+|		- byte value is distributed even if a non-byte value is given
+|		- residue byte transfer was not working
+|
+memset:
+	move.l	4(%sp),%a0	| dest ptr
+	move.l	8(%sp),%d0	| value
+	move.l	12(%sp),%d1	| len
+	cmp.l	#16,%d1
+	blo	.Lbset		| below, byte fills
+	|
+	move.l	%d2,-(%sp)	| need a register
+	move.b	%d0,%d2		| distribute low byte to all bytes in long word
+	lsl.l	#8,%d0
+	move.b	%d2,%d0		| low word of %d0 is now the byte twice
+	move.w	%d0,%d2
+	swap	%d0		| rotate 16
+	move.w	%d2,%d0		| all four bytes of %d0 hold the fill byte
+	|
+	move.l	%a0,%d2		| copy of dest
+	neg.l	%d2		| 1 2 3 ==> 3 2 1
+	and.l	#3,%d2		| bytes needed to reach long alignment (0-3)
+	beq	2f		| is aligned
+	|
+	sub.l	%d2,%d1		| fix length
+	lsr.l	#1,%d2		| word align needed?
+	bcc	1f
+	move.b	%d0,(%a0)+	| fill byte
+1:
+	lsr.l	#1,%d2		| long align needed?
+	bcc	2f
+	move.w	%d0,(%a0)+	| fill word
+2:
+	move.l	%d1,%d2		| number of long transfers (at least 3)
+	lsr.l	#2,%d2
+	subq.l	#1,%d2		| first long is stored before the count is tested
+
+1:
+	move.l	%d0,(%a0)+	| fill long words
+.Llset:
+#if !defined (__mcoldfire__)
+	dbra	%d2,1b		| counts down the low word of %d2 only
+	sub.l	#0x10000,%d2	| low word is now 0xffff: borrow from high word
+#else
+	subq.l	#1,%d2
+#endif
+	bpl	1b		| more long words left
+	and.l	#3,%d1		| residue byte transfers, fixed
+	move.l	(%sp)+,%d2	| restore d2
+	bra	.Lbset
+
+1:
+	move.b	%d0,(%a0)+	| fill residue bytes
+.Lbset:
+#if !defined (__mcoldfire__)
+	dbra	%d1,1b		| loop until done (%d1 is below 16 here)
+#else
+	subq.l	#1,%d1
+	bpl	1b
+#endif
+	move.l	4(%sp),%d0	| return value
+	rts


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]