This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch roland/arm-memcpy created. glibc-2.17-823-gd5eebff
- From: roland at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 17 Jun 2013 18:12:23 -0000
- Subject: GNU C Library master sources branch roland/arm-memcpy created. glibc-2.17-823-gd5eebff
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, roland/arm-memcpy has been created
at d5eebff9fdd4cd6609e16eccf40a3499f8cef870 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d5eebff9fdd4cd6609e16eccf40a3499f8cef870
commit d5eebff9fdd4cd6609e16eccf40a3499f8cef870
Author: Roland McGrath <roland@hack.frob.com>
Date: Mon Jun 17 11:12:05 2013 -0700
unfinished
diff --git a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
index f83276a..902b650 100644
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
@@ -33,6 +33,7 @@
#define NO_THUMB
#endif
#include <sysdep.h>
+#include <arm-features.h>
.syntax unified
/* This implementation requires ARM state. */
@@ -73,6 +74,122 @@
#define dst ip
#define tmp2 r10
+/* These two macros both work by repeated invocation of the macro
+ dispatch_step (not defined here). That macro performs one "step",
+ doing one load instruction and one store instruction to copy one
+ "unit". On entry, TMP1 contains the number of bytes to be copied,
+ a multiple of the unit size. The macro clobbers TMP1 in the
+ process of doing a computed jump to the tail containing the
+ appropriate number of steps.
+
+ In dispatch_7_dword, dispatch_step is invoked seven times, with an
+ argument that is 7 for the first and 1 for the last. Units are
+ double-words (8 bytes). TMP1 is at most 56.
+
+ In dispatch_15_word, dispatch_step is invoked fifteen times,
+ with an argument that is 15 for the first and 1 for the last.
+ Units are words (4 bytes). TMP1 is at most 60. */
+
+#ifndef ARM_ALWAYS_BX
+# if ARM_BX_ALIGN_LOG2 != 2
+# error case not handled
+# endif
+ .macro dispatch_7_dword
+ rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE)
+ add pc, pc, tmp1
+ dispatch_step 7
+ dispatch_step 6
+ dispatch_step 5
+ dispatch_step 4
+ dispatch_step 3
+ dispatch_step 2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+
+ .macro dispatch_15_word
+ rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2)
+ add pc, pc, tmp1, lsl #1
+ dispatch_step 15
+ dispatch_step 14
+ dispatch_step 13
+ dispatch_step 12
+ dispatch_step 11
+ dispatch_step 10
+ dispatch_step 9
+ dispatch_step 8
+ dispatch_step 7
+ dispatch_step 6
+ dispatch_step 5
+ dispatch_step 4
+ dispatch_step 3
+ dispatch_step 2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+#else
+# if ARM_BX_ALIGN_LOG2 < 3
+# error case not handled
+# endif
+# error unfinished
+ .macro dispatch_7_dword
+ rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE)
+ add pc, pc, tmp1
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 7
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 6
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 5
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 4
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 3
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 2
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+
+ .macro dispatch_15_word
+ rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2)
+ add pc, pc, tmp1, lsl #1
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 15
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 14
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 13
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 12
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 11
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 10
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 9
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 8
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 7
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 6
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 5
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 4
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 3
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 2
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+
+#endif
+
#ifndef USE_NEON
/* For bulk copies using GP registers. */
#define A_l r2 /* Call-clobbered. */
@@ -140,23 +257,22 @@ ENTRY(memcpy)
.Ltail63unaligned:
#ifdef USE_NEON
- and tmp1, count, #0x38
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
- vld1.8 {d0}, [src]! /* 14 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 12 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 10 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 8 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 6 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 4 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 2 words to go. */
+ /* These need an extra layer of macro just to work around a
+ bug in the assembler's parser when an operand starts with
+ a {...}. */
+ .macro neon_load_d0 src
+ vld1.8 {d0}, [src]!
+ .endm
+ .macro neon_store_d0 dst
vst1.8 {d0}, [dst]!
+ .endm
+
+ and tmp1, count, #0x38
+ .macro dispatch_step i
+ sfi_breg src, neon_load_d0 src
+ sfi_breg dst, neon_store_d0 dst
+ .endm
+ dispatch_7_dword
tst count, #4
ldrne tmp1, [src], #4
@@ -167,47 +283,14 @@ ENTRY(memcpy)
and tmp1, count, #0x3c
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(60 - PC_OFS/2 + INSN_SIZE/2)
/* Jump directly into the sequence below at the correct offset. */
- add pc, pc, tmp1, lsl #1
-
- ldr tmp1, [src, #-60] /* 15 words to go. */
- str tmp1, [dst, #-60]
-
- ldr tmp1, [src, #-56] /* 14 words to go. */
- str tmp1, [dst, #-56]
- ldr tmp1, [src, #-52]
- str tmp1, [dst, #-52]
-
- ldr tmp1, [src, #-48] /* 12 words to go. */
- str tmp1, [dst, #-48]
- ldr tmp1, [src, #-44]
- str tmp1, [dst, #-44]
-
- ldr tmp1, [src, #-40] /* 10 words to go. */
- str tmp1, [dst, #-40]
- ldr tmp1, [src, #-36]
- str tmp1, [dst, #-36]
-
- ldr tmp1, [src, #-32] /* 8 words to go. */
- str tmp1, [dst, #-32]
- ldr tmp1, [src, #-28]
- str tmp1, [dst, #-28]
-
- ldr tmp1, [src, #-24] /* 6 words to go. */
- str tmp1, [dst, #-24]
- ldr tmp1, [src, #-20]
- str tmp1, [dst, #-20]
-
- ldr tmp1, [src, #-16] /* 4 words to go. */
- str tmp1, [dst, #-16]
- ldr tmp1, [src, #-12]
- str tmp1, [dst, #-12]
-
- ldr tmp1, [src, #-8] /* 2 words to go. */
- str tmp1, [dst, #-8]
- ldr tmp1, [src, #-4]
- str tmp1, [dst, #-4]
+ .macro dispatch_step i
+ sfi_breg src, \
+ ldr tmp1, [src, #-(\i * 4)]
+ sfi_breg dst, \
+ str tmp1, [dst, #-(\i * 4)]
+ .endm
+ dispatch_15_word
#endif
lsls count, count, #31
@@ -287,23 +370,13 @@ ENTRY(memcpy)
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
-
- vldr d0, [src, #-56] /* 14 words to go. */
- vstr d0, [dst, #-56]
- vldr d0, [src, #-48] /* 12 words to go. */
- vstr d0, [dst, #-48]
- vldr d0, [src, #-40] /* 10 words to go. */
- vstr d0, [dst, #-40]
- vldr d0, [src, #-32] /* 8 words to go. */
- vstr d0, [dst, #-32]
- vldr d0, [src, #-24] /* 6 words to go. */
- vstr d0, [dst, #-24]
- vldr d0, [src, #-16] /* 4 words to go. */
- vstr d0, [dst, #-16]
- vldr d0, [src, #-8] /* 2 words to go. */
- vstr d0, [dst, #-8]
+ .macro dispatch_step i
+ sfi_breg src, \
+ vldr d0, [src, #-(\i * 8)]
+ sfi_breg dst, \
+ vstr d0, [dst, #-(\i * 8)]
+ .endm
+ dispatch_7_dword
#else
sub src, src, #8
sub dst, dst, #8
@@ -349,24 +422,15 @@ ENTRY(memcpy)
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
- strd A_l, A_h, [dst, #-56]
- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
- strd A_l, A_h, [dst, #-48]
- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
- strd A_l, A_h, [dst, #-40]
- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
- strd A_l, A_h, [dst, #-32]
- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
- strd A_l, A_h, [dst, #-24]
- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
- strd A_l, A_h, [dst, #-16]
- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
- strd A_l, A_h, [dst, #-8]
-
+ .macro dispatch_step i
+ sfi_breg src, \
+ ldrd A_l, A_h, [src, #-(\i * 8)]
+ sfi_breg dst, \
+ strd A_l, A_h, [dst, #-(\i * 8)]
+ .endm
+ dispatch_7_dword
#endif
+
tst tmp2, #4
ldrne tmp1, [src], #4
strne tmp1, [dst], #4
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources