This is the mail archive of the
libc-ports@sources.redhat.com
mailing list for the libc-ports project.
[PATCH roland/arm-memcpy] ARM: Make multiarch memcpy always use NEON when compiler does
- From: Roland McGrath <roland at hack dot frob dot com>
- To: libc-ports at sourceware dot org
- Date: Mon, 13 May 2013 15:47:22 -0700 (PDT)
- Subject: [PATCH roland/arm-memcpy] ARM: Make multiarch memcpy always use NEON when compiler does
When the compiler is emitting NEON instructions anyway, there is no point
in using IFUNC when we can just use the NEON memcpy unconditionally.
Tested on armv7l-linux-gnueabihf with CC='gcc -mfpu=neon':
no check-abi failures and no regressions in 'make check subdirs=string'.
Thanks,
Roland
ports/ChangeLog.arm
2013-05-13 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/armv7/multiarch/memcpy.S [__ARM_NEON__]: Don't define
memcpy here, just __memcpy_arm and __aeabi_memcpy*.
* sysdeps/arm/armv7/multiarch/memcpy_neon.S [__ARM_NEON__]:
Define memcpy here, not __memcpy_neon.
* sysdeps/arm/armv7/multiarch/memcpy_vfp.S [__ARM_NEON__]:
Define nothing here.
* sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list) [__ARM_NEON__]: Don't list __memcpy_vfp;
use memcpy name for NEON implementation.
--- a/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+++ b/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
@@ -35,9 +35,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_NEON,
- __memcpy_neon)
+#ifdef __ARM_NEON__
+ memcpy
+#else
+ __memcpy_neon
+#endif
+ )
+#ifndef __ARM_NEON__
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_VFP,
__memcpy_vfp)
+#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));
return i;
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy.S
@@ -22,27 +22,29 @@
#include <sysdep.h>
#include <rtld-global-offsets.h>
-#if !defined NOT_IN_libc
+#ifndef NOT_IN_libc
+/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. */
+# ifndef __ARM_NEON__
.text
ENTRY(memcpy)
.type memcpy, %gnu_indirect_function
-#ifdef __SOFTFP__
+# ifdef __SOFTFP__
ldr r1, .Lmemcpy_arm
tst r0, #HWCAP_ARM_VFP
ldrne r1, .Lmemcpy_vfp
-#else
+# else
ldr r1, .Lmemcpy_vfp
-#endif
+# endif
tst r0, #HWCAP_ARM_NEON
ldrne r1, .Lmemcpy_neon
1:
add r0, r1, pc
DO_RET(lr)
-#ifdef __SOFTFP__
+# ifdef __SOFTFP__
.Lmemcpy_arm:
.long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS
-#endif
+# endif
.Lmemcpy_neon:
.long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS
.Lmemcpy_vfp:
@@ -51,6 +53,7 @@ ENTRY(memcpy)
END(memcpy)
libc_hidden_builtin_def (memcpy)
+#endif /* Not __ARM_NEON__. */
/* These versions of memcpy are defined not to clobber any VFP or NEON
registers so they must always call the ARM variant of the memcpy code. */
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S
@@ -1,3 +1,9 @@
+#ifdef __ARM_NEON__
+/* Under __ARM_NEON__, this file defines memcpy directly. */
+libc_hidden_builtin_def (memcpy)
+#else
+# define memcpy __memcpy_neon
+#endif
+
#define MEMCPY_NEON
-#define memcpy __memcpy_neon
#include "memcpy_impl.S"
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
@@ -1,3 +1,7 @@
-#define MEMCPY_VFP
-#define memcpy __memcpy_vfp
-#include "memcpy_impl.S"
+/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly
+ and the __memcpy_vfp code will never be used. */
+#ifndef __ARM_NEON__
+# define MEMCPY_VFP
+# define memcpy __memcpy_vfp
+# include "memcpy_impl.S"
+#endif