This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: Need a way to test ifunc functions
- From: Petr Baudis <pasky at suse dot cz>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: GNU C Library <libc-alpha at sourceware dot org>,Ulrich Drepper <drepper at redhat dot com>
- Date: Fri, 12 Nov 2010 23:34:25 +0100
- Subject: Re: Need a way to test ifunc functions
- References: <AANLkTikgC41wZx9KLziunQJkhCdvSse_MiPxPJ=LTpSG@mail.gmail.com>
Hi!
On Wed, Nov 10, 2010 at 09:29:10AM -0800, H.J. Lu wrote:
> With IFUNC functions, glibc testsuite can only test one implementation.
> We need a way to test SSE2/SSSE3/SSE4 implementations on SSE4
> machines.
For similar purposes, I wrote the patch below a few weeks ago
(it may no longer apply cleanly to current Git). I chose the
approach of allowing function-specific overrides of the ifunc cpuid
resolution so that the performance impact on real-world applications
can be tested as well.
If there is interest in having this upstream, it would be easy to
turn it into an optional feature that is enabled at compile time.
--8<--
This table describes environment switches used to toggle various
functions. The switches may be of several types:
* Boolean - a value beginning with "0" means false/disabled; any other value means true/enabled
* CPUID - overrides a cpuid(1) register with the given value, which is
parsed as hexadecimal. For a nice description of these registers, see e.g.
http://www.flounder.com/cpuid_explorer2.htm#CPUID%281%29
Of particular interest are these bits:
(DX) SSE2 (1 << 26)
(CX) SSSE3 (1 << 9)
(CX) SSE4_1 (1 << 19)
(CX) SSE4_2 (1 << 20)
Therefore, e.g. _IFUNC_CPUID_CX=0x80200 will enable SSSE3
and SSE4.1, but disable SSE4.2 (and every other CX feature bit, since
the whole register value is replaced). The SSE2 bit (in the DX
register) will be left as-is (set based on the current CPU).
Note that the patch currently modifies only x86_64 functions to take
heed of the function-specific switches.
The generic environment variables are as follows:
_IFUNC_CPUID_CX CPUID See above. Impacts all functions.
_IFUNC_CPUID_DX CPUID See above. Impacts all functions.
_IFUNC_FASTREP Boolean Toggles Fast_Rep_String. Assume
rep-instructions are fast, tuned for
i3/i5/i7. Impacts i686 version of
bcopy(), bzero(), memcpy(), memmove(),
mempcpy(), memset().
_IFUNC_FASTCPBACK Boolean Toggles Fast_Copy_Backward. Assume
copy backwards is fast, tuned for
i3/i5/i7. Impacts x86_64 version of
memcpy() and mempcpy().
_IFUNC_SLOWBSF Boolean Toggles Slow_BSF. (Instruction Bit
Scan Forward.) Tuned for Atom. Impacts
x86_64 and i686 version of strlen().
Furthermore, function-specific environment variables are available.
All the variables above can be used with one of the function-specific
prefixes listed below. E.g., you can use _IFUNC_STRCPY_CPUID_DX to change
the cpuid setup of DX just for the strcpy family of functions.
_IFUNC_STRCPY_* strcpy(), strncpy(), stpcpy(), stpncpy(),
memcpy(), mempcpy(), memmove()
_IFUNC_STRCMP_* strcmp(), strncmp(), memcmp(), strcasecmp_l(),
strncasecmp_l()
_IFUNC_STRSPN_* strspn(), strcspn(), strpbrk()
_IFUNC_STRCHR_* strchr(), strrchr(), rawmemchr()
_IFUNC_STRLEN_* strlen()
_IFUNC_STRSTR_* strstr(), strcasestr(), strcmp(), strncmp(),
memcmp()
diff --git a/sysdeps/unix/sysv/linux/init-first.c b/sysdeps/unix/sysv/linux/init-first.c
index 7b2333d..6b8f3ff 100644
--- a/sysdeps/unix/sysv/linux/init-first.c
+++ b/sysdeps/unix/sysv/linux/init-first.c
@@ -28,6 +28,7 @@
#include <libc-internal.h>
#include <ldsodefs.h>
+#include "multiarch/init-arch.h"
/* Set nonzero if we have to be prepared for more then one libc being
used in the process. Safe assumption if initializer never runs. */
@@ -74,6 +75,9 @@ _init (int argc, char **argv, char **envp)
__libc_argv = argv;
__environ = envp;
+ /* After setting up __environ, force __cpu_features reset. */
+ __cpu_features.kind = arch_kind_unknown;
+
#ifndef SHARED
__libc_init_secure ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 786466d..f49549f 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -19,11 +19,19 @@
02111-1307 USA. */
#include <atomic.h>
+#include <stdlib.h>
#include <cpuid.h>
+#include <unistd.h>
#include "init-arch.h"
struct cpu_features __cpu_features attribute_hidden;
+struct cpu_features __cpu_features_strcpy attribute_hidden;
+struct cpu_features __cpu_features_strstr attribute_hidden;
+struct cpu_features __cpu_features_strchr attribute_hidden;
+struct cpu_features __cpu_features_strlen attribute_hidden;
+struct cpu_features __cpu_features_strspn attribute_hidden;
+struct cpu_features __cpu_features_strcmp attribute_hidden;
static void
@@ -39,6 +47,25 @@ get_common_indeces (unsigned int *family, unsigned int *model)
*model = (eax >> 4) & 0x0f;
}
+char * /* Return the value of environment variable NAME, or NULL if it is not set.  */
+bare_getenv(const char *name) /* NAME is the variable name without a trailing '='.  */
+{ /* NOTE(review): presumably hand-rolled so that getenv() itself is not called this early -- confirm.  */
+ char **ep;
+ if (!__environ) return NULL; /* No environment pointer available: report "not set".  */
+ for (ep = __environ; *ep != NULL; ++ep) /* Scan every entry up to the NULL terminator.  */
+ {
+ const char *np = name; char *npe = *ep; /* NP walks NAME, NPE walks the current entry.  */
+ for (; *np && *npe && *npe != '='; np++, npe++) /* Compare until NAME or the entry's key part ends.  */
+ if (*np != *npe)
+ goto next; /* Character mismatch: try the next entry.  */
+ if (*np || *npe != '=') /* Accept only if NAME is exhausted exactly where the entry has '='.  */
+ goto next;
+ return npe + 1; /* Points at the text following '=' inside the entry.  */
+next:;
+ }
+ return NULL; /* No entry matched NAME.  */
+}
+
void
__init_cpu_features (void)
@@ -109,6 +136,41 @@ __init_cpu_features (void)
__cpu_features.model = model;
atomic_write_barrier ();
__cpu_features.kind = kind;
+
+ /* _Now_, we can safely call getenv(). */
+ const char *env;
+ if ((env = bare_getenv("_IFUNC_CPUID_CX")))
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx = strtol(env, NULL, 16);
+ if ((env = bare_getenv("_IFUNC_CPUID_DX")))
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx = strtol(env, NULL, 16);
+#define feattog(FEAT, ID, NAME) /* If _IFUNC_<NAME> is set, force feature bit ID of FEAT off or on.  */ \
+ if ((env = bare_getenv("_IFUNC_" # NAME))) /* Uses the caller's local ENV; NAME is stringized as written.  */ \
+ { \
+ if (*env == '0') /* Only the first character of the value is examined.  */ \
+ FEAT.feature[index_##ID] &= ~bit_##ID; \
+ else \
+ FEAT.feature[index_##ID] |= bit_##ID; \
+ }
+ feattog(__cpu_features, Fast_Rep_String, FASTREP);
+ feattog(__cpu_features, Fast_Copy_Backward, FASTCPBACK);
+ feattog(__cpu_features, Slow_BSF, SLOWBSF);
+
+#define featfor(FEAT, NAME) \
+ memcpy(&FEAT, &__cpu_features, sizeof(FEAT)); \
+ if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_CX"))) \
+ FEAT.cpuid[COMMON_CPUID_INDEX_1].ecx = strtol(env, NULL, 16); \
+ if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_DX"))) \
+ FEAT.cpuid[COMMON_CPUID_INDEX_1].edx = strtol(env, NULL, 16); \
+ feattog(FEAT, Fast_Rep_String, FASTREP); \
+ feattog(FEAT, Fast_Copy_Backward, FASTCPBACK); \
+ feattog(FEAT, Slow_BSF, SLOWBSF);
+
+ featfor(__cpu_features_strcpy, STRCPY);
+ featfor(__cpu_features_strstr, STRSTR);
+ featfor(__cpu_features_strchr, STRCHR);
+ featfor(__cpu_features_strspn, STRSPN);
+ featfor(__cpu_features_strcmp, STRCMP);
+ featfor(__cpu_features_strlen, STRLEN);
}
#undef __get_cpu_features
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 783b020..b3ef9da 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -77,6 +77,14 @@ extern struct cpu_features
unsigned int model;
unsigned int feature[FEATURE_INDEX_MAX];
} __cpu_features attribute_hidden;
+#define __cpu_features_common __cpu_features
+
+extern struct cpu_features __cpu_features_strcpy attribute_hidden;
+extern struct cpu_features __cpu_features_strstr attribute_hidden;
+extern struct cpu_features __cpu_features_strchr attribute_hidden;
+extern struct cpu_features __cpu_features_strlen attribute_hidden;
+extern struct cpu_features __cpu_features_strspn attribute_hidden;
+extern struct cpu_features __cpu_features_strcmp attribute_hidden;
extern void __init_cpu_features (void) attribute_hidden;
@@ -91,35 +99,37 @@ extern const struct cpu_features *__get_cpu_features (void)
__attribute__ ((const));
# ifndef NOT_IN_libc
-# define __get_cpu_features() (&__cpu_features)
+# define __get_cpu_features(family) (&__cpu_features_ ## family)
+# else
+# define __get_cpu_features(family) __get_cpu_features()
# endif
-# define HAS_CPU_FEATURE(idx, reg, bit) \
- ((__get_cpu_features ()->cpuid[idx].reg & (1 << (bit))) != 0)
+# define HAS_CPU_FEATURE(family, idx, reg, bit) \
+ ((__get_cpu_features(family)->cpuid[idx].reg & (1 << (bit))) != 0)
/* Following are the feature tests used throughout libc. */
-# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, 26)
-# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 23)
-# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 9)
-# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
-# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
-# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_SSE2(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, edx, 26)
+# define HAS_POPCOUNT(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 23)
+# define HAS_SSSE3(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 9)
+# define HAS_SSE4_1(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 19)
+# define HAS_SSE4_2(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 20)
+# define HAS_FMA(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 12)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
-#define HAS_ARCH_FEATURE(idx, bit) \
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+#define HAS_ARCH_FEATURE(family, idx, bit) \
+ ((__get_cpu_features (family)->feature[idx] & (bit)) != 0)
-#define HAS_FAST_REP_STRING \
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+#define HAS_FAST_REP_STRING(family) \
+ HAS_ARCH_FEATURE (family, index_Fast_Rep_String, bit_Fast_Rep_String)
-#define HAS_FAST_COPY_BACKWARD \
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
+#define HAS_FAST_COPY_BACKWARD(family) \
+ HAS_ARCH_FEATURE (family, index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
-#define HAS_SLOW_BSF \
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+#define HAS_SLOW_BSF(family) \
+ HAS_ARCH_FEATURE (family, index_Slow_BSF, bit_Slow_BSF)
#endif /* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
index 301ab28..679ae7a 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp.S
@@ -26,11 +26,11 @@
.text
ENTRY(memcmp)
.type memcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
+ cmpl $0, KIND_OFFSET+__cpu_features_strcmp(%rip)
jne 1f
call __init_cpu_features
1: leaq __memcmp_sse2(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ testl $bit_SSE4_1, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_1(%rip)
jz 2f
leaq __memcmp_sse4_1(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8e9fb19..9b15c53 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -28,14 +28,14 @@
.text
ENTRY(memcpy)
.type memcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
+ cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip)
jne 1f
call __init_cpu_features
1: leaq __memcpy_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq __memcpy_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
jz 2f
leaq __memcpy_ssse3_back(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 948f61c..797c44d 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -29,14 +29,14 @@
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
+ cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip)
jne 1f
call __init_cpu_features
1: leaq __memcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq __memcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
jz 2f
leaq __memcpy_chk_ssse3_back(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 3798627..0b383f0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -38,8 +38,8 @@ extern __typeof (memmove) __memmove_ssse3_back attribute_hidden;
#ifndef NOT_IN_libc
libc_ifunc (memmove,
- HAS_SSSE3
- ? (HAS_FAST_COPY_BACKWARD
+ HAS_SSSE3(strcpy)
+ ? (HAS_FAST_COPY_BACKWARD(strcpy)
? __memmove_ssse3_back : __memmove_ssse3)
: __memmove_sse2);
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 962501d..05939c8 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -29,7 +29,7 @@ extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
#include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
- HAS_SSSE3
- ? (HAS_FAST_COPY_BACKWARD
+ HAS_SSSE3(strcpy)
+ ? (HAS_FAST_COPY_BACKWARD(strcpy)
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2);
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index e8152d6..ce70cfa 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -27,14 +27,14 @@
#if defined SHARED && !defined NOT_IN_libc
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
+ cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip)
jne 1f
call __init_cpu_features
1: leaq __mempcpy_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq __mempcpy_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
jz 2f
leaq __mempcpy_ssse3_back(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 024c775..cf7df6e 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -29,14 +29,14 @@
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
+ cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip)
jne 1f
call __init_cpu_features
1: leaq __mempcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq __mempcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
jz 2f
leaq __mempcpy_chk_ssse3_back(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
index 2a8a690..c866088 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr.S
@@ -26,11 +26,11 @@
.text
ENTRY(rawmemchr)
.type rawmemchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq __rawmemchr_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __rawmemchr_sse42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c
index cfecf9b..39e940e 100644
--- a/sysdeps/x86_64/multiarch/s_fma.c
+++ b/sysdeps/x86_64/multiarch/s_fma.c
@@ -34,7 +34,7 @@ __fma_fma (double x, double y, double z)
return x;
}
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+libm_ifunc (__fma, HAS_FMA(common) ? __fma_fma : __fma_sse2);
weak_alias (__fma, fma)
# define __fma __fma_sse2
diff --git a/sysdeps/x86_64/multiarch/s_fmaf.c b/sysdeps/x86_64/multiarch/s_fmaf.c
index de1c4b6..a70836f 100644
--- a/sysdeps/x86_64/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/multiarch/s_fmaf.c
@@ -33,7 +33,7 @@ __fmaf_fma (float x, float y, float z)
return x;
}
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+libm_ifunc (__fmaf, HAS_FMA(common) ? __fmaf_fma : __fmaf_sse2);
weak_alias (__fmaf, fmaf)
# define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index fde6dcc..bc5335f 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -34,4 +34,4 @@
#undef __sched_cpucount
libc_ifunc (__sched_cpucount,
- HAS_POPCOUNT ? popcount_cpucount : generic_cpucount);
+ HAS_POPCOUNT(common) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/multiarch/strcasestr-c.c b/sysdeps/x86_64/multiarch/strcasestr-c.c
index 551492d..577f876 100644
--- a/sysdeps/x86_64/multiarch/strcasestr-c.c
+++ b/sysdeps/x86_64/multiarch/strcasestr-c.c
@@ -9,7 +9,7 @@ extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
#if 1
libc_ifunc (__strcasestr,
- HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2);
+ HAS_SSE4_2(strstr) ? __strcasestr_sse42 : __strcasestr_sse2);
#else
libc_ifunc (__strcasestr,
0 ? __strcasestr_sse42 : __strcasestr_sse2);
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index 27eead9..f2f8db8 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -26,11 +26,11 @@
.text
ENTRY(strchr)
.type strchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq __strchr_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strchr_sse42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index 54e5bbc..661cf76 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -86,15 +86,15 @@
.text
ENTRY(STRCMP)
.type STRCMP, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1:
leaq STRCMP_SSE42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
jnz 2f
leaq STRCMP_SSSE3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
jnz 2f
leaq STRCMP_SSE2(%rip), %rax
2: ret
@@ -103,15 +103,15 @@ END(STRCMP)
# ifdef USE_AS_STRCASECMP_L
ENTRY(__strcasecmp)
.type __strcasecmp, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1:
leaq __strcasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
jnz 2f
leaq __strcasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
jnz 2f
leaq __strcasecmp_sse2(%rip), %rax
2: ret
@@ -121,15 +121,15 @@ weak_alias (__strcasecmp, strcasecmp)
# ifdef USE_AS_STRNCASECMP_L
ENTRY(__strncasecmp)
.type __strncasecmp, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1:
leaq __strncasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
jnz 2f
leaq __strncasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
jnz 2f
leaq __strncasecmp_sse2(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 02fa8d0..bd4408d 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -59,11 +59,11 @@
.text
ENTRY(STRCPY)
.type STRCPY, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strcpy+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq STRCPY_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq STRCPY_SSSE3(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
index f00d52c..7ad325d 100644
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ b/sysdeps/x86_64/multiarch/strcspn.S
@@ -45,11 +45,11 @@
.text
ENTRY(STRCSPN)
.type STRCSPN, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strspn+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq STRCSPN_SSE2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq STRCSPN_SSE42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
index 83a88ec..d5d251c 100644
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ b/sysdeps/x86_64/multiarch/strlen.S
@@ -29,15 +29,15 @@
.text
ENTRY(strlen)
.type strlen, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strlen+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq __strlen_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strlen+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strlen_sse42(%rip), %rax
ret
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+2: testl $bit_Slow_BSF, __cpu_features_strlen+FEATURE_OFFSET+index_Slow_BSF(%rip)
jz 3f
leaq __strlen_no_bsf(%rip), %rax
3: ret
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 0d17fdb..51728da 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -28,11 +28,11 @@
.text
ENTRY(strrchr)
.type strrchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq __strrchr_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strrchr_sse42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S
index 1c56626..9a72bfa 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/strspn.S
@@ -30,11 +30,11 @@
.text
ENTRY(strspn)
.type strspn, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ cmpl $0, __cpu_features_strspn+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1: leaq __strspn_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ testl $bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strspn_sse42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strstr-c.c b/sysdeps/x86_64/multiarch/strstr-c.c
index b8ed316..677dc9a 100644
--- a/sysdeps/x86_64/multiarch/strstr-c.c
+++ b/sysdeps/x86_64/multiarch/strstr-c.c
@@ -12,4 +12,4 @@
extern char *__strstr_sse42 (const char *, const char *) attribute_hidden;
extern __typeof (__strstr_sse2) __strstr_sse2 attribute_hidden;
-libc_ifunc (strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2);
+libc_ifunc (strstr, HAS_SSE4_2(strstr) ? __strstr_sse42 : __strstr_sse2);