This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 1/2 V2] Append AVX2 instruction detection
- From: ling dot ma dot program at gmail dot com
- To: libc-alpha at sourceware dot org
- Cc: rth at twiddle dot net, Yao sihai <sihai dot ysh at alibaba-inc dot com>
- Date: Sun, 7 Jul 2013 23:36:55 -0400
- Subject: [PATCH 1/2 V2] Append AVX2 instruction detection
From: Yao sihai <sihai.ysh@alibaba-inc.com>
This patch adds support for detecting AVX2 instructions
on CPU models greater than or equal to 0x3c.
Thanks
Sihai
---
In this version (V2), we updated the original patch according to Richard Henderson's feedback.
sysdeps/x86_64/multiarch/init-arch.c | 11 +++++++++++
sysdeps/x86_64/multiarch/init-arch.h | 11 +++++++++++
2 files changed, 22 insertions(+)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index fed5ab8..4c7ad82 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -96,6 +96,7 @@ __init_cpu_features (void)
case 0x2c:
case 0x2e:
case 0x2f:
+ case 0x3c:
/* Rep string instructions, copy backward, unaligned loads
and pminub are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
@@ -165,6 +166,16 @@ __init_cpu_features (void)
}
}
+ /* Can we get more features? */
+ if (__cpu_features.max_cpuid >= 7)
+ __cpuid_count (7, 0,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
+ if (CPUID_AVX2)
+ __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
+
__cpu_features.family = family;
__cpu_features.model = model;
atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index f33f1c8..83f7c05 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -24,6 +24,7 @@
#define bit_AVX_Usable (1 << 6)
#define bit_FMA_Usable (1 << 7)
#define bit_FMA4_Usable (1 << 8)
+#define bit_AVX2_Usable (1 << 9)
/* CPUID Feature flags. */
#define bit_SSE2 (1 << 26)
@@ -35,6 +36,7 @@
#define bit_POPCOUNT (1 << 23)
#define bit_FMA (1 << 12)
#define bit_FMA4 (1 << 16)
+#define bit_AVX2 (1 << 5)
/* XCR0 Feature flags. */
#define bit_XMM_state (1 << 1)
@@ -49,6 +51,7 @@
# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
@@ -59,6 +62,7 @@
# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable FEATURE_INDEX_7*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -67,6 +71,7 @@
enum
{
COMMON_CPUID_INDEX_1 = 0,
+ COMMON_CPUID_INDEX_7,
COMMON_CPUID_INDEX_80000001, /* for AMD */
/* Keep the following line at the end. */
COMMON_CPUID_INDEX_MAX
@@ -75,6 +80,7 @@ enum
enum
{
FEATURE_INDEX_1 = 0,
+ FEATURE_INDEX_7,
/* Keep the following line at the end. */
FEATURE_INDEX_MAX
};
@@ -137,6 +143,8 @@ extern const struct cpu_features *__get_cpu_features (void)
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
# define CPUID_FMA4 \
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
+# define CPUID_AVX2 \
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
/* HAS_* evaluates to true if we may use the feature at runtime. */
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -144,6 +152,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
+# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
+
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
@@ -153,6 +163,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_AVX_Usable FEATURE_INDEX_1
# define index_FMA_Usable FEATURE_INDEX_1
# define index_FMA4_Usable FEATURE_INDEX_1
+# define index_AVX2_Usable FEATURE_INDEX_7
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
--
1.8.1.4