This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX


On Fri, May 11, 2012 at 2:34 AM, Andreas Jaeger <aj@suse.de> wrote:
> On 05/11/2012 12:18 AM, Carlos O'Donell wrote:
>> On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
>> <carlos@systemhalted.org> ?wrote:
>>> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger<aj@suse.de> ?wrote:
>>>>
>>>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>>>> the multiarch code:
>>>>
>>>> 1. We should disable FAM4 if AVX is not available (due to disabled
>>>> ? ?OSXSAVE)
>>>>
>>>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>>>> ? ?from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>>>> ? ?revert the change for AVX. We really have to disable AVX if it's
>>>>
>>>> The issue is easy reproduceable under Xen for the reporter.
>>>>
>>>> Also, the two commits introduced YMM_Usable and removed its usage but
>>>> did not remove the code from the headers, I'm cleaning this up as well.
>>>>
>>>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>>>
>>>> Ok to commit?
>>>
>>> No.
>>>
>>> I actually think the code Ulrich wrote originally was almost correct,
>>> but missing this:
>>
>> If I had to fix it it would look like this:
>
> Thanks, this looks great - but misses the check for FMA4.
>
> Here's a combined patch that compiled fine for me on
> Linux/x86-64. Jeff, could you test it under Xen, please?
>
> Andreas
>
> 2012-05-11 ?Andreas Jaeger ?<aj@suse.de>
> ? ? ? ? ? ?Carlos O'Donell ?<carlos_odonell@mentor.com>
>
> ? ? ? ?[BZ #14059]
> ? ? ? ?* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
> ? ? ? ?Fix check for AVX, enable FMA4 only if it exists and if AVX is
> ? ? ? ?usable.
>
> ? ? ? ?* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Usable): Renamed
> ? ? ? ?from bit_YMM_usable.
> ? ? ? ?(bit_FMA4_Usable): New macro.
> ? ? ? ?(bit_XMM_state): New macro.
> ? ? ? ?(bit_YMM_state): New macro.
> ? ? ? ?(index_AVX_Usable): Renamed from index_YMM_Usable.
> ? ? ? ?(index_FMA4_Usable): New macro.
> ? ? ? ?(HAS_YMM_USABLE): Delete.
> ? ? ? ?(HAS_AVX): Use HAS_ARCH_FEATURE.
> ? ? ? ?(HAS_FMA4): Likewise.
>
> diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
> index 80527ec..bc8e1df 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.c
> +++ b/sysdeps/x86_64/multiarch/init-arch.c
> @@ -145,16 +145,24 @@ __init_cpu_features (void)
>
> ? if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
> ? ? {
> - ? ? ?/* Reset the AVX bit in case OSXSAVE is disabled. ?*/
> + ? ? ?/* Determine if AVX is usable. ?*/
> ? ? ? if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
> ? ? ? ? ?&& ({ unsigned int xcrlow;
> ? ? ? ? ? ? ? ?unsigned int xcrhigh;
> ? ? ? ? ? ? ? ?asm ("xgetbv"
> ? ? ? ? ? ? ? ? ? ? : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
> - ? ? ? ? ? ? ? (xcrlow & 6) == 6; }))
> - ? ? ? __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
> + ? ? ? ? ? ? ? (xcrlow & (bit_YMM_state | bit_XMM_state)) ==
> + ? ? ? ? ? ? ? ?(bit_YMM_state | bit_XMM_state); }))
> + ? ? ? __cpu_features.feature[index_AVX_Usable] = 1;
> ? ? }
>
> + ?/* FMA4 depends on AVX support. ?*/
> + ?if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
> + ? ? ?&& HAS_AVX)

Shouldn't both of these be index_FMA4_Usable?

> + ? ? ? __cpu_features.feature[index_AVX_Usable] = 1;
> + ?else
> + ? ? ? __cpu_features.feature[index_AVX_Usable] = 0;
> +
> ? __cpu_features.family = family;
> ? __cpu_features.model = model;
> ? atomic_write_barrier ();
> diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
> index 5054e46..2d02e2b 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.h
> +++ b/sysdeps/x86_64/multiarch/init-arch.h
> @@ -21,8 +21,10 @@
> ?#define bit_Prefer_SSE_for_memop ? ? ? (1 << 3)
> ?#define bit_Fast_Unaligned_Load ? ? ? ? ? ? ? ?(1 << 4)
> ?#define bit_Prefer_PMINUB_for_stringop (1 << 5)
> -#define bit_YMM_Usable ? ? ? ? ? ? ? ? (1 << 6)
> +#define bit_AVX_Usable ? ? ? ? ? ? ? ? (1 << 6)
> +#define bit_FMA4_Usable ? ? ? ? ? ? ? ? ? ? ? ?(1 << 7)
>
> +/* CPUID Feature flags. ?*/
> ?#define bit_SSE2 ? ? ? (1 << 26)
> ?#define bit_SSSE3 ? ? ?(1 << 9)
> ?#define bit_SSE4_1 ? ? (1 << 19)
> @@ -33,6 +35,10 @@
> ?#define bit_FMA ? ? ? ? ? ? ? ?(1 << 12)
> ?#define bit_FMA4 ? ? ? (1 << 16)
>
> +/* XCR0 Feature flags. ?*/
> +#define bit_XMM_state ?(1 << 1)
> +#define bit_YMM_state ?(2 << 1)
> +
> ?#ifdef __ASSEMBLER__
>
> ?# include <ifunc-defines.h>
> @@ -49,7 +55,8 @@
> ?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1*FEATURE_SIZE
> ?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1*FEATURE_SIZE
> ?# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
> -# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1*FEATURE_SIZE
> +# define index_AVX_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1*FEATURE_SIZE
> +# define index_FMA4_Usable ? ? ? ? ? ? FEATURE_INDEX_1*FEATURE_SIZE
>
> ?#else ?/* __ASSEMBLER__ */
>
> @@ -119,15 +126,14 @@ extern const struct cpu_features *__get_cpu_features (void)
> ?# define HAS_SSE4_1 ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
> ?# define HAS_SSE4_2 ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
> ?# define HAS_FMA ? ? ? HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
> -# define HAS_AVX ? ? ? HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
> -# define HAS_FMA4 ? ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
>
> ?# define index_Fast_Rep_String ? ? ? ? FEATURE_INDEX_1
> ?# define index_Fast_Copy_Backward ? ? ?FEATURE_INDEX_1
> ?# define index_Slow_BSF ? ? ? ? ? ? ? ? ? ? ? ?FEATURE_INDEX_1
> ?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1
> ?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1
> -# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1
> +# define index_AVX_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1
> +# define index_FMA4_Usable ? ? ? ? ? ? FEATURE_INDEX_1
>
> ?# define HAS_ARCH_FEATURE(name) \
> ? ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
> @@ -142,6 +148,8 @@ extern const struct cpu_features *__get_cpu_features (void)
>
> ?# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
>
> -# define HAS_YMM_USABLE ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE (YMM_Usable)
> +# define HAS_AVX ? ? ? ? ? ? ? HAS_ARCH_FEATURE (AVX_Usable)
> +
> +# define HAS_FMA4 ? ? ? ? ? ? ?HAS_ARCH_FEATURE (FMA4_Usable)
>
> ?#endif /* __ASSEMBLER__ */
>
> --
> ?Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
> ?SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> ? GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
> ? ?GPG fingerprint = 93A3 365E CE47 B889 DF7F ?FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]