This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] [x86_32] Don't use SSE4_2 instructions on Intel Silvermont Micro Architecture.
- From: "Carlos O'Donell" <carlos at redhat dot com>
- To: Liubov Dmitrieva <liubov dot dmitrieva at gmail dot com>
- Cc: GNU C Library <libc-alpha at sourceware dot org>
- Date: Sun, 30 Jun 2013 16:35:06 -0400
- Subject: Re: [PATCH] [x86_32] Don't use SSE4_2 instructions on Intel Silvermont Micro Architecture.
- References: <CAHjhQ93YUtMz14Aj7JfhJ7Tq++eC+23cA6yubh1KPRpKTvvkaw at mail dot gmail dot com>
On 06/30/2013 03:57 PM, Liubov Dmitrieva wrote:
> Same patch as recently committed but for 32 bit.
> Attached are performance results for the current glibc bench test suite.
> Hopefully I caught a moment before the freeze of 2.18.
To make life easier for the reviewer, could you please summarize
the performance differences?
> 2013-06-30 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
>
> * sysdeps/i386/i686/multiarch/memcmp.S: Skip SSE4_2
> version if bit_Slow_SSE4_2 is set.
> * sysdeps/i386/i686/multiarch/strcmp.S: Likewise.
> * sysdeps/i386/i686/multiarch/strncase.S: Likewise.
> * sysdeps/i386/i686/multiarch/strcasecmp.S: Likewise.
> * sysdeps/i386/i686/multiarch/wmemcmp.S: Likewise.
>
> --
> Liubov
>
>
> silvermont2.patch
>
>
> diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
> index 73d1363..8f4b38e 100644
> --- a/sysdeps/i386/i686/multiarch/memcmp.S
> +++ b/sysdeps/i386/i686/multiarch/memcmp.S
> @@ -40,6 +40,8 @@ ENTRY(memcmp)
> leal __memcmp_ssse3@GOTOFF(%ebx), %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> + jnz 2f
> leal __memcmp_sse4_2@GOTOFF(%ebx), %eax
> 2: popl %ebx
> cfi_adjust_cfa_offset (-4)
OK.
> @@ -59,6 +61,8 @@ ENTRY(memcmp)
> leal __memcmp_ssse3, %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> + jnz 2f
> leal __memcmp_sse4_2, %eax
> 2: ret
> END(memcmp)
OK.
> diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
> index 3b38214..79a154e 100644
> --- a/sysdeps/i386/i686/multiarch/strcasecmp.S
> +++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
> @@ -37,6 +37,8 @@ ENTRY(__strcasecmp)
> leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> + jnz 2f
> leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax
> 2: popl %ebx
> cfi_adjust_cfa_offset (-4)
OK.
> @@ -58,6 +60,8 @@ ENTRY(__strcasecmp)
> // XXX Temporarily
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> + jnz 2f
> leal __strcasecmp_sse4_2, %eax
> #endif
> 2: ret
You're adding code to an #ifdef'd out block, which is OK,
but I'd like to know why it's disabled.
Could you please find the history behind this?
> diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
> index 7dc2cef..41dd3b3 100644
> --- a/sysdeps/i386/i686/multiarch/strcmp.S
> +++ b/sysdeps/i386/i686/multiarch/strcmp.S
> @@ -68,6 +68,8 @@ ENTRY(STRCMP)
> leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> + jnz 2f
> leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
> 2: popl %ebx
> cfi_adjust_cfa_offset (-4)
OK.
> @@ -87,6 +89,8 @@ ENTRY(STRCMP)
> leal __STRCMP_SSSE3, %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> + jnz 2f
> leal __STRCMP_SSE4_2, %eax
> 2: ret
> END(STRCMP)
OK.
> diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
> index 51c6d72..4045f71 100644
> --- a/sysdeps/i386/i686/multiarch/strncase.S
> +++ b/sysdeps/i386/i686/multiarch/strncase.S
> @@ -37,6 +37,8 @@ ENTRY(__strncasecmp)
> leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> + jnz 2f
> leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax
> 2: popl %ebx
> cfi_adjust_cfa_offset (-4)
OK.
> @@ -58,6 +60,8 @@ ENTRY(__strncasecmp)
> // XXX Temporarily
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> + jnz 2f
> leal __strncasecmp_sse4_2, %eax
> #endif
> 2: ret
This is the same #ifdef'd-out code block as above, which is OK,
but I'd like to know why it's disabled.
> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
> index e994038..e685a9f 100644
> --- a/sysdeps/i386/i686/multiarch/wmemcmp.S
> +++ b/sysdeps/i386/i686/multiarch/wmemcmp.S
> @@ -40,6 +40,8 @@ ENTRY(wmemcmp)
> leal __wmemcmp_ssse3@GOTOFF(%ebx), %eax
> testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
> jz 2f
> + testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> + jnz 2f
> leal __wmemcmp_sse4_2@GOTOFF(%ebx), %eax
> 2: popl %ebx
> cfi_adjust_cfa_offset (-4)
OK.
Please post a v2.
Cheers,
Carlos.