This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Minor performance improvement for AMD64 assembly routines


AMD x86-64 processors cannot predict single-byte near RET instructions
when they are either the target of a branch or immediately preceded by a
conditional branch (see section 6.2 of the Athlon 64/Opteron
optimization guide).

AMD's recommended solution is to either reorder instructions so that
this situation does not occur, or use the REP prefix on the RET
instruction.

Recent versions of GCC will do this automatically, but glibc contains
many hand-written assembly routines.

The attached patch changes all relevant RET instructions (all that I
found, anyway) to REP RET.

-- 
Nicholas Miell <nmiell@comcast.net>
--- libc/sysdeps/x86_64/fpu/s_fmin.S.~1~	2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmin.S	2004-12-05 19:39:11.189901937 -0800
@@ -31,6 +31,6 @@
 	jp	2f		// then return xmm0
 	movsd	%xmm1, %xmm0	// otherwise return xmm1
 
-2:	ret
+2:	rep ; ret
 END(__fmin)
 weak_alias (__fmin, fmin)
--- libc/sysdeps/x86_64/fpu/s_expm1l.S.~1~	2002-09-09 18:19:47.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_expm1l.S	2004-12-05 19:36:22.586487310 -0800
@@ -78,7 +78,7 @@
 	jz	3f		// If positive, jump.
 	fstp	%st
 	fldl	MO(minus1)	// Set result to -1.0.
-3:	ret
+3:	rep ; ret
 END(__expm1l)
 libm_hidden_def (__expm1l)
 weak_alias (__expm1l, expm1l)
--- libc/sysdeps/x86_64/fpu/e_exp2l.S.~1~	2001-09-19 03:24:08.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/e_exp2l.S	2004-12-05 19:31:23.664212416 -0800
@@ -34,5 +34,5 @@
 	jz	2f			/* If positive, jump.  */
 	fstp	%st
 	fldz				/* Set result to 0.  */
-2:	ret
+2:	rep ; ret
 END (__ieee754_exp2l)
--- libc/sysdeps/x86_64/fpu/s_fmaxf.S.~1~	2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmaxf.S	2004-12-05 19:37:42.437792442 -0800
@@ -31,6 +31,6 @@
 	jp	2f		// then return xmm0
 	movss	%xmm1, %xmm0	// otherwise return xmm1
 
-2:	ret
+2:	rep ; ret
 END(__fmaxf)
 weak_alias (__fmaxf, fmaxf)
--- libc/sysdeps/x86_64/fpu/s_cosl.S.~1~	2001-09-19 03:24:08.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_cosl.S	2004-12-05 19:36:03.613166933 -0800
@@ -16,7 +16,7 @@
 	fnstsw	%ax
 	testl	$0x400,%eax
 	jnz	1f
-	ret
+	rep ; ret
 	.align ALIGNARG(4)
 1:	fldpi
 	fadd	%st(0)
--- libc/sysdeps/x86_64/fpu/s_fmax.S.~1~	2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmax.S	2004-12-05 19:38:31.746630572 -0800
@@ -31,6 +31,6 @@
 	jp	2f		// then return xmm0
 	movsd	%xmm1, %xmm0	// otherwise return xmm1
 
-2:	ret
+2:	rep ; ret
 END(__fmax)
 weak_alias (__fmax, fmax)
--- libc/sysdeps/x86_64/fpu/s_fminf.S.~1~	2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fminf.S	2004-12-05 19:38:44.226552423 -0800
@@ -31,6 +31,6 @@
 	jp	2f		// then return xmm0
 	movss	%xmm1, %xmm0	// otherwise return xmm1
 
-2:	ret
+2:	rep ; ret
 END(__fminf)
 weak_alias (__fminf, fminf)
--- libc/sysdeps/x86_64/strchr.S.~1~	2003-04-29 15:47:18.000000000 -0700
+++ libc/sysdeps/x86_64/strchr.S	2004-12-05 19:49:02.562126710 -0800
@@ -283,8 +283,7 @@
 	incq %rax
 
 6:
-	nop
-	retq
+	rep ; retq
 END (BP_SYM (strchr))
 
 weak_alias (BP_SYM (strchr), BP_SYM (index))

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]