This is the mail archive of the
cygwin-developers
mailing list for the Cygwin project.
Re: Bad codegen in pthread_mutex causing 100% cpu spin loop related to inline asm ilockcmpexchg
- From: Dave Korn <dave dot korn dot cygwin at googlemail dot com>
- To: Dave Korn <dave dot korn dot cygwin at googlemail dot com>
- Cc: cygwin-developers at cygwin dot com
- Date: Thu, 28 May 2009 22:49:42 +0100
- Subject: Re: Bad codegen in pthread_mutex causing 100% cpu spin loop related to inline asm ilockcmpexchg
- References: <4A1EC4BC.9070104@gmail.com> <4A1EDCDB.8020005@gmail.com>
Dave Korn wrote:
> This new version changes the approach to use an "m" constraint to refer
> directly to the contents of *t, and not hope the compiler can infer the
> relationship between the address of t in operand 2 and the content of t in
> operand 1. It requires rewriting the instruction template, and will generate
> more different addressing modes than the original, which would only create
> register-indirect addressing:
>
> extern __inline__ long
> ilockcmpexch (volatile long *t, long v, long c)
> {
> register int __res;
> __asm__ __volatile__ ("\n\
> lock cmpxchgl %2,%1\n\
> ": "=a" (__res), "+m" (*t) : "q" (v), "0" (c) : "memory", "cc");
> return __res;
> }
Then I thought, why not go the whole hog and get rid of the matching
constraints altogether by making __res an in/out operand and passing 'c' in
it. So I tried this:
Index: winsup/cygwin/winbase.h
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/winbase.h,v
retrieving revision 1.14
diff -p -u -r1.14 winbase.h
--- winsup/cygwin/winbase.h 12 Jul 2008 18:09:17 -0000 1.14
+++ winsup/cygwin/winbase.h 28 May 2009 20:33:21 -0000
@@ -38,21 +38,21 @@ ilockdecr (volatile long *m)
extern __inline__ long
ilockexch (volatile long *t, long v)
{
- register int __res;
+ register long __res = *t;
__asm__ __volatile__ ("\n\
-1: lock cmpxchgl %3,(%1)\n\
+1: lock cmpxchgl %2,%1\n\
jne 1b\n\
- ": "=a" (__res), "=q" (t): "1" (t), "q" (v), "0" (*t): "cc");
+ ": "+a" (__res), "+m" (*t): "q" (v) : "memory", "cc");
return __res;
}
extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
- register int __res;
+ register long __res = c;
__asm__ __volatile__ ("\n\
- lock cmpxchgl %3,(%1)\n\
- ": "=a" (__res), "=q" (t) : "1" (t), "q" (v), "0" (c): "cc");
+ lock cmpxchgl %2,%1\n\
+ ": "+a" (__res), "+m" (*t) : "q" (v) : "memory", "cc");
return __res;
}
The ilockexch function proved interesting. There was some needless register
motion generated:
/* Second half of user shared initialization: Initialize content. */
void
user_shared_initialize ()
{
DWORD sversion = (DWORD) InterlockedExchange ((LONG *)
&user_shared->version, USER_VERSION_MAGIC);
translates into:
movl $779157505, %edx #, tmp65
movl _user_shared, %ecx # user_shared, D.25215
movl (%ecx), %ebx #* D.25215, __res
movl %ebx, %eax # __res,
/APP
# 42 "/gnu/winsup/src/winsup/cygwin/winbase.h" 1
1: lock cmpxchgl %edx,(%ecx) # tmp65,* D.25215
jne 1b
# 0 "" 2
/NO_APP
testl %eax, %eax # __res
movl %eax, %ebx #, __res
... where (%ecx) is moved via %ebx into %eax, and then %eax stored back into
%ebx. The compiler isn't clever enough to allocate __res and sversion both in
%eax and automatically satisfy the asm constraints, so it assigns __res to
%ebx and has to copy it back and forth. So I'm going to try this slight
modification:
extern __inline__ long
ilockexch (volatile long *t, long v)
{
register long __res __asm__ ("%eax") = *t;
__asm__ __volatile__ ("\n\
1: lock cmpxchgl %2,%1\n\
jne 1b\n\
": "+a" (__res), "+m" (*t): "q" (v) : "memory", "cc");
return __res;
}
extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
register long __res __asm ("%eax") = c;
__asm__ __volatile__ ("\n\
lock cmpxchgl %2,%1\n\
": "+a" (__res), "+m" (*t) : "q" (v) : "memory", "cc");
return __res;
}
which at least gets me this healthier-looking output:
movl $779157505, %ecx #, tmp66
movl _user_shared, %edx # user_shared, D.25216
movl (%edx), %eax #* D.25216, __res.0
1: lock cmpxchgl %ecx,(%edx) # tmp66,* D.25216
jne 1b
testl %eax, %eax # sversion
movl %eax, %ebx # __res, sversion
I'm running with this change in my local cygwin DLL now and I'll see how it
goes over the next few days.
cheers,
DaveK