This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 04/11] Add elision to pthread_mutex_{try,timed,un,}lock
- From: Torvald Riegel <triegel at redhat dot com>
- To: Andi Kleen <andi at firstfloor dot org>
- Cc: libc-alpha at sourceware dot org, Andi Kleen <ak at linux dot jf dot intel dot com>
- Date: Thu, 13 Jun 2013 18:27:58 +0200
- Subject: Re: [PATCH 04/11] Add elision to pthread_mutex_{try,timed,un,}lock
- References: <1370969416-8337-1-git-send-email-andi at firstfloor dot org> <1370969416-8337-5-git-send-email-andi at firstfloor dot org>
On Tue, 2013-06-11 at 09:50 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add elision paths to the basic mutex locks.
>
> The normal path has a check for RTM and upgrades the lock
> to RTM when available. Trylocks cannot automatically upgrade,
> so they check for elision every time.
>
> We use a 4 byte value in the mutex to store the lock
> elision adaptation state. This is separate from the adaptive
> spin state and uses a separate field.
>
> Condition variables currently do not support elision.
>
> Recursive mutexes and condition variables may be supported at some point,
> but are not in the current implementation. Also "trylock" will
> not automatically enable elision unless some other lock call
> has been already called on the lock.
>
> This version does not use IFUNC, so it means every lock has one
> additional check for elision. Benchmarking showed the overhead
> to be negligible.
>
> 2013-05-16 Andi Kleen <ak@linux.intel.com>
> Hongjiu Lu <hongjiu.lu@intel.com>
>
> * pthread_mutex_lock.c (adaptive_lock): Add
> (__pthread_mutex_lock): Add lock elision support.
> * pthread_mutex_timedlock.c (pthread_mutex_timedlock): dito.
> * pthread_mutex_trylock.c (__pthread_mutex_trylock): dito.
> * pthread_mutex_unlock.c (__pthread_mutex_unlock_usercnt): dito.
> * sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c: dito.
> * sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h: dito.
> * sysdeps/unix/sysv/linux/x86/Makefile: New file.
> * sysdeps/unix/sysv/linux/x86/force-elision.h: New file
> * sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c: dito.
> * sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c: dito.
> * sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c: dito.
> * sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c: dito.
> * sysdeps/unix/sysv/linux/x86/pthread_mutex_unlock.c: dito.
> * pthreadP.h: (PTHREAD_MUTEX_UPGRADED_ELISION_NP): Add.
> ---
> nptl/pthread_mutex_lock.c | 127 ++++++++++++++++-----
> nptl/pthread_mutex_timedlock.c | 43 ++++++-
> nptl/pthread_mutex_trylock.c | 46 +++++++-
> nptl/pthread_mutex_unlock.c | 26 ++++-
> nptl/sysdeps/pthread/pthread.h | 1 +
> .../unix/sysv/linux/pthread_mutex_cond_lock.c | 7 ++
> .../unix/sysv/linux/x86/bits/pthreadtypes.h | 13 ++-
> nptl/sysdeps/unix/sysv/linux/x86/force-elision.h | 34 ++++++
> .../unix/sysv/linux/x86/pthread_mutex_cond_lock.c | 5 +
> .../unix/sysv/linux/x86/pthread_mutex_lock.c | 23 ++++
> .../unix/sysv/linux/x86/pthread_mutex_timedlock.c | 23 ++++
> .../unix/sysv/linux/x86/pthread_mutex_trylock.c | 23 ++++
> .../unix/sysv/linux/x86/pthread_mutex_unlock.c | 2 +
> 13 files changed, 336 insertions(+), 37 deletions(-)
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/force-elision.h
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_unlock.c
>
> diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
> index fbedfd7..4b0607d 100644
> --- a/nptl/pthread_mutex_lock.c
> +++ b/nptl/pthread_mutex_lock.c
> @@ -25,6 +25,14 @@
> #include <lowlevellock.h>
> #include <stap-probe.h>
>
> +#ifndef lll_lock_elision
> +#define lll_lock_elision(lock, try_lock, private) ({ \
> + lll_lock (lock, private); 0; })
> +#endif
> +
> +#ifndef lll_trylock_elision
> +#define lll_trylock_elision(a,t,u) lll_trylock(a)
> +#endif
>
> #ifndef LLL_MUTEX_LOCK
> # define LLL_MUTEX_LOCK(mutex) \
> @@ -34,12 +42,53 @@
> # define LLL_ROBUST_MUTEX_LOCK(mutex, id) \
> lll_robust_lock ((mutex)->__data.__lock, id, \
> PTHREAD_ROBUST_MUTEX_PSHARED (mutex))
> +# define LLL_MUTEX_LOCK_ELISION(mutex) \
> + lll_lock_elision ((mutex)->__data.__lock, (mutex)->__data.__elision, \
> + PTHREAD_MUTEX_PSHARED (mutex))
> +# define LLL_MUTEX_TRYLOCK_ELISION(mutex) \
> + lll_trylock_elision((mutex)->__data.__lock, (mutex)->__data.__elision, \
> + PTHREAD_MUTEX_PSHARED (mutex))
> +#endif
> +
> +#ifndef ENABLE_ELISION
> +#define ENABLE_ELISION 0
> #endif
>
> +#ifndef FORCE_ELISION
> +#define FORCE_ELISION(m, s)
> +#endif
>
> static int __pthread_mutex_lock_full (pthread_mutex_t *mutex)
> __attribute_noinline__;
>
> +static inline __attribute__((always_inline)) void
> +adaptive_lock (pthread_mutex_t *mutex)
> +{
> + if (! __is_smp)
> + return;
How can this be correct? Depending on __is_smp, we either return with
an acquired lock or without. The original code called the following in
case of ! __is_smp:
/* Normal mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
> + if (LLL_MUTEX_TRYLOCK (mutex) != 0)
> + {
> + int cnt = 0;
> + int max_cnt = MIN (MAX_ADAPTIVE_COUNT, mutex->__data.__spins * 2 + 10);
> + do
> + {
> + if (cnt++ >= max_cnt)
> + {
> + LLL_MUTEX_LOCK (mutex);
> + break;
> + }
> +
> +#ifdef BUSY_WAIT_NOP
> + BUSY_WAIT_NOP;
> +#endif
> + }
> + while (LLL_MUTEX_TRYLOCK (mutex) != 0);
> +
> + mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
> + }
> + assert (mutex->__data.__owner == 0);
> +}
> +
>
> int
> __pthread_mutex_lock (mutex)
> @@ -47,26 +96,43 @@ __pthread_mutex_lock (mutex)
> {
> assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
>
> - unsigned int type = PTHREAD_MUTEX_TYPE (mutex);
> + unsigned int type = PTHREAD_MUTEX_TYPE_EL (mutex);
>
> LIBC_PROBE (mutex_entry, 1, mutex);
>
> - if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0))
> + if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP
> + | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
> return __pthread_mutex_lock_full (mutex);
>
> pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
>
> - if (__builtin_expect (type, PTHREAD_MUTEX_TIMED_NP)
> - == PTHREAD_MUTEX_TIMED_NP)
> + /* A switch would be likely faster */
formatting. Want to add a FIXME or similar too?
> +
> + if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_NP, 1))
> {
> + FORCE_ELISION (mutex, goto elision);
> simple:
> /* Normal mutex. */
> LLL_MUTEX_LOCK (mutex);
> assert (mutex->__data.__owner == 0);
> }
> + else if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_ELISION_NP, 1))
> + {
> + elision: __attribute__((unused))
> + if (!ENABLE_ELISION)
> + {
> + mutex->__data.__kind &= ~PTHREAD_MUTEX_ELISION_NP;
> + goto simple;
> + }
> + /* Don't record owner or users for elision case. */
> + int ret = LLL_MUTEX_LOCK_ELISION (mutex);
> + assert (mutex->__data.__owner == 0);
> + return ret;
> + }
> else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
> {
> /* Recursive mutex. */
> + recursive:
>
> /* Check whether we already hold the mutex. */
> if (mutex->__data.__owner == id)
> @@ -89,35 +155,40 @@ __pthread_mutex_lock (mutex)
> }
> else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
> {
> - if (! __is_smp)
> - goto simple;
> -
> - if (LLL_MUTEX_TRYLOCK (mutex) != 0)
> + FORCE_ELISION (mutex, goto elision_adaptive);
> + adaptive:
> + adaptive_lock (mutex);
> + }
> + else if (type == PTHREAD_MUTEX_TIMED_ELISION_NP)
> + goto elision;
> + else if (type == PTHREAD_MUTEX_TIMED_NO_ELISION_NP)
> + goto simple;
> + else if (type == PTHREAD_MUTEX_ADAPTIVE_NO_ELISION_NP)
> + goto adaptive;
> + else if (type == PTHREAD_MUTEX_ADAPTIVE_ELISION_NP)
> + {
> + elision_adaptive: __attribute__((unused))
> + if (!ENABLE_ELISION)
> {
> - int cnt = 0;
> - int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
> - mutex->__data.__spins * 2 + 10);
> - do
> - {
> - if (cnt++ >= max_cnt)
> - {
> - LLL_MUTEX_LOCK (mutex);
> - break;
> - }
> -
> -#ifdef BUSY_WAIT_NOP
> - BUSY_WAIT_NOP;
> -#endif
> - }
> - while (LLL_MUTEX_TRYLOCK (mutex) != 0);
> -
> - mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
> + mutex->__data.__kind &= ~PTHREAD_MUTEX_ELISION_NP;
> + goto adaptive;
> }
> - assert (mutex->__data.__owner == 0);
> + /* FIXME: This is a poor algorithm currently. */
Say in the comment why it's poor: It just tries elision once, and then
falls back to no-elision spinning.
> + if (!LLL_MUTEX_TRYLOCK_ELISION (mutex))
> + return 0;
> + adaptive_lock (mutex);
> + /* No owner for elision */
> + return 0;
> + }
> + else if (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_RECURSIVE_NP)
> + {
> + /* In case the user set the elision flags here.
> + Elision not supported so far. */
whitespace
> + goto recursive;
> }
> else
> {
> - assert (type == PTHREAD_MUTEX_ERRORCHECK_NP);
> + assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP);
> /* Check whether we already hold the mutex. */
> if (__builtin_expect (mutex->__data.__owner == id, 0))
> return EDEADLK;
> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
> index 3a36424..d0bc7a1 100644
> --- a/nptl/pthread_mutex_timedlock.c
> +++ b/nptl/pthread_mutex_timedlock.c
> @@ -25,6 +25,21 @@
>
> #include <stap-probe.h>
>
> +#ifndef lll_timedlock_elision
> +#define lll_timedlock_elision(a,dummy,b,c) lll_timedlock(a, b, c)
> +#endif
> +
> +#ifndef lll_trylock_elision
> +#define lll_trylock_elision(a,t,u) lll_trylock(a)
> +#endif
> +
> +#ifndef ENABLE_ELISION
> +#define ENABLE_ELISION 0
> +#endif
> +
> +#ifndef FORCE_ELISION
> +#define FORCE_ELISION(m, s)
> +#endif
>
> int
> pthread_mutex_timedlock (mutex, abstime)
> @@ -40,10 +55,12 @@ pthread_mutex_timedlock (mutex, abstime)
> /* We must not check ABSTIME here. If the thread does not block
> abstime must not be checked for a valid value. */
>
> - switch (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex),
> + switch (__builtin_expect (PTHREAD_MUTEX_TYPE_EL (mutex),
> PTHREAD_MUTEX_TIMED_NP))
> {
> /* Recursive mutex. */
> + case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_ELISION_NP:
> + case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_NO_ELISION_NP:
I believe it should help to explicitly remind people that we don't
support elision for recursive locks here, and thus can just ignore those
flags here.
> case PTHREAD_MUTEX_RECURSIVE_NP:
> /* Check whether we already hold the mutex. */
> if (mutex->__data.__owner == id)
> @@ -78,13 +95,37 @@ pthread_mutex_timedlock (mutex, abstime)
> /* FALLTHROUGH */
>
> case PTHREAD_MUTEX_TIMED_NP:
> + FORCE_ELISION (mutex, goto elision);
> + case PTHREAD_MUTEX_TIMED_NO_ELISION_NP:
> simple:
> /* Normal mutex. */
> result = lll_timedlock (mutex->__data.__lock, abstime,
> PTHREAD_MUTEX_PSHARED (mutex));
> break;
>
> + case PTHREAD_MUTEX_TIMED_ELISION_NP:
> + elision: __attribute__((unused))
> + /* Don't record ownership */
> + if (!ENABLE_ELISION)
> + goto simple;
> + return lll_timedlock_elision (mutex->__data.__lock,
> + mutex->__data.__spins,
> + abstime,
> + PTHREAD_MUTEX_PSHARED (mutex));
> +
> +
> + case PTHREAD_MUTEX_ADAPTIVE_ELISION_NP:
> + adaptive_elision: __attribute__((unused))
> + if (ENABLE_ELISION
> + && !lll_trylock_elision (mutex->__data.__lock, mutex->__data.__elision, 0))
Add a FIXME here that refers to the related FIXME comment above.
> + return 0;
> + goto adaptive;
> +
> case PTHREAD_MUTEX_ADAPTIVE_NP:
> + FORCE_ELISION (mutex, goto adaptive_elision);
> +
> + case PTHREAD_MUTEX_ADAPTIVE_NO_ELISION_NP:
> + adaptive:
> if (! __is_smp)
> goto simple;
>
> diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c
> index 8f5279d..4702409 100644
> --- a/nptl/pthread_mutex_trylock.c
> +++ b/nptl/pthread_mutex_trylock.c
> @@ -22,6 +22,20 @@
> #include "pthreadP.h"
> #include <lowlevellock.h>
>
> +#ifndef lll_trylock_elision
> +#define lll_trylock_elision(a,t,u) lll_trylock(a)
> +#endif
> +
> +#ifndef ENABLE_ELISION
> +#define ENABLE_ELISION 0
> +#endif
> +
> +#ifndef DO_ELISION
> +#define DO_ELISION(m) 0
> +#endif
> +
> +/* We don't force elision in trylock, because this can lead to inconsistent
> + lock state if the lock was actually busy. */
>
> int
> __pthread_mutex_trylock (mutex)
> @@ -30,10 +44,12 @@ __pthread_mutex_trylock (mutex)
> int oldval;
> pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
>
> - switch (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex),
> + switch (__builtin_expect (PTHREAD_MUTEX_TYPE_EL (mutex),
> PTHREAD_MUTEX_TIMED_NP))
> {
> /* Recursive mutex. */
> + case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_ELISION_NP:
> + case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_NO_ELISION_NP:
Same as above for timedlock().
> case PTHREAD_MUTEX_RECURSIVE_NP:
> /* Check whether we already hold the mutex. */
> if (mutex->__data.__owner == id)
> @@ -57,10 +73,29 @@ __pthread_mutex_trylock (mutex)
> }
> break;
>
> - case PTHREAD_MUTEX_ERRORCHECK_NP:
> + case PTHREAD_MUTEX_TIMED_ELISION_NP:
> + case PTHREAD_MUTEX_ADAPTIVE_ELISION_NP:
> + elision:
> + if (!ENABLE_ELISION)
> + goto normal;
> + if (lll_trylock_elision (mutex->__data.__lock,
> + mutex->__data.__elision,
> + mutex->__data.__kind
> + & PTHREAD_MUTEX_UPGRADED_ELISION_NP) != 0)
> + break;
> + /* Don't record the ownership. */
> + return 0;
> +
> case PTHREAD_MUTEX_TIMED_NP:
> case PTHREAD_MUTEX_ADAPTIVE_NP:
> - /* Normal mutex. */
Don't remove the comment. Maybe adapt it.
> + if (DO_ELISION (mutex))
> + goto elision;
> + /*FALL THROUGH*/
> + case PTHREAD_MUTEX_ADAPTIVE_NO_ELISION_NP:
> + /*FALL THROUGH*/
> + case PTHREAD_MUTEX_TIMED_NO_ELISION_NP:
> + case PTHREAD_MUTEX_ERRORCHECK_NP:
> + normal:
> if (lll_trylock (mutex->__data.__lock) != 0)
> break;
>
> @@ -378,4 +413,9 @@ __pthread_mutex_trylock (mutex)
>
> return EBUSY;
> }
> +
> +#ifndef __pthread_mutex_trylock
> +#ifndef pthread_mutex_trylock
> strong_alias (__pthread_mutex_trylock, pthread_mutex_trylock)
> +#endif
> +#endif
> diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
> index c0249f7..90b39df 100644
> --- a/nptl/pthread_mutex_unlock.c
> +++ b/nptl/pthread_mutex_unlock.c
> @@ -23,6 +23,14 @@
> #include <lowlevellock.h>
> #include <stap-probe.h>
>
> +#ifndef lll_unlock_elision
> +#define lll_unlock_elision(a,b) ({ lll_unlock (a,b); 0; })
> +#endif
> +
> +#ifndef ENABLE_ELISION
> +#define ENABLE_ELISION 0
> +#endif
> +
> static int
> internal_function
> __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
> @@ -34,8 +42,9 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
> pthread_mutex_t *mutex;
> int decr;
> {
> - int type = PTHREAD_MUTEX_TYPE (mutex);
> - if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0))
> + int type = PTHREAD_MUTEX_TYPE_EL (mutex);
> + if (__builtin_expect (type &
> + ~(PTHREAD_MUTEX_KIND_MASK_NP|PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
> return __pthread_mutex_unlock_full (mutex, decr);
>
> if (__builtin_expect (type, PTHREAD_MUTEX_TIMED_NP)
> @@ -55,6 +64,15 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
>
> return 0;
> }
> + else if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_ELISION_NP, 1)
> + || (type == PTHREAD_MUTEX_ADAPTIVE_ELISION_NP))
> + {
> + if (!ENABLE_ELISION)
> + goto normal;
> + /* Don't reset the owner/users fields for elision */
formatting
> + return lll_unlock_elision (mutex->__data.__lock,
> + PTHREAD_MUTEX_PSHARED (mutex));
needs a comment
> + }
> else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
> {
> /* Recursive mutex. */
> @@ -66,7 +84,9 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
> return 0;
> goto normal;
> }
> - else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
> + type &= ~PTHREAD_MUTEX_ELISION_FLAGS_NP;
> + if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1) ||
> + __builtin_expect (type == PTHREAD_MUTEX_TIMED_NP, 1))
> goto normal;
> else
> {
> diff --git a/nptl/sysdeps/pthread/pthread.h b/nptl/sysdeps/pthread/pthread.h
> index bba5852..ab0e08f 100644
> --- a/nptl/sysdeps/pthread/pthread.h
> +++ b/nptl/sysdeps/pthread/pthread.h
> @@ -48,6 +48,7 @@ enum
>
> PTHREAD_MUTEX_ELISION_NP = 1024,
> PTHREAD_MUTEX_NO_ELISION_NP = 2048,
> + PTHREAD_MUTEX_UPGRADED_ELISION_NP = 4096,
The UPGRADED flag shouldn't be exposed to users, and isn't part of the
ABI. This needs to go somewhere else.
> PTHREAD_MUTEX_PSHARED_NP = 128
>
> #if defined __USE_UNIX98 || defined __USE_XOPEN2K8
> diff --git a/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c b/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
> index b417da5..7b6fbc1 100644
> --- a/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
> +++ b/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
> @@ -2,8 +2,15 @@
>
> #define LLL_MUTEX_LOCK(mutex) \
> lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
> +
> +/* Not actually elided so far. Needed? */
whitespace
> +#define LLL_MUTEX_LOCK_ELISION(mutex) \
> + ({ lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)); 0; })
> +
> #define LLL_MUTEX_TRYLOCK(mutex) \
> lll_cond_trylock ((mutex)->__data.__lock)
> +#define LLL_MUTEX_TRYLOCK_ELISION(mutex) LLL_MUTEX_TRYLOCK(mutex)
> +
> #define LLL_ROBUST_MUTEX_LOCK(mutex, id) \
> lll_robust_cond_lock ((mutex)->__data.__lock, id, \
> PTHREAD_ROBUST_MUTEX_PSHARED (mutex))
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> index ccd896c..1852e07 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> @@ -101,14 +101,23 @@ typedef union
> binary compatibility. */
> int __kind;
> #ifdef __x86_64__
> - int __spins;
> + short __spins;
> + short __elision;
> __pthread_list_t __list;
> # define __PTHREAD_MUTEX_HAVE_PREV 1
> +# define __PTHREAD_MUTEX_HAVE_ELISION 1
> #else
> unsigned int __nusers;
> __extension__ union
> {
> - int __spins;
> + struct
> + {
> + short __espins;
> + short __elision;
> +# define __spins d.__espins
> +# define __elision d.__elision
Those macros are awful and error-prone. Is there no other way to do
this that doesn't pollute the namespace like that?
> +# define __PTHREAD_MUTEX_HAVE_ELISION 2
> + } d;
> __pthread_slist_t __list;
> };
> #endif
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h b/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h
> new file mode 100644
> index 0000000..c08bded
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h
> @@ -0,0 +1,34 @@
> +/* force-elision.h: Automatic enabling of elision for mutexes
> + Copyright (C) 2013 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* Check for elision on this lock without upgrading */
Whitespace.
And this really needs more extensive documentation. Ideally, there
should be another place where you discuss how the "upgrade" works, which
you could reference here.
> +#define DO_ELISION(m) \
> + (__pthread_force_elision \
> + && (m->__data.__kind & PTHREAD_MUTEX_NO_ELISION_NP) == 0 \
> + && __is_smp) \
> +
> +/* Automatically enable elision for existing user lock kinds */
Same here. You also need to point out that this here preserves the
POSIX semantics, despite the "force" in the name, unlike having just
DO_ELISION. IMHO, lots of the complexity here exists because you have
the "upgrade" backwards (see my reply to the whole patch set).
> +#define FORCE_ELISION(m, s) \
> + if (__pthread_force_elision \
> + && (m->__data.__kind & PTHREAD_MUTEX_ELISION_FLAGS_NP) == 0 \
> + && __is_smp) \
> + { \
> + mutex->__data.__kind |= PTHREAD_MUTEX_ELISION_NP \
> + | PTHREAD_MUTEX_UPGRADED_ELISION_NP; \
> + s; \
> + }
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c
> new file mode 100644
> index 0000000..4468d16
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c
copyright header
> @@ -0,0 +1,5 @@
> +/* The cond lock is not actually elided yet, but we still need to handle
> + already elided locks and have a working ENABLE_ELISION. */
whitespace
> +#include <elision-conf.h>
> +#define ENABLE_ELISION (__elision_available != 0)
> +#include "sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c"
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c
> new file mode 100644
> index 0000000..e08b5f1
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c
> @@ -0,0 +1,23 @@
> +/* Elided version of pthread_mutex_lock.
> + Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +#include <elision-conf.h>
> +#include "force-elision.h"
> +#include "init-arch.h"
> +
> +#define ENABLE_ELISION (__elision_available != 0)
> +#include "nptl/pthread_mutex_lock.c"
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c
> new file mode 100644
> index 0000000..6cb79f7
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c
> @@ -0,0 +1,23 @@
> +/* Elided version of pthread_mutex_timedlock.
> + Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +#include <elision-conf.h>
> +#include "force-elision.h"
> +#include "init-arch.h"
> +
> +#define ENABLE_ELISION (__elision_available != 0)
> +#include "nptl/pthread_mutex_timedlock.c"
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c
> new file mode 100644
> index 0000000..d07a93c
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c
> @@ -0,0 +1,23 @@
> +/* Elided version of pthread_mutex_trylock.
> + Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +#include <elision-conf.h>
> +#include "force-elision.h"
> +#include "init-arch.h"
> +
> +#define ENABLE_ELISION (__elision_available != 0)
> +#include "nptl/pthread_mutex_trylock.c"
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_unlock.c b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_unlock.c
> new file mode 100644
> index 0000000..3b29b28
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_unlock.c
copyright header
> @@ -0,0 +1,2 @@
> +#define ENABLE_ELISION 1
> +#include "nptl/pthread_mutex_unlock.c"
Add a comment why you can set ENABLE_ELISION unconditionally to 1 here.
For example:
We do not need to check whether elision is available because we do this
implicitly in the unlock function when we check whether the lock is not
currently acquired; because callers must unlock only locks they have
acquired, we can use this to check whether we used elision in this
critical section.