This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
Re: malloc() and spinlocks
- From: Wolfram Gloger <Wolfram dot Gloger at dent dot med dot uni-muenchen dot de>
- To: l dot lunak at suse dot cz
- Cc: libc-alpha at sources dot redhat dot com
- Date: Tue, 26 Nov 2002 14:45:23 +0100 (MET)
- Subject: Re: malloc() and spinlocks
- References: <200211251733.14550.l.lunak@suse.cz>
Hi,
> It looks like the newer malloc() version is in, which results in slightly
> better performance than glibc-2.2. I even see the spinlock code in
> glibc/malloc/thread-m.h , but the placement of that code is somewhat ...
> unfortunate. If you look carefully, you'll notice it's placed in #else part
> of #if defined(_LIBC).
Yes, sorry, this was an unfortunate oversight. I usually test malloc
performance outside of glibc and therefore haven't noticed this. :-(
I already had something like the patch below in my tree since January,
but it somehow got lost. Need to do a full glibc build with this, but
if it compiles, it should be very stable, since I've tested the
spinlocks outside glibc for a _long_ time.
Regards,
Wolfram.
2002-11-26 Wolfram Gloger <wg@malloc.de>
* malloc/thread-m.h: Activate inline spinlocks for i386 and
x86_64.
--- malloc/thread-m.h Sun Apr 7 21:40:08 2002
+++ malloc/thread-m.h Tue Nov 26 14:34:09 2002
@@ -28,6 +28,68 @@
#define _THREAD_M_H
#undef thread_atfork_static
+#undef HAVE_MUTEX
+
+/* Use fast inline spinlocks with gcc. */
+#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \
+ !defined USE_NO_SPINLOCKS
+
+#include <time.h>
+#include <sched.h>
+
+typedef struct {
+ volatile unsigned int lock;
+ int pad0_;
+} mutex_t;
+
+#define MUTEX_INITIALIZER { 0 }
+#define mutex_init(m) ((m)->lock = 0)
+static inline int mutex_lock(mutex_t *m) {
+ int cnt = 0, r;
+ struct timespec tm;
+
+ for(;;) {
+ __asm__ __volatile__
+ ("xchgl %0, %1"
+ : "=r"(r), "=m"(m->lock)
+ : "0"(1), "m"(m->lock)
+ : "memory");
+ if(!r)
+ return 0;
+ if(cnt < 50) {
+#ifdef _LIBC
+ __sched_yield();
+#else
+ sched_yield();
+#endif
+ cnt++;
+ } else {
+ tm.tv_sec = 0;
+ tm.tv_nsec = 2000001;
+ nanosleep(&tm, NULL);
+ cnt = 0;
+ }
+ }
+}
+static inline int mutex_trylock(mutex_t *m) {
+ int r;
+
+ __asm__ __volatile__
+ ("xchgl %0, %1"
+ : "=r"(r), "=m"(m->lock)
+ : "0"(1), "m"(m->lock)
+ : "memory");
+ return r;
+}
+static inline int mutex_unlock(mutex_t *m) {
+ m->lock = 0;
+ __asm __volatile ("" : "=m" (m->lock) : "0" (m->lock));
+ return 0;
+}
+
+#define HAVE_MUTEX
+
+#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */
#if defined(_LIBC) /* The GNU C library, a special case of Posix threads */
@@ -38,6 +100,7 @@
typedef pthread_t thread_id;
/* mutex */
+#ifndef HAVE_MUTEX
typedef pthread_mutex_t mutex_t;
#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
@@ -58,6 +121,7 @@
#define mutex_unlock(m) \
(__pthread_mutex_unlock != NULL \
? __pthread_mutex_unlock (m) : (*(int*)(m) = 0))
+#endif /* HAVE_MUTEX */
#define thread_atfork(prepare, parent, child) \
(__pthread_atfork != NULL ? __pthread_atfork(prepare, parent, child) : 0)
@@ -65,6 +129,7 @@
#elif defined(MUTEX_INITIALIZER)
/* Assume hurd, with cthreads */
+#ifndef HAVE_MUTEX
/* Cthreads `mutex_t' is a pointer to a mutex, and malloc wants just the
mutex itself. */
#undef mutex_t
@@ -80,6 +145,7 @@
#define mutex_unlock(m) (__mutex_unlock(m), 0)
#define mutex_trylock(m) (!__mutex_trylock(m))
+#endif /* HAVE_MUTEX */
#define thread_atfork(prepare, parent, child) do {} while(0)
#define thread_atfork_static(prepare, parent, child) \
@@ -117,59 +183,7 @@
typedef pthread_t thread_id;
/* mutex */
-#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \
- !defined USE_NO_SPINLOCKS
-
-#include <time.h>
-
-/* Use fast inline spinlocks. */
-typedef struct {
- volatile unsigned int lock;
- int pad0_;
-} mutex_t;
-
-#define MUTEX_INITIALIZER { 0 }
-#define mutex_init(m) ((m)->lock = 0)
-static inline int mutex_lock(mutex_t *m) {
- int cnt = 0, r;
- struct timespec tm;
-
- for(;;) {
- __asm__ __volatile__
- ("xchgl %0, %1"
- : "=r"(r), "=m"(m->lock)
- : "0"(1), "m"(m->lock)
- : "memory");
- if(!r)
- return 0;
- if(cnt < 50) {
- sched_yield();
- cnt++;
- } else {
- tm.tv_sec = 0;
- tm.tv_nsec = 2000001;
- nanosleep(&tm, NULL);
- cnt = 0;
- }
- }
-}
-static inline int mutex_trylock(mutex_t *m) {
- int r;
-
- __asm__ __volatile__
- ("xchgl %0, %1"
- : "=r"(r), "=m"(m->lock)
- : "0"(1), "m"(m->lock)
- : "memory");
- return r;
-}
-static inline int mutex_unlock(mutex_t *m) {
- m->lock = 0;
- __asm __volatile ("" : "=m" (m->lock) : "0" (m->lock));
- return 0;
-}
-
-#else
+#ifndef HAVE_MUTEX
/* Normal pthread mutex. */
typedef pthread_mutex_t mutex_t;
@@ -180,7 +194,7 @@
#define mutex_trylock(m) pthread_mutex_trylock(m)
#define mutex_unlock(m) pthread_mutex_unlock(m)
-#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */
+#endif /* HAVE_MUTEX */
/* thread specific data */
#if defined(__sgi) || defined(USE_TSD_DATA_HACK)
@@ -272,6 +286,7 @@
typedef int thread_id;
+#ifndef HAVE_MUTEX
/* The mutex functions used to do absolutely nothing, i.e. lock,
trylock and unlock would always just return 0. However, even
without any concurrently active threads, a mutex can be used
@@ -285,6 +300,7 @@
#define mutex_lock(m) ((*(m) = 1), 0)
#define mutex_trylock(m) (*(m) ? 1 : ((*(m) = 1), 0))
#define mutex_unlock(m) (*(m) = 0)
+#endif /* HAVE_MUTEX */
typedef void *tsd_key_t;
#define tsd_key_create(key, destr) do {} while(0)