This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
SH atomic asms in glibc and the stack pointer
- From: "Joseph S. Myers" <joseph at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org, libc-alpha at sourceware dot org, aoliva at redhat dot com, kkojima at gcc dot gnu dot org
- Date: Tue, 29 Nov 2011 17:10:58 +0000 (UTC)
- Subject: SH atomic asms in glibc and the stack pointer
I've observed failures on SH of the glibc test csu/tst-atomic where the
problem is that
#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
({ __typeof (*(mem)) __result; \
__asm __volatile ("\
.align 2\n\
mova 1f,r0\n\
nop\n\
mov r15,r1\n\
mov #-8,r15\n\
0: mov.l @%1,%0\n\
cmp/eq %0,%3\n\
bf 1f\n\
mov.l %2,@%1\n\
1: mov r1,r15"\
: "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
: "r0", "r1", "t", "memory"); \
__result; })
allocates the "r" (mem) input to the stack pointer r15. That is, the
object on which the atomic operation is being carried out happens to be
exactly where the stack pointer points, so GCC saves a register by using
r15 as the input. Because this input is used during the period when r15
has its special negative value (minus the length of the restartable code
section, so the kernel can restart this code sequence if the thread is
scheduled during it), this obviously doesn't work.
This asm doesn't tell the compiler that r15 is temporarily saved and
restored, and marking it as clobbering the stack pointer is unlikely to
work. It might be possible to avoid problems by marking all the inputs
that are used while the stack pointer has its special value as being
read/write operands, so they do not get assigned to the stack pointer, or
by clobbering additional fixed registers that are used to load the input
values at the start of the asm. But both of these seem inefficient
(requiring extra registers to be allocated) when what's actually required
is simply to allocate the inputs to any register other than r15. So in my
view the best fix is to add a new constraint that means "any
general-purpose register other than r15" and to use it in glibc when the
compiler version is new enough. That is what these GCC and glibc patches
do. Comments? OK?
Note: I have not been able to test these with GCC 4.7 as building glibc at
all with 4.7 (for any target) is currently broken; it has been tested with
4.6-based sources (with the __GNUC_PREREQ test appropriately adjusted).
The problem with 4.7 was
<http://gcc.gnu.org/bugzilla/show_bug.cgi?id=51117>, moved to
<http://sourceware.org/bugzilla/show_bug.cgi?id=13411> but I think it's
really a GCC bug (that it might be possible to work around in glibc), not
a glibc one; clobbers of local variables in C do not require any code to
be executed on stack unwinding and so however they are represented
internally in GCC should not result in exception regions in the final
output EH tables.
2011-11-29 Joseph Myers <joseph@codesourcery.com>
* config/sh/sh.h (enum reg_class, REG_CLASS_NAMES)
(REG_CLASS_CONTENTS): Add NON_SP_REGS.
(REGCLASS_HAS_GENERAL_REG): Handle NON_SP_REGS.
* config/sh/constraints.md (u): New constraint.
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h (revision 181804)
+++ gcc/config/sh/sh.h (working copy)
@@ -1059,6 +1059,7 @@ enum reg_class
MAC_REGS,
FPUL_REGS,
SIBCALL_REGS,
+ NON_SP_REGS,
GENERAL_REGS,
FP0_REGS,
FP_REGS,
@@ -1084,6 +1085,7 @@ enum reg_class
"MAC_REGS", \
"FPUL_REGS", \
"SIBCALL_REGS", \
+ "NON_SP_REGS", \
"GENERAL_REGS", \
"FP0_REGS", \
"FP_REGS", \
@@ -1116,6 +1118,8 @@ enum reg_class
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00400000 }, \
/* SIBCALL_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* NON_SP_REGS: */ \
+ { 0xffff7fff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \
/* GENERAL_REGS: */ \
{ 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \
/* FP0_REGS: */ \
@@ -2072,7 +2076,7 @@ struct sh_args {
register information here is not used for SFmode. */
#define REGCLASS_HAS_GENERAL_REG(CLASS) \
- ((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS \
+ ((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS || (CLASS) == NON_SP_REGS \
|| (! TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS))
#define REGCLASS_HAS_FP_REG(CLASS) \
Index: gcc/config/sh/constraints.md
===================================================================
--- gcc/config/sh/constraints.md (revision 181804)
+++ gcc/config/sh/constraints.md (working copy)
@@ -75,6 +75,9 @@
(define_register_constraint "t" "T_REGS"
"T register.")
+(define_register_constraint "u" "NON_SP_REGS"
+ "Non-stack-pointer register.")
+
(define_register_constraint "w" "FP0_REGS"
"Floating-point register 0.")
2011-11-29 Joseph Myers <joseph@codesourcery.com>
* sysdeps/unix/sysv/linux/sh/bits/atomic.h (rNOSP): Define
conditional on GCC version.
(__arch_compare_and_exchange_val_8_acq)
(__arch_compare_and_exchange_val_16_acq)
(__arch_compare_and_exchange_val_32_acq, atomic_exchange_and_add)
(atomic_add, atomic_add_negative, atomic_add_zero, atomic_bit_set)
(atomic_bit_test_set): Use rNOSP instead of "r" constraints.
diff --git a/sysdeps/unix/sysv/linux/sh/bits/atomic.h b/sysdeps/unix/sysv/linux/sh/bits/atomic.h
index a0e5918..d1928af 100644
--- a/sysdeps/unix/sysv/linux/sh/bits/atomic.h
+++ b/sysdeps/unix/sysv/linux/sh/bits/atomic.h
@@ -1,5 +1,5 @@
/* Atomic operations used inside libc. Linux/SH version.
- Copyright (C) 2003 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -65,6 +65,12 @@ typedef uintmax_t uatomic_max_t;
r1: saved stack pointer
*/
+#if __GNUC_PREREQ (4, 7)
+# define rNOSP "u"
+#else
+# define rNOSP "r"
+#endif
+
#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
({ __typeof (*(mem)) __result; \
__asm __volatile ("\
@@ -78,7 +84,7 @@ typedef uintmax_t uatomic_max_t;
bf 1f\n\
mov.b %2,@%1\n\
1: mov r1,r15"\
- : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
+ : "=&r" (__result) : rNOSP (mem), rNOSP (newval), rNOSP (oldval) \
: "r0", "r1", "t", "memory"); \
__result; })
@@ -95,7 +101,7 @@ typedef uintmax_t uatomic_max_t;
bf 1f\n\
mov.w %2,@%1\n\
1: mov r1,r15"\
- : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
+ : "=&r" (__result) : rNOSP (mem), rNOSP (newval), rNOSP (oldval) \
: "r0", "r1", "t", "memory"); \
__result; })
@@ -112,7 +118,7 @@ typedef uintmax_t uatomic_max_t;
bf 1f\n\
mov.l %2,@%1\n\
1: mov r1,r15"\
- : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
+ : "=&r" (__result) : rNOSP (mem), rNOSP (newval), rNOSP (oldval) \
: "r0", "r1", "t", "memory"); \
__result; })
@@ -136,7 +142,7 @@ typedef uintmax_t uatomic_max_t;
add %0,%1\n\
mov.b %1,@%2\n\
1: mov r1,r15"\
- : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=&r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -148,7 +154,7 @@ typedef uintmax_t uatomic_max_t;
add %0,%1\n\
mov.w %1,@%2\n\
1: mov r1,r15"\
- : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=&r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -160,7 +166,7 @@ typedef uintmax_t uatomic_max_t;
add %0,%1\n\
mov.l %1,@%2\n\
1: mov r1,r15"\
- : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=&r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "memory"); \
else \
{ \
@@ -185,7 +191,7 @@ typedef uintmax_t uatomic_max_t;
add r2,%0\n\
mov.b %0,@%1\n\
1: mov r1,r15"\
- : "=&r" (__tmp) : "r" (mem), "0" (__value) \
+ : "=&r" (__tmp) : rNOSP (mem), "0" (__value) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -197,7 +203,7 @@ typedef uintmax_t uatomic_max_t;
add r2,%0\n\
mov.w %0,@%1\n\
1: mov r1,r15"\
- : "=&r" (__tmp) : "r" (mem), "0" (__value) \
+ : "=&r" (__tmp) : rNOSP (mem), "0" (__value) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -209,7 +215,7 @@ typedef uintmax_t uatomic_max_t;
add r2,%0\n\
mov.l %0,@%1\n\
1: mov r1,r15"\
- : "=&r" (__tmp) : "r" (mem), "0" (__value) \
+ : "=&r" (__tmp) : rNOSP (mem), "0" (__value) \
: "r0", "r1", "r2", "memory"); \
else \
{ \
@@ -238,7 +244,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
shal %1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -252,7 +258,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
shal %1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -266,7 +272,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
shal %1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else \
abort (); \
@@ -287,7 +293,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
tst %1,%1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -301,7 +307,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
tst %1,%1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -315,7 +321,7 @@ typedef uintmax_t uatomic_max_t;
1: mov r1,r15\n\
tst %1,%1\n\
movt %0"\
- : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
+ : "=r" (__result), "=&r" (__tmp) : rNOSP (mem), "1" (__value) \
: "r0", "r1", "r2", "t", "memory"); \
else \
abort (); \
@@ -336,7 +342,7 @@ typedef uintmax_t uatomic_max_t;
or %1,r2\n\
mov.b r2,@%0\n\
1: mov r1,r15"\
- : : "r" (mem), "r" (__mask) \
+ : : rNOSP (mem), rNOSP (__mask) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -348,7 +354,7 @@ typedef uintmax_t uatomic_max_t;
or %1,r2\n\
mov.w r2,@%0\n\
1: mov r1,r15"\
- : : "r" (mem), "r" (__mask) \
+ : : rNOSP (mem), rNOSP (__mask) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -360,7 +366,7 @@ typedef uintmax_t uatomic_max_t;
or %1,r2\n\
mov.l r2,@%0\n\
1: mov r1,r15"\
- : : "r" (mem), "r" (__mask) \
+ : : rNOSP (mem), rNOSP (__mask) \
: "r0", "r1", "r2", "memory"); \
else \
abort (); \
@@ -382,7 +388,7 @@ typedef uintmax_t uatomic_max_t;
mov.b %1,@%2\n\
1: mov r1,r15"\
: "=&r" (__result), "=&r" (__mask) \
- : "r" (mem), "0" (__result), "1" (__mask) \
+ : rNOSP (mem), "0" (__result), "1" (__mask) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 2) \
__asm __volatile ("\
@@ -397,7 +403,7 @@ typedef uintmax_t uatomic_max_t;
mov.w %1,@%2\n\
1: mov r1,r15"\
: "=&r" (__result), "=&r" (__mask) \
- : "r" (mem), "0" (__result), "1" (__mask) \
+ : rNOSP (mem), "0" (__result), "1" (__mask) \
: "r0", "r1", "r2", "memory"); \
else if (sizeof (*(mem)) == 4) \
__asm __volatile ("\
@@ -412,7 +418,7 @@ typedef uintmax_t uatomic_max_t;
mov.l %1,@%2\n\
1: mov r1,r15"\
: "=&r" (__result), "=&r" (__mask) \
- : "r" (mem), "0" (__result), "1" (__mask) \
+ : rNOSP (mem), "0" (__result), "1" (__mask) \
: "r0", "r1", "r2", "memory"); \
else \
abort (); \
--
Joseph S. Myers
joseph@codesourcery.com