[PATCH][BZ #6816][3/5] VSX implementation for *context routines (powerpc32)



VSX implementation for getcontext, setcontext and swapcontext (powerpc32). When the processor reports VSX in the hwcap, the routines save and restore the VSX register state using a new scheme; the ucontext size grows accordingly, so the entry points are versioned at GLIBC_2.9 and the old ones are kept as compat symbols.
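
For reference, a minimal sketch of how the affected routines are used
from C (plain <ucontext.h>, nothing VSX-specific).  With this patch the
state saved and restored by these calls also covers the VSX registers
on capable hardware:

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t main_ctx, work_ctx;
static char work_stack[64 * 1024];

static void
worker (void)
{
  puts ("in worker context");
  /* Switch back to main; swapcontext saves the current register state
     (GPRs, FPRs, VRs, and with this patch VSRs) into work_ctx first.  */
  swapcontext (&work_ctx, &main_ctx);
  puts ("worker resumed");
}

int
main (void)
{
  if (getcontext (&work_ctx) != 0)
    exit (1);
  work_ctx.uc_stack.ss_sp = work_stack;
  work_ctx.uc_stack.ss_size = sizeof work_stack;
  work_ctx.uc_link = &main_ctx;
  makecontext (&work_ctx, worker, 0);

  swapcontext (&main_ctx, &work_ctx);   /* run worker until it swaps back */
  puts ("back in main");
  swapcontext (&main_ctx, &work_ctx);   /* resume worker; it exits via uc_link */
  puts ("done");
  return 0;
}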


--
Carlos Eduardo Seo
Software Engineer
IBM Linux Technology Center

2008-07-31  Carlos Eduardo Seo  <cseo@linux.vnet.ibm.com>
	    Steven Munroe  <sjmunroe@us.ibm.com>

	[BZ #6816]
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S:
	Added a new scheme for handling VSX registers, in addition
	to the existing one.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S:
	Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S:
	Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Added
	new ucontext size and symbol versioning code for GLIBC 2.9.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Likewise.
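
The new save/restore paths are only taken when the kernel reports VSX
in AT_HWCAP (the assembly reads _dl_hwcap and tests
PPC_FEATURE_HAS_VSX, assuming Altivec is available whenever VSX is).
A rough userspace analogue of that check, useful for testing on a
given machine, might look like the sketch below; the fallback value
for PPC_FEATURE_HAS_VSX is an assumption taken from recent kernel
headers:

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef PPC_FEATURE_HAS_VSX
# define PPC_FEATURE_HAS_VSX 0x00000080   /* assumed; see <asm/cputable.h> */
#endif

int
main (void)
{
  unsigned long av[2];
  unsigned long hwcap = 0;
  int fd = open ("/proc/self/auxv", O_RDONLY);

  if (fd < 0)
    return 1;
  /* The auxiliary vector is a sequence of (type, value) pairs,
     terminated by AT_NULL; AT_HWCAP carries the CPU feature bits.  */
  while (read (fd, av, sizeof av) == sizeof av && av[0] != AT_NULL)
    if (av[0] == AT_HWCAP)
      hwcap = av[1];
  close (fd);

  printf ("VSX %savailable\n", (hwcap & PPC_FEATURE_HAS_VSX) ? "" : "not ");
  return 0;
}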
	  
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S	2008-07-31 16:33:02.000000000 -0500
@@ -24,6 +24,7 @@
    and if appropriate:
      __CONTEXT_ENABLE_FPRS
      __CONTEXT_ENABLE_VRS
+     __CONTEXT_ENABLE_VSRS
    Any archecture that implements the Vector unit is assumed to also 
    implement the floating unit.  */
 
@@ -105,7 +106,12 @@
 	stw	r0,_UC_GREGS+(PT_MSR*4)(r3)
 	stw	r0,_UC_GREGS+(PT_MQ*4)(r3)
 
-#ifdef __CONTEXT_ENABLE_FPRS
+/* For VSR registers, we need a new scheme for saving the
+   registers in order to perform fewer store operations. If the
+   processor does not have VSX, use the old scheme.  */
+#ifndef __CONTEXT_ENABLE_VSRS
+ /* Old scheme for storing FP and VR registers.  */
+# ifdef __CONTEXT_ENABLE_FPRS
 /* Save the floating-point registers */
 	stfd	fp0,_UC_FREGS+(0*8)(r3)
 	stfd	fp1,_UC_FREGS+(1*8)(r3)
@@ -142,37 +148,37 @@
 	stfd	fp31,_UC_FREGS+(31*8)(r3)
 	stfd	fp0,_UC_FREGS+(32*8)(r3)
 
-# ifdef __CONTEXT_ENABLE_VRS
-#  ifdef PIC
+#  ifdef __CONTEXT_ENABLE_VRS
+#   ifdef PIC
 	mflr    r8
-#   ifdef HAVE_ASM_PPC_REL16
+#    ifdef HAVE_ASM_PPC_REL16
 	bcl	20,31,1f
 1:	mflr	r7
 	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
 	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-#   else
+#    else
 	bl      _GLOBAL_OFFSET_TABLE_@local-4
 	mflr    r7
-#   endif
-#   ifdef SHARED
+#    endif
+#    ifdef SHARED
 	lwz     r7,_rtld_global_ro@got(r7)
 	mtlr    r8
 	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-#   else
+#    else
 	lwz     r7,_dl_hwcap@got(r7)
 	mtlr    r8
 	lwz     r7,0(r7)
-#   endif
-#  else
+#    endif
+#   else
 	lis	r7,_dl_hwcap@ha
 	lwz     r7,_dl_hwcap@l(r7)
-#  endif
+#   endif
 	andis.	r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 
 	la	r10,(_UC_VREGS)(r3)
 	la	r9,(_UC_VREGS+16)(r3)
 	
-	beq	2f	/* L(no_vec) */
+	beq	6f	/* L(no_vec) */
 /* address of the combined VSCR/VSAVE quadword.  */	
 	la	r8,(_UC_VREGS+512)(r3)
 
@@ -264,9 +270,261 @@
 
  	stw	r0,0(r8)
 
-2: /* L(no_vec): */
+6: /* L(no_vec): */
+#  endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for saving the VSR registers. We need to save
+    the last 32 VSR registers first.  */
+# ifdef PIC
+	mflr    r8
+#  ifdef HAVE_ASM_PPC_REL16
+	bcl	20,31,3f
+3:	mflr	r7
+	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-3b@ha
+	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-3b@l
+#  else
+	bl      _GLOBAL_OFFSET_TABLE_@local-4
+	mflr    r7
+#  endif
+#  ifdef SHARED
+	lwz     r7,_rtld_global_ro@got(r7)
+	mtlr    r8
+	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+#  else
+	lwz     r7,_dl_hwcap@got(r7)
+	mtlr    r8
+	lwz     r7,0(r7)
+#  endif
+# else
+	lis	r7,_dl_hwcap@ha
+	lwz     r7,_dl_hwcap@l(r7)
 # endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+   has VSX registers also has Altivec capability.  */
+	andi.	r7,r7,PPC_FEATURE_HAS_VSX
+	la      r6,(_UC_VSREGS)(r3)
+	beq     5f /* L(no_vs)  */
+
+	la	r10,(_UC_VREGS)(r3)
+	la	r9,(_UC_VREGS+16)(r3)
+/* address of the combined VSCR/VSAVE quadword.  */
+	la	r8,(_UC_VREGS+512)(r3)
+
+/* Save the vector registers */
+	stvx  v0,0,r10
+	stvx  v1,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+/* We need to get the Vector Status and Control Register early to avoid
+   store order problems later with the VSAVE register that shares the
+   same quadword.  */
+	mfvscr	v0
+
+	stvx  v2,0,r10
+	stvx  v3,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx	v0,0,r8
+
+	stvx  v4,0,r10
+	stvx  v5,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v6,0,r10
+	stvx  v7,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v8,0,r10
+	stvx  v9,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v10,0,r10
+	stvx  v11,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v12,0,r10
+	stvx  v13,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v14,0,r10
+	stvx  v15,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v16,0,r10
+	stvx  v17,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v18,0,r10
+	stvx  v19,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v20,0,r10
+	stvx  v21,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v22,0,r10
+	stvx  v23,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v24,0,r10
+	stvx  v25,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v26,0,r10
+	stvx  v27,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v28,0,r10
+	stvx  v29,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	mfvscr	v0
+	stvx  v30,0,r10
+	stvx  v31,0,r9
+	stw	r0,0(r8)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7".  */
+
+ /* Proceeding to the FP registers and the doubleword 1
+    of the first 32 VSR registers.  */
+	la	r7,(_UC_FREGS)(r3)
+	la	r6,(_UC_VSREGS)(r3)
+	/* Save fp0 and fp1 into vs32.  */
+	xxmrghd vs32,vs0,vs1
+	/* Save vs0[1] and vs1[1] into vs33.  */
+	xxmrgld vs33,vs0,vs1
+	/* Save f0 and f1.  */
+	stxvd2x  vs32,0,r7
+	/* Save vs0[1] and vs1[1].  */
+	stxvd2x  vs33,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs34,vs2,vs3
+	xxmrgld vs35,vs2,vs3
+	stxvd2x  vs34,0,r7
+	stxvd2x  vs35,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs36,vs4,vs5
+	xxmrgld vs37,vs4,vs5
+	stxvd2x  vs36,0,r7
+	stxvd2x  vs37,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs38,vs6,vs7
+	xxmrgld vs39,vs6,vs7
+	stxvd2x  vs38,0,r7
+	stxvd2x  vs39,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs40,vs8,vs9
+	xxmrgld vs41,vs8,vs9
+	stxvd2x  vs40,0,r7
+	stxvd2x  vs41,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs42,vs10,vs11
+	xxmrgld vs43,vs10,vs11
+	stxvd2x  vs42,0,r7
+	stxvd2x  vs43,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs44,vs12,vs13
+	xxmrgld vs45,vs12,vs13
+	stxvd2x  vs44,0,r7
+	stxvd2x  vs45,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs46,vs14,vs15
+	xxmrgld vs47,vs14,vs15
+	stxvd2x  vs46,0,r7
+	stxvd2x  vs47,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs48,vs16,vs17
+	xxmrgld vs49,vs16,vs17
+	stxvd2x  vs48,0,r7
+	stxvd2x  vs49,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs50,vs18,vs19
+	xxmrgld vs51,vs18,vs19
+	stxvd2x  vs50,0,r7
+	stxvd2x  vs51,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs52,vs20,vs21
+	xxmrgld vs53,vs20,vs21
+	stxvd2x  vs52,0,r7
+	stxvd2x  vs53,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs54,vs22,vs23
+	xxmrgld vs55,vs22,vs23
+	stxvd2x  vs54,0,r7
+	stxvd2x  vs55,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs56,vs24,vs25
+	xxmrgld vs57,vs24,vs25
+	stxvd2x  vs56,0,r7
+	stxvd2x  vs57,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs58,vs26,vs27
+	xxmrgld vs59,vs26,vs27
+	stxvd2x  vs58,0,r7
+	stxvd2x  vs59,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs60,vs28,vs29
+	xxmrgld vs61,vs28,vs29
+	stxvd2x  vs60,0,r7
+	stxvd2x  vs61,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs62,vs30,vs31
+	xxmrgld vs63,vs30,vs31
+	stxvd2x  vs62,0,r7
+	stxvd2x  vs63,0,r6
+
+#else
+# warning "Binutils does not support VSX instructions."
 #endif
+5:/*L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS */
+
 /* We need to set up parms and call sigprocmask which will clobber
    volatile registers. So before the call we need to retrieve the
    original ucontext ptr (parm1) from stack and store the UC_REGS_PTR
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S	2008-07-31 16:30:56.000000000 -0500
@@ -29,15 +29,17 @@
 #define __CONTEXT_FUNC_NAME __getcontext
 #define __CONTEXT_ENABLE_FPRS 1
 #define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
 
 /* Size of ucontext in GLIBC_2.3.4 and later.  */
 #define _UC_SIZE_2_3_4	1184
+#define _UC_SIZE_2_9	1440
 
 #ifdef __ASSUME_SWAPCONTEXT_SYSCALL
 	.section ".text";
 ENTRY (__getcontext)
 	li	r4,0
-	li	r5,_UC_SIZE_2_3_4;
+	li	r5,_UC_SIZE_2_9;
 	DO_CALL (SYS_ify (swapcontext));
 	bso-	cr0,1f
 /* the kernel does not set the return code for the success case */
@@ -50,16 +52,42 @@
 # include "getcontext-common.S"
 #endif
 
-versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+	compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_getcontext)
+	li	r4,0
+	li	r5,_UC_SIZE_2_3_4;
+	DO_CALL (SYS_ify (swapcontext));
+	bso-	cr0,2f
+/* the kernel does not set the return code for the success case */
+	li	r3,0
+	blr
+2:
+	b	__syscall_error@local
+END (__novsx_getcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_getcontext
+# include "getcontext-common.S"
+
+	.previous
+#endif
+compat_symbol (libc, __novsx_getcontext, getcontext, GLIBC_2_3_4)
+#endif
 
 #if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
 	compat_text_section
 	
-# undef __CONTEXT_FUNC_NAME	
+# undef __CONTEXT_FUNC_NAME
 # define __CONTEXT_FUNC_NAME __novec_getcontext
+# undef __CONTEXT_ENABLE_VSRS
 # undef __CONTEXT_ENABLE_VRS
 
-# clude "getcontext-common.S"
+# include "getcontext-common.S"
 
 	.previous
 
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S	2008-07-31 16:33:44.000000000 -0500
@@ -24,6 +24,7 @@
    and if appropriate:
      __CONTEXT_ENABLE_FPRS
      __CONTEXT_ENABLE_VRS
+     __CONTEXT_ENABLE_VSRS
    Any archecture that implements the Vector unit is assumed to also 
    implement the floating unit.  */
 
@@ -71,32 +72,36 @@
 	cmpwi	r3,0
 	bne	3f	/* L(error_exit) */
 
-#ifdef __CONTEXT_ENABLE_FPRS
-# ifdef __CONTEXT_ENABLE_VRS
-#  ifdef PIC
+/* For VSR registers, we need a new scheme for restoring the
+   registers in order to perform fewer load operations. If the
+   processor does not have VSX, use the old scheme.  */
+#ifndef __CONTEXT_ENABLE_VSRS
+# ifdef __CONTEXT_ENABLE_FPRS
+#  ifdef __CONTEXT_ENABLE_VRS
+#   ifdef PIC
 	mflr    r8
-#   ifdef HAVE_ASM_PPC_REL16
+#    ifdef HAVE_ASM_PPC_REL16
 	bcl	20,31,1f
 1:	mflr	r7
 	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
 	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-#   else
+#    else
 	bl      _GLOBAL_OFFSET_TABLE_@local-4
 	mflr    r7
-#   endif
-#   ifdef SHARED
+#    endif
+#    ifdef SHARED
 	lwz     r7,_rtld_global_ro@got(r7)
 	mtlr    r8
 	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-#   else
+#    else
 	lwz     r7,_dl_hwcap@got(r7)
 	mtlr    r8
 	lwz     r7,0(r7)
-#   endif
-#  else
+#    endif
+#   else
 	lis	r7,_dl_hwcap@ha
 	lwz     r7,_dl_hwcap@l(r7)
-#  endif
+#   endif
 	andis.	r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 	la	r10,(_UC_VREGS)(r31)
 	beq	2f	/* L(has_no_vec) */
@@ -195,7 +200,7 @@
 	lvx   v11,0,r9
 
 2: /* L(has_no_vec): */
-# endif /* __CONTEXT_ENABLE_VRS */
+#  endif /* __CONTEXT_ENABLE_VRS */
 	/* Restore the floating-point registers */
 	lfd	fp31,_UC_FREGS+(32*8)(r31)
 	lfd	fp0,_UC_FREGS+(0*8)(r31)
@@ -231,7 +236,259 @@
 	lfd	fp29,_UC_FREGS+(29*8)(r31)
 	lfd	fp30,_UC_FREGS+(30*8)(r31)
 	lfd	fp31,_UC_FREGS+(31*8)(r31)
-#endif /* __CONTEXT_ENABLE_FPRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for restoring the VSR registers. We need to load
+    the last 32 VSR registers first.  */
+# ifdef PIC
+	mflr    r8
+#  ifdef HAVE_ASM_PPC_REL16
+	bcl	20,31,5f
+5:	mflr	r7
+	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-5b@ha
+	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-5b@l
+#  else
+	bl      _GLOBAL_OFFSET_TABLE_@local-4
+	mflr    r7
+#  endif
+	mtlr    r8
+#  ifdef SHARED
+	lwz     r7,_rtld_global_ro@got(r7)
+	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+#  else
+	lwz     r7,_dl_hwcap@got(r7)
+	lwz     r7,0(r7)
+#  endif
+# else
+	lis	r7,_dl_hwcap@ha
+	lwz     r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+   has VSX registers also has Altivec capability.  */
+	andi.	r7,r7,PPC_FEATURE_HAS_VSX
+	la      r6,(_UC_VSREGS)(r31)
+	beq     6f /* L(no_vs) */
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7".  */
+
+/* Using VMX registers as temps to minimize the number of loads for
+   restoring the FP and the doubleword 1 of VSR[0-31].  */
+	la	r7,(_UC_FREGS)(r31)
+	la	r6,(_UC_VSREGS)(r31)
+	/* Load f0 and f1 register state into vs32.  */
+	lxvd2x  vs32,0,r7
+	/* Load vs0[1] and vs1[1] register state into vs33.  */
+	lxvd2x   vs33,0,r6
+	/* Merge f0 and vs0[1] register state into vs0.  */
+	xxmrghd vs0,vs32,vs33
+	/* Merge f1 and vs1[1] register state into vs1.  */
+	xxmrgld vs1,vs32,vs33
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs34,0,r7
+	lxvd2x  vs35,0,r6
+	xxmrghd vs2,vs34,vs35
+	xxmrghd vs3,vs34,vs35
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs36,0,r7
+	lxvd2x  vs37,0,r6
+	xxmrghd vs4,vs36,vs37
+	xxmrghd vs5,vs36,vs37
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs38,0,r7
+	lxvd2x  vs39,0,r6
+	xxmrghd vs6,vs38,vs39
+	xxmrghd vs7,vs38,vs39
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs40,0,r7
+	lxvd2x  vs41,0,r6
+	xxmrghd vs8,vs40,vs41
+	xxmrghd vs9,vs40,vs41
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs42,0,r7
+	lxvd2x  vs43,0,r6
+	xxmrghd vs10,vs42,vs43
+	xxmrghd vs11,vs42,vs43
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs44,0,r7
+	lxvd2x  vs45,0,r6
+	xxmrghd vs12,vs44,vs45
+	xxmrghd vs13,vs44,vs45
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs46,0,r7
+	lxvd2x  vs47,0,r6
+	xxmrghd vs14,vs46,vs47
+	xxmrghd vs15,vs46,vs47
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs48,0,r7
+	lxvd2x  vs49,0,r6
+	xxmrghd vs16,vs48,vs49
+	xxmrghd vs17,vs48,vs49
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs50,0,r7
+	lxvd2x  vs51,0,r6
+	xxmrghd vs18,vs50,vs51
+	xxmrghd vs19,vs50,vs51
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs52,0,r7
+	lxvd2x  vs53,0,r6
+	xxmrghd vs20,vs52,vs53
+	xxmrghd vs21,vs52,vs53
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs54,0,r7
+	lxvd2x  vs55,0,r6
+	xxmrghd vs22,vs54,vs55
+	xxmrghd vs23,vs54,vs55
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs56,0,r7
+	lxvd2x  vs57,0,r6
+	xxmrghd vs24,vs56,vs57
+	xxmrghd vs25,vs56,vs57
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs58,0,r7
+	lxvd2x  vs59,0,r6
+	xxmrghd vs26,vs58,vs59
+	xxmrghd vs27,vs58,vs59
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs60,0,r7
+	lxvd2x  vs61,0,r6
+	xxmrghd vs28,vs60,vs61
+	xxmrghd vs29,vs60,vs61
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs62,0,r7
+	lxvd2x  vs63,0,r6
+	xxmrghd vs30,vs62,vs63
+	xxmrghd vs31,vs62,vs63
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif
+
+/* Now we can proceed restoring the VMX registers.  */
+	la	r10,(_UC_VREGS)(r31)
+	lwz   r0,(32*16)(r10)
+	li    r9,(32*16)
+	cmpwi r0,0
+	mtspr VRSAVE,r0
+	lvx   v19,r9,r10
+	la    r9,(16)(r10)
+
+	lvx   v0,0,r10
+	lvx   v1,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	mtvscr  v19
+	lvx   v2,0,r10
+	lvx   v3,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v4,0,r10
+	lvx   v5,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v6,0,r10
+	lvx   v7,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v8,0,r10
+	lvx   v9,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v10,0,r10
+	lvx   v11,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v12,0,r10
+	lvx   v13,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v14,0,r10
+	lvx   v15,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v16,0,r10
+	lvx   v17,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v18,0,r10
+	lvx   v19,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v20,0,r10
+	lvx   v21,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v22,0,r10
+	lvx   v23,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v24,0,r10
+	lvx   v25,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v26,0,r10
+	lvx   v27,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v28,0,r10
+	lvx   v29,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v30,0,r10
+	lvx   v31,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v10,0,r10
+	lvx   v11,0,r9
+
+6: /* L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS  */
 
 	/* Restore LR and CCR, and set CTR to the NIP value */
 	lwz	r3,_UC_GREGS+(PT_LNK*4)(r31)
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S	2008-07-31 16:30:56.000000000 -0500
@@ -29,16 +29,18 @@
 #define __CONTEXT_FUNC_NAME __setcontext
 #define __CONTEXT_ENABLE_FPRS 1
 #define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
 
 /* Size of ucontext in GLIBC_2.3.4 and later.  */
 #define _UC_SIZE_2_3_4	1184
+#define _UC_SIZE_2_9	1440
 
 #ifdef __ASSUME_SWAPCONTEXT_SYSCALL
 	.section ".text";
 ENTRY (__setcontext)
 	mr	r4,r3
 	li	r3,0
-	li	r5,_UC_SIZE_2_3_4;
+	li	r5,_UC_SIZE_2_9;
 	DO_CALL (SYS_ify (swapcontext));
 	bso-	cr0,1f
 /* the kernel does not set the return code for the success case */
@@ -51,7 +53,33 @@
 # include "setcontext-common.S"
 #endif
 
-versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+	compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_setcontext)
+	mr	r4,r3
+	li	r3,0
+	li	r5,_UC_SIZE_2_3_4;
+	DO_CALL (SYS_ify (swapcontext));
+	bso-	cr0,2f
+/* the kernel does not set the return code for the success case */
+	li	r3,0
+	blr
+2:
+	b	__syscall_error@local
+END (__novsx_setcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_setcontext
+# include "setcontext-common.S"
+
+	.previous
+#endif
+compat_symbol (libc, __novsx_setcontext, setcontext, GLIBC_2_3_4)
+#endif
 
 #if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
 
@@ -59,6 +87,7 @@
 	
 # undef __CONTEXT_FUNC_NAME	
 # define __CONTEXT_FUNC_NAME __novec_setcontext
+# undef __CONTEXT_ENABLE_VSRS
 # undef __CONTEXT_ENABLE_VRS
 
 # include "setcontext-common.S"
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S	2008-07-31 16:35:49.000000000 -0500
@@ -24,6 +24,7 @@
    and if appropriate:
      __CONTEXT_ENABLE_FPRS
      __CONTEXT_ENABLE_VRS
+     __CONTEXT_ENABLE_VSRS
    Any archecture that implements the Vector unit is assumed to also 
    implement the floating unit.  */
 
@@ -107,7 +108,12 @@
 	stw	r0,_UC_GREGS+(PT_MSR*4)(r3)
 	stw	r0,_UC_GREGS+(PT_MQ*4)(r3)
 
-#ifdef __CONTEXT_ENABLE_FPRS
+/* For VSR registers, we need a new scheme for saving the
+   registers in order to perform fewer store operations. If the
+   processor does not have VSX, use the old scheme.  */
+#ifndef __CONTEXT_ENABLE_VSRS
+ /* Old scheme for storing FP and VR registers.  */
+# ifdef __CONTEXT_ENABLE_FPRS
 	/* Save the floating-point registers */
 	stfd	fp0,_UC_FREGS+(0*8)(r3)
 	stfd	fp1,_UC_FREGS+(1*8)(r3)
@@ -143,32 +149,32 @@
 	stfd	fp30,_UC_FREGS+(30*8)(r3)
 	stfd	fp31,_UC_FREGS+(31*8)(r3)
 	stfd	fp0,_UC_FREGS+(32*8)(r3)
-	
-# ifdef __CONTEXT_ENABLE_VRS
-#  ifdef PIC
+
+#  ifdef __CONTEXT_ENABLE_VRS
+#   ifdef PIC
 	mflr    r8
-#   ifdef HAVE_ASM_PPC_REL16
+#    ifdef HAVE_ASM_PPC_REL16
 	bcl	20,31,1f
 1:	mflr	r7
 	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
 	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-#   else
+#    else
 	bl      _GLOBAL_OFFSET_TABLE_@local-4
 	mflr    r7
-#   endif
-#   ifdef SHARED
+#    endif
+#    ifdef SHARED
 	lwz     r7,_rtld_global_ro@got(r7)
 	mtlr    r8
 	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-#   else
+#    else
 	lwz     r7,_dl_hwcap@got(r7)
 	mtlr    r8
 	lwz     r7,0(r7)
-#   endif
-#  else
+#    endif
+#   else
 	lis	r7,_dl_hwcap@ha
 	lwz     r7,_dl_hwcap@l(r7)
-#  endif
+#   endif
 	andis.	r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 
 	la	r10,(_UC_VREGS)(r3)
@@ -267,8 +273,259 @@
  	stw	r0,0(r8)
 	
 2: /*L(no_vec):*/
-# endif /* __CONTEXT_ENABLE_VRS */
-#endif /* __CONTEXT_ENABLE_FPRS */
+#  endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for saving the VSR registers. We need to save
+    the last 32 VSR registers first.  */
+# ifdef PIC
+	mflr    r8
+#  ifdef HAVE_ASM_PPC_REL16
+	bcl	20,31,7f
+7:	mflr	r7
+	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-7b@ha
+	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-7b@l
+#  else
+	bl      _GLOBAL_OFFSET_TABLE_@local-4
+	mflr    r7
+#  endif
+#  ifdef SHARED
+	lwz     r7,_rtld_global_ro@got(r7)
+	mtlr    r8
+	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+#  else
+	lwz     r7,_dl_hwcap@got(r7)
+	mtlr    r8
+	lwz     r7,0(r7)
+#  endif
+# else
+	lis	r7,_dl_hwcap@ha
+	lwz     r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+   has VSX registers also has Altivec capability.  */
+	andi.	r7,r7,PPC_FEATURE_HAS_VSX
+	la      r6,(_UC_VSREGS)(r3)
+	beq     8f /* L(no_vs)  */
+
+	la	r10,(_UC_VREGS)(r3)
+	la	r9,(_UC_VREGS+16)(r3)
+/* address of the combined VSCR/VSAVE quadword.  */
+	la	r8,(_UC_VREGS+512)(r3)
+
+/* Save the vector registers */
+	stvx  v0,0,r10
+	stvx  v1,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+/* We need to get the Vector Status and Control Register early to avoid
+   store order problems later with the VSAVE register that shares the
+   same quadword.  */
+	mfvscr	v0
+
+	stvx  v2,0,r10
+	stvx  v3,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx	v0,0,r8
+
+	stvx  v4,0,r10
+	stvx  v5,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v6,0,r10
+	stvx  v7,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v8,0,r10
+	stvx  v9,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v10,0,r10
+	stvx  v11,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v12,0,r10
+	stvx  v13,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v14,0,r10
+	stvx  v15,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v16,0,r10
+	stvx  v17,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v18,0,r10
+	stvx  v19,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v20,0,r10
+	stvx  v21,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v22,0,r10
+	stvx  v23,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v24,0,r10
+	stvx  v25,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v26,0,r10
+	stvx  v27,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	stvx  v28,0,r10
+	stvx  v29,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	mfvscr	v0
+	stvx  v30,0,r10
+	stvx  v31,0,r9
+	stw	r0,0(r8)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7".  */
+
+ /* Proceeding to the FP registers and the doubleword 1
+    of the first 32 VSR registers.  */
+	la	r7,(_UC_FREGS)(r3)
+	la	r6,(_UC_VSREGS)(r3)
+	/* Save fp0 and fp1 into vs32.  */
+	xxmrghd vs32,vs0,vs1
+	/* Save vs0[1] and vs1[1] into vs33.  */
+	xxmrgld vs33,vs0,vs1
+	/* Save f0 and f1.  */
+	stxvd2x  vs32,0,r7
+	/* Save vs0[1] and vs1[1].  */
+	stxvd2x  vs33,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs34,vs2,vs3
+	xxmrgld vs35,vs2,vs3
+	stxvd2x  vs34,0,r7
+	stxvd2x  vs35,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs36,vs4,vs5
+	xxmrgld vs37,vs4,vs5
+	stxvd2x  vs36,0,r7
+	stxvd2x  vs37,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs38,vs6,vs7
+	xxmrgld vs39,vs6,vs7
+	stxvd2x  vs38,0,r7
+	stxvd2x  vs39,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs40,vs8,vs9
+	xxmrgld vs41,vs8,vs9
+	stxvd2x  vs40,0,r7
+	stxvd2x  vs41,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs42,vs10,vs11
+	xxmrgld vs43,vs10,vs11
+	stxvd2x  vs42,0,r7
+	stxvd2x  vs43,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs44,vs12,vs13
+	xxmrgld vs45,vs12,vs13
+	stxvd2x  vs44,0,r7
+	stxvd2x  vs45,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs46,vs14,vs15
+	xxmrgld vs47,vs14,vs15
+	stxvd2x  vs46,0,r7
+	stxvd2x  vs47,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs48,vs16,vs17
+	xxmrgld vs49,vs16,vs17
+	stxvd2x  vs48,0,r7
+	stxvd2x  vs49,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs50,vs18,vs19
+	xxmrgld vs51,vs18,vs19
+	stxvd2x  vs50,0,r7
+	stxvd2x  vs51,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs52,vs20,vs21
+	xxmrgld vs53,vs20,vs21
+	stxvd2x  vs52,0,r7
+	stxvd2x  vs53,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs54,vs22,vs23
+	xxmrgld vs55,vs22,vs23
+	stxvd2x  vs54,0,r7
+	stxvd2x  vs55,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs56,vs24,vs25
+	xxmrgld vs57,vs24,vs25
+	stxvd2x  vs56,0,r7
+	stxvd2x  vs57,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs58,vs26,vs27
+	xxmrgld vs59,vs26,vs27
+	stxvd2x  vs58,0,r7
+	stxvd2x  vs59,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs60,vs28,vs29
+	xxmrgld vs61,vs28,vs29
+	stxvd2x  vs60,0,r7
+	stxvd2x  vs61,0,r6
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	xxmrghd vs62,vs30,vs31
+	xxmrgld vs63,vs30,vs31
+	stxvd2x  vs62,0,r7
+	stxvd2x  vs63,0,r6
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif /* HAVE_ASM_PPC_VSX */
+8:/*L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS */
 
 /* Restore ucontext (parm1) from stack.  */
 	lwz	r12,_FRAME_PARM_SAVE1(r1)
@@ -297,33 +554,38 @@
 	cmpwi	r0,0
 	bne	4f	/* L(do_sigret) */
 
-#ifdef __CONTEXT_ENABLE_FPRS
-# ifdef __CONTEXT_ENABLE_VRS
+/* For VSR registers, we need a new scheme for restoring the
+   registers in order to perform fewer load operations. If the
+   processor does not have VSX, use the old scheme.  */
+#ifndef __CONTEXT_ENABLE_VSRS
+# ifdef __CONTEXT_ENABLE_FPRS
+#  ifdef __CONTEXT_ENABLE_VRS
 
-#  ifdef PIC
+#   ifdef PIC
 	mflr    r8
-#   ifdef HAVE_ASM_PPC_REL16
+#    ifdef HAVE_ASM_PPC_REL16
 	bcl	20,31,5f
 5:	mflr	r7
 	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
 	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-#   else
+#    else
 	bl      _GLOBAL_OFFSET_TABLE_@local-4
 	mflr    r7
-#   endif
+#    endif
 	mtlr    r8
-#   ifdef SHARED
+#    ifdef SHARED
 	lwz     r7,_rtld_global_ro@got(r7)
 	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-#   else
+#    else
 	lwz     r7,_dl_hwcap@got(r7)
 	lwz     r7,0(r7)
-#   endif
-#  else
+#    endif
+#   else
 	lis	r7,_dl_hwcap@ha
 	lwz     r7,_dl_hwcap@l(r7)
-#  endif
+#   endif
 	andis.	r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
+
 	la	r10,(_UC_VREGS)(r31)
 	beq	6f	/* L(has_no_vec) */
 
@@ -421,7 +683,7 @@
 	lvx   v11,0,r9
 
 6: /* L(has_no_vec): */
-# endif /* __CONTEXT_ENABLE_VRS */
+#  endif /* __CONTEXT_ENABLE_VRS */
 	/* Restore the floating-point registers */
 	lfd	fp31,_UC_FREGS+(32*8)(r31)
 	lfd	fp0,_UC_FREGS+(0*8)(r31)
@@ -457,7 +719,258 @@
 	lfd	fp29,_UC_FREGS+(29*8)(r31)
 	lfd	fp30,_UC_FREGS+(30*8)(r31)
 	lfd	fp31,_UC_FREGS+(31*8)(r31)
-#endif /* __CONTEXT_ENABLE_FPRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for restoring the VSR registers. We need to load
+    the last 32 VSR registers first.  */
+# ifdef PIC
+	mflr    r8
+#  ifdef HAVE_ASM_PPC_REL16
+	bcl	20,31,9f
+9:	mflr	r7
+	addis	r7,r7,_GLOBAL_OFFSET_TABLE_-9b@ha
+	addi	r7,r7,_GLOBAL_OFFSET_TABLE_-9b@l
+#  else
+	bl      _GLOBAL_OFFSET_TABLE_@local-4
+	mflr    r7
+#  endif
+	mtlr    r8
+#  ifdef SHARED
+	lwz     r7,_rtld_global_ro@got(r7)
+	lwz     r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+#  else
+	lwz     r7,_dl_hwcap@got(r7)
+	lwz     r7,0(r7)
+#  endif
+# else
+	lis	r7,_dl_hwcap@ha
+	lwz     r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+   has VSX registers also has Altivec capability.  */
+	andi.	r7,r7,PPC_FEATURE_HAS_VSX
+	la      r6,(_UC_VSREGS)(r31)
+	beq     L(no_vs)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7".  */
+
+/* Using VMX registers as temps to minimize the number of loads for
+   restoring the FP and the doubleword 1 of VSR[0-31].  */
+	la	r7,(_UC_FREGS)(r31)
+	la	r6,(_UC_VSREGS)(r31)
+	/* Load f0 and f1 register state into vs32.  */
+	lxvd2x  vs32,0,r7
+	/* Load vs0[1] and vs1[1] register state into vs33.  */
+	lxvd2x   vs33,0,r6
+	/* Merge f0 and vs0[1] register state into vs0.  */
+	xxmrghd vs0,vs32,vs33
+	/* Merge f1 and vs1[1] register state into vs1.  */
+	xxmrgld vs1,vs32,vs33
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs34,0,r7
+	lxvd2x  vs35,0,r6
+	xxmrghd vs2,vs34,vs35
+	xxmrghd vs3,vs34,vs35
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs36,0,r7
+	lxvd2x  vs37,0,r6
+	xxmrghd vs4,vs36,vs37
+	xxmrghd vs5,vs36,vs37
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs38,0,r7
+	lxvd2x  vs39,0,r6
+	xxmrghd vs6,vs38,vs39
+	xxmrghd vs7,vs38,vs39
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs40,0,r7
+	lxvd2x  vs41,0,r6
+	xxmrghd vs8,vs40,vs41
+	xxmrghd vs9,vs40,vs41
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs42,0,r7
+	lxvd2x  vs43,0,r6
+	xxmrghd vs10,vs42,vs43
+	xxmrghd vs11,vs42,vs43
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs44,0,r7
+	lxvd2x  vs45,0,r6
+	xxmrghd vs12,vs44,vs45
+	xxmrghd vs13,vs44,vs45
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs46,0,r7
+	lxvd2x  vs47,0,r6
+	xxmrghd vs14,vs46,vs47
+	xxmrghd vs15,vs46,vs47
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs48,0,r7
+	lxvd2x  vs49,0,r6
+	xxmrghd vs16,vs48,vs49
+	xxmrghd vs17,vs48,vs49
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs50,0,r7
+	lxvd2x  vs51,0,r6
+	xxmrghd vs18,vs50,vs51
+	xxmrghd vs19,vs50,vs51
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs52,0,r7
+	lxvd2x  vs53,0,r6
+	xxmrghd vs20,vs52,vs53
+	xxmrghd vs21,vs52,vs53
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs54,0,r7
+	lxvd2x  vs55,0,r6
+	xxmrghd vs22,vs54,vs55
+	xxmrghd vs23,vs54,vs55
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs56,0,r7
+	lxvd2x  vs57,0,r6
+	xxmrghd vs24,vs56,vs57
+	xxmrghd vs25,vs56,vs57
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs58,0,r7
+	lxvd2x  vs59,0,r6
+	xxmrghd vs26,vs58,vs59
+	xxmrghd vs27,vs58,vs59
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs60,0,r7
+	lxvd2x  vs61,0,r6
+	xxmrghd vs28,vs60,vs61
+	xxmrghd vs29,vs60,vs61
+	addi    r7,r7,16
+	addi    r6,r6,16
+
+	lxvd2x  vs62,0,r7
+	lxvd2x  vs63,0,r6
+	xxmrghd vs30,vs62,vs63
+	xxmrghd vs31,vs62,vs63
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif
+/* Now we can proceed restoring the VMX registers.  */
+	la	r10,(_UC_VREGS)(r31)
+	lwz   r0,(32*16)(r10)
+	li    r9,(32*16)
+	cmpwi r0,0
+	mtspr VRSAVE,r0
+	lvx   v19,r9,r10
+	la    r9,(16)(r10)
+
+	lvx   v0,0,r10
+	lvx   v1,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	mtvscr  v19
+	lvx   v2,0,r10
+	lvx   v3,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v4,0,r10
+	lvx   v5,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v6,0,r10
+	lvx   v7,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v8,0,r10
+	lvx   v9,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v10,0,r10
+	lvx   v11,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v12,0,r10
+	lvx   v13,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v14,0,r10
+	lvx   v15,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v16,0,r10
+	lvx   v17,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v18,0,r10
+	lvx   v19,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v20,0,r10
+	lvx   v21,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v22,0,r10
+	lvx   v23,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v24,0,r10
+	lvx   v25,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v26,0,r10
+	lvx   v27,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v28,0,r10
+	lvx   v29,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v30,0,r10
+	lvx   v31,0,r9
+	addi  r10,r10,32
+	addi  r9,r9,32
+
+	lvx   v10,0,r10
+	lvx   v11,0,r9
+
+L(no_vs):
+#endif /* __CONTEXT_ENABLE_VSRS  */
 
 	/* Restore LR and CCR, and set CTR to the NIP value */
 	lwz	r3,_UC_GREGS+(PT_LNK*4)(r31)
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S	2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S	2008-07-31 16:30:56.000000000 -0500
@@ -29,14 +29,16 @@
 #define __CONTEXT_FUNC_NAME __swapcontext
 #define __CONTEXT_ENABLE_FPRS 1
 #define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
 
 /* Size of ucontext in GLIBC_2.3.4 and later.  */
 #define _UC_SIZE_2_3_4	1184
+#define _UC_SIZE_2_9	1440
 
 #ifdef __ASSUME_SWAPCONTEXT_SYSCALL
 	.section ".text";
 ENTRY (__swapcontext)
-	li	r5,_UC_SIZE_2_3_4;
+	li	r5,_UC_SIZE_2_9;
 	DO_CALL (SYS_ify (swapcontext));
 	bso-	cr0,1f
 /* the kernel does not set the return code for the success case */
@@ -49,7 +51,31 @@
 # include "swapcontext-common.S"
 #endif
 
-versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+	compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_swapcontext)
+	li	r5,_UC_SIZE_2_3_4;
+	DO_CALL (SYS_ify (swapcontext));
+	bso-	cr0,2f
+/* the kernel does not set the return code for the success case */
+	li	r3,0
+	blr
+2:
+	b	__syscall_error@local
+END (__novsx_swapcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_swapcontext
+# include "swapcontext-common.S"
+
+	.previous
+#endif
+compat_symbol (libc, __novsx_swapcontext, swapcontext, GLIBC_2_3_4)
+#endif
 
 #if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
 
@@ -57,6 +83,7 @@
 	
 # undef __CONTEXT_FUNC_NAME	
 # define __CONTEXT_FUNC_NAME __novec_swapcontext
+# undef __CONTEXT_ENABLE_VSRS
 # undef __CONTEXT_ENABLE_VRS
 
 # include "swapcontext-common.S"
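
A closing note on the merge scheme used in the new save path, as I
read it (not an authoritative description): on VSX hardware each FPR
is doubleword 0 of the corresponding VSR, so xxmrghd packs the two
FPR halves of a register pair into one quadword for the FP save area,
while xxmrgld packs the two remaining doubleword-1 halves into one
quadword for the new VSX save area -- two stxvd2x stores cover what
would otherwise take four separate doubleword stores.  Expressed in C
(types and names below are illustrative only, not the real ucontext
layout):

#include <stdint.h>

/* Hypothetical view of one 128-bit VSR as two 64-bit doublewords;
   dw[0] is the FPR part on big-endian powerpc.  */
typedef struct { uint64_t dw[2]; } vsr128;

/* What one xxmrghd/stxvd2x plus one xxmrgld/stxvd2x accomplish for
   the pair VSR[2n], VSR[2n+1], written as scalar stores.  */
static void
save_pair (const vsr128 *even, const vsr128 *odd,
	   uint64_t *fp_area, uint64_t *vsx_area)
{
  fp_area[0] = even->dw[0];    /* FPR[2n]   */
  fp_area[1] = odd->dw[0];     /* FPR[2n+1] */
  vsx_area[0] = even->dw[1];   /* VSR[2n]   doubleword 1 */
  vsx_area[1] = odd->dw[1];    /* VSR[2n+1] doubleword 1 */
}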
