This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.10-151-gca41922


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  ca419225a3c4f9f341eddf582b201211d1bf2aec (commit)
       via  47fc9b710bcadb4196f8ef71813d6724d954fcb2 (commit)
       via  d7bd7a8ae8cdb3f1414b1e032759d9ef324eb040 (commit)
       via  59cbcac015cdd446c346cfd2c2ada3f94ef540b2 (commit)
       via  b0ecde3a63fd3e987137aa9eb76da3b556b14559 (commit)
      from  786b74f41a076ac67b5d4fe59ab26e55745095df (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ca419225a3c4f9f341eddf582b201211d1bf2aec

commit ca419225a3c4f9f341eddf582b201211d1bf2aec
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Wed Jul 15 17:59:14 2009 -0700

    Fix thinko in AVX audit patch.
    
    Don't use AVX instructions too often.

diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index d09001b..7f20491 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -249,17 +249,7 @@ L(no_avx1):
 	jmp	1f
 
 L(no_avx2):
-	vmovdqa		    (LR_XMM_OFFSET)(%rsp), %xmm0
-	vmovdqa	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-
-1:
-# else
+# endif
 	movaps		    (LR_XMM_OFFSET)(%rsp), %xmm0
 	movaps	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
 	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
@@ -268,9 +258,8 @@ L(no_avx2):
 	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
 	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
 	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-# endif
 
-	movq 16(%rbx), %r10	# Anything in framesize?
+1:	movq 16(%rbx), %r10	# Anything in framesize?
 	testq %r10, %r10
 	jns 3f
 
@@ -390,16 +379,11 @@ L(no_avx3):
 	jmp 1f
 
 L(no_avx4):
-	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
-	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
-
-1:
-# else
+# endif
 	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
 	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-# endif
 
-	fldt LRV_ST1_OFFSET(%rsp)
+1:	fldt LRV_ST1_OFFSET(%rsp)
 	fldt LRV_ST0_OFFSET(%rsp)
 
 	movq %rbx, %rsp

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=47fc9b710bcadb4196f8ef71813d6724d954fcb2

commit 47fc9b710bcadb4196f8ef71813d6724d954fcb2
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Wed Jul 15 17:51:11 2009 -0700

    Fix typo in last change.

diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 2f55639..d09001b 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -142,7 +142,7 @@ L(have_avx):
 	testl	$(1 << 28), %ecx
 	jne	2f
 	negl	%eax
-2:	movl	%eax, L(have_eax)(%rip)
+2:	movl	%eax, L(have_avx)(%rip)
 	cmpl	$0, %eax
 
 1:	js	L(no_avx1)

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d7bd7a8ae8cdb3f1414b1e032759d9ef324eb040

commit d7bd7a8ae8cdb3f1414b1e032759d9ef324eb040
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Wed Jul 15 17:41:36 2009 -0700

    Secure AVX changes for auditing code.
    
    The original AVX patch used a function pointer to handle the difference
    between machines with and without AVX support.  This is insecure.  A
    well-placed memory exploit could lead to redirection of the execution.
    Using a variable and several tests is a bit slower but cannot be
    exploited in this way.

diff --git a/ChangeLog b/ChangeLog
index bece41b..c355ea4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,8 @@
-2009-07-07  H.J. Lu  <hongjiu.lu@intel.com>
+2009-07-15  Ulrich Drepper  <drepper@redhat.com>
 
-	* elf/Makefile: Don't build modules for tst-audit4 and tst-audit5
-	for anything but x86-64 targets.
+	* sysdeps/x86-64/dl-trampoline.h: Remove after integrating code into...
+	* sysdeps/x86-64/dl-trampoline.S: ...here.  Rewrite to avoid function
+	pointers in writable memory.
 
 2009-07-07  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index f605351..2f55639 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -96,9 +96,9 @@ _dl_runtime_profile:
 	   to detect if any xmm0-xmm7 registers are changed by audit
 	   module.  */
 	subq $(LR_SIZE + XMM_SIZE*8), %rsp
-#else
+# else
 	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
-#endif
+# endif
 	movq %rsp, 24(%rbx)
 
 	/* Fill the La_x86_64_regs structure.  */
@@ -110,45 +110,308 @@ _dl_runtime_profile:
 	movq %rdi, LR_RDI_OFFSET(%rsp)
 	movq %rbp, LR_RBP_OFFSET(%rsp)
 
+	leaq 48(%rbx), %rax
+	movq %rax, LR_RSP_OFFSET(%rsp)
+
+	/* We always store the XMM registers even if AVX is available.
+	   This is to provide backward binary compatility for existing
+	   audit modules.  */
+	movaps %xmm0,		   (LR_XMM_OFFSET)(%rsp)
+	movaps %xmm1, (LR_XMM_OFFSET +   XMM_SIZE)(%rsp)
+	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+
 # ifdef HAVE_AVX_SUPPORT
-	jmp *L(save_and_restore_vector)(%rip)
+	.data
+L(have_avx):
+	.zero 4
+	.size L(have_avx), 4
+	.previous
 
-	.align 16
-L(save_and_restore_vector_sse):
+	cmpl	$0, L(have_avx)(%rip)
+	jne	1f
+	movq	%rbx, %r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	movq	%r11,%rbx		# Restore rbx
+	movl	$1, %eax
+	testl	$(1 << 28), %ecx
+	jne	2f
+	negl	%eax
+2:	movl	%eax, L(have_eax)(%rip)
+	cmpl	$0, %eax
+
+1:	js	L(no_avx1)
+
+	/* This is to support AVX audit modules.  */
+	vmovdqu %ymm0,		      (LR_VECTOR_OFFSET)(%rsp)
+	vmovdqu %ymm1, (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
+	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+
+	/* Save xmm0-xmm7 registers to detect if any of them are
+	   changed by audit module.  */
+	vmovdqa %xmm0,		    (LR_SIZE)(%rsp)
+	vmovdqa %xmm1, (LR_SIZE +   XMM_SIZE)(%rsp)
+	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
+	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
+	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
+	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
+	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
+	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
+
+L(no_avx1):
 # endif
 
-# define MOVXMM movaps
-# include "dl-trampoline.h"
+	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
+	movq 48(%rbx), %rdx	# Load return address if needed.
+	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
+	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
+	leaq 16(%rbx), %r8
+	call _dl_profile_fixup	# Call resolver.
+
+	movq %rax, %r11		# Save return value.
+
+	movq 8(%rbx), %rax	# Get back register content.
+	movq LR_RDX_OFFSET(%rsp), %rdx
+	movq  LR_R8_OFFSET(%rsp), %r8
+	movq  LR_R9_OFFSET(%rsp), %r9
 
 # ifdef HAVE_AVX_SUPPORT
-#  undef  MOVXMM
-#  define MOVXMM vmovdqa
-#  define RESTORE_AVX
-	.align 16
-L(save_and_restore_vector_avx):
-#  include "dl-trampoline.h"
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx2)
+
+	/* Check if any xmm0-xmm7 registers are changed by audit
+	   module.  */
+	vmovdqa (LR_XMM_OFFSET)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu			(LR_VECTOR_OFFSET)(%rsp), %ymm0
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+	vpmovmskb %xmm2, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu	  (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm3
+	vpmovmskb %xmm3, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm4
+	vpmovmskb %xmm4, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm5
+	vpmovmskb %xmm5, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm6
+	vpmovmskb %xmm6, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm7
+	vpmovmskb %xmm7, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+
+1:	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
+	vpmovmskb %xmm8, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+	jmp	1f
+
+L(no_avx2):
+	vmovdqa		    (LR_XMM_OFFSET)(%rsp), %xmm0
+	vmovdqa	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+
+1:
+# else
+	movaps		    (LR_XMM_OFFSET)(%rsp), %xmm0
+	movaps	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
 # endif
 
-	cfi_endproc
-	.size _dl_runtime_profile, .-_dl_runtime_profile
+	movq 16(%rbx), %r10	# Anything in framesize?
+	testq %r10, %r10
+	jns 3f
+
+	/* There's nothing in the frame size, so there
+	   will be no call to the _dl_call_pltexit. */
+
+	/* Get back registers content.  */
+	movq LR_RCX_OFFSET(%rsp), %rcx
+	movq LR_RSI_OFFSET(%rsp), %rsi
+	movq LR_RDI_OFFSET(%rsp), %rdi
 
+	movq %rbx, %rsp
+	movq (%rsp), %rbx
+	cfi_restore(rbx)
+	cfi_def_cfa_register(%rsp)
+
+	addq $48, %rsp		# Adjust the stack to the return value
+				# (eats the reloc index and link_map)
+	cfi_adjust_cfa_offset(-48)
+	jmp *%r11		# Jump to function address.
+
+3:
+	cfi_adjust_cfa_offset(48)
+	cfi_rel_offset(%rbx, 0)
+	cfi_def_cfa_register(%rbx)
+
+	/* At this point we need to prepare new stack for the function
+	   which has to be called.  We copy the original stack to a
+	   temporary buffer of the size specified by the 'framesize'
+	   returned from _dl_profile_fixup */
+
+	leaq LR_RSP_OFFSET(%rbx), %rsi	# stack
+	addq $8, %r10
+	andq $0xfffffffffffffff0, %r10
+	movq %r10, %rcx
+	subq %r10, %rsp
+	movq %rsp, %rdi
+	shrq $3, %rcx
+	rep
+	movsq
+
+	movq 24(%rdi), %rcx	# Get back register content.
+	movq 32(%rdi), %rsi
+	movq 40(%rdi), %rdi
+
+	call *%r11
+
+	mov 24(%rbx), %rsp	# Drop the copied stack content
+
+	/* Now we have to prepare the La_x86_64_retval structure for the
+	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
+	   so we just need to allocate the sizeof(La_x86_64_retval) space on
+	   the stack, since the alignment has already been taken care of. */
 # ifdef HAVE_AVX_SUPPORT
-L(check_avx):
-	mov	%rbx,%r11		# Save rbx
-	movl	$1, %eax
-	cpuid
-	mov	%r11,%rbx		# Restore rbx
-	leaq    L(save_and_restore_vector_sse)(%rip), %rax
-	andl	$(1 << 28), %ecx	# Check if AVX is available.
-	jz	L(ret)
-	leaq    L(save_and_restore_vector_avx)(%rip), %rax
-L(ret):
-	movq	%rax,L(save_and_restore_vector)(%rip)
-	jmp	*%rax
-
-	.section .data.rel.local,"aw",@progbits
-	.align	8
-L(save_and_restore_vector):
-	.quad L(check_avx)
+	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
+	   registers to detect if xmm0/xmm1 registers are changed
+	   by audit module.  */
+	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
+# else
+	subq $LRV_SIZE, %rsp	# sizeof(La_x86_64_retval)
+# endif
+	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.
+
+	/* Fill in the La_x86_64_retval structure.  */
+	movq %rax, LRV_RAX_OFFSET(%rcx)
+	movq %rdx, LRV_RDX_OFFSET(%rcx)
+
+	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
+	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
+
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx3)
+
+	/* This is to support AVX audit modules.  */
+	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
+	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+
+	/* Save xmm0/xmm1 registers to detect if they are changed
+	   by audit module.  */
+	vmovdqa %xmm0,		  (LRV_SIZE)(%rcx)
+	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+
+L(no_avx3):
 # endif
+
+	fstpt LRV_ST0_OFFSET(%rcx)
+	fstpt LRV_ST1_OFFSET(%rcx)
+
+	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
+	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
+        movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
+	call _dl_call_pltexit
+
+	/* Restore return registers.  */
+	movq LRV_RAX_OFFSET(%rsp), %rax
+	movq LRV_RDX_OFFSET(%rsp), %rdx
+
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx4)
+
+	/* Check if xmm0/xmm1 registers are changed by audit module.  */
+	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
+	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+
+1:	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
+	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+	vpmovmskb %xmm2, %esi
+	cmpl $0xffff, %esi
+	je 1f
+	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+	jmp 1f
+
+L(no_avx4):
+	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
+	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
+
+1:
+# else
+	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+# endif
+
+	fldt LRV_ST1_OFFSET(%rsp)
+	fldt LRV_ST0_OFFSET(%rsp)
+
+	movq %rbx, %rsp
+	movq (%rsp), %rbx
+	cfi_restore(rbx)
+	cfi_def_cfa_register(%rsp)
+
+	addq $48, %rsp		# Adjust the stack to the return value
+				# (eats the reloc index and link_map)
+	cfi_adjust_cfa_offset(-48)
+	retq
+
+	cfi_endproc
+	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
deleted file mode 100644
index d63b7d0..0000000
--- a/sysdeps/x86_64/dl-trampoline.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* Partial PLT profile trampoline to save and restore x86-64 vector
-   registers.
-   Copyright (C) 2009 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-	leaq 48(%rbx), %rax
-	movq %rax, LR_RSP_OFFSET(%rsp)
-
-	/* This is to provide backward binary compatility for existing
-	   audit modules.  */
-	MOVXMM %xmm0,		   (LR_XMM_OFFSET)(%rsp)
-	MOVXMM %xmm1, (LR_XMM_OFFSET +   XMM_SIZE)(%rsp)
-	MOVXMM %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
-	MOVXMM %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
-	MOVXMM %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
-	MOVXMM %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
-	MOVXMM %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
-	MOVXMM %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
-
-#ifdef RESTORE_AVX
-	/* This is to support AVX audit modules.  */
-	vmovdqu %ymm0,		      (LR_VECTOR_OFFSET)(%rsp)
-	vmovdqu %ymm1, (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
-	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
-	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
-	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
-	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
-	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
-	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
-
-	/* Save xmm0-xmm7 registers to detect if any of them are
-	   changed by audit module.  */
-	vmovdqa %xmm0,		    (LR_SIZE)(%rsp)
-	vmovdqa %xmm1, (LR_SIZE +   XMM_SIZE)(%rsp)
-	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
-	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
-	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
-	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
-	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
-	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
-#endif
-
-	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
-	movq 48(%rbx), %rdx	# Load return address if needed.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	leaq 16(%rbx), %r8
-	call _dl_profile_fixup	# Call resolver.
-
-	movq %rax, %r11		# Save return value.
-
-	movq 8(%rbx), %rax	# Get back register content.
-	movq LR_RDX_OFFSET(%rsp), %rdx
-	movq  LR_R8_OFFSET(%rsp), %r8
-	movq  LR_R9_OFFSET(%rsp), %r9
-
-#ifdef RESTORE_AVX
-	/* Check if any xmm0-xmm7 registers are changed by audit
-	   module.  */
-	vmovdqa (LR_XMM_OFFSET)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm0
-	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 1f
-
-	/* We restore AVX registers only if xmm0-xmm7 registers are
-	   unchanged.  */
-	vmovdqu			(LR_VECTOR_OFFSET)(%rsp), %ymm0
-	vmovdqu	  (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
-	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
-	jmp 2f
-
-1:
-	vmovdqa		     (LR_XMM_OFFSET)(%rsp), %xmm0
-	vmovdqa   (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
-	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-
-2:
-#else
-	movaps		    (LR_XMM_OFFSET)(%rsp), %xmm0
-	movaps	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
-	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
-	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
-	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
-	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
-	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
-	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-#endif
-
-	movq 16(%rbx), %r10	# Anything in framesize?
-	testq %r10, %r10
-	jns 3f
-
-	/* There's nothing in the frame size, so there
-	   will be no call to the _dl_call_pltexit. */
-
-	/* Get back registers content.  */
-	movq LR_RCX_OFFSET(%rsp), %rcx
-	movq LR_RSI_OFFSET(%rsp), %rsi
-	movq LR_RDI_OFFSET(%rsp), %rdi
-
-	movq %rbx, %rsp
-	movq (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	jmp *%r11		# Jump to function address.
-
-3:
-	cfi_adjust_cfa_offset(48)
-	cfi_rel_offset(%rbx, 0)
-	cfi_def_cfa_register(%rbx)
-
-	/* At this point we need to prepare new stack for the function
-	   which has to be called.  We copy the original stack to a
-	   temporary buffer of the size specified by the 'framesize'
-	   returned from _dl_profile_fixup */
-
-	leaq LR_RSP_OFFSET(%rbx), %rsi	# stack
-	addq $8, %r10
-	andq $0xfffffffffffffff0, %r10
-	movq %r10, %rcx
-	subq %r10, %rsp
-	movq %rsp, %rdi
-	shrq $3, %rcx
-	rep
-	movsq
-
-	movq 24(%rdi), %rcx	# Get back register content.
-	movq 32(%rdi), %rsi
-	movq 40(%rdi), %rdi
-
-	call *%r11
-
-	mov 24(%rbx), %rsp	# Drop the copied stack content
-
-	/* Now we have to prepare the La_x86_64_retval structure for the
-	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
-	   so we just need to allocate the sizeof(La_x86_64_retval) space on
-	   the stack, since the alignment has already been taken care of. */
-#ifdef RESTORE_AVX
-	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
-	   registers to detect if xmm0/xmm1 registers are changed
-	   by audit module.  */
-	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-#else
-	subq $LRV_SIZE, %rsp	# sizeof(La_x86_64_retval)
-#endif
-	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.
-
-	/* Fill in the La_x86_64_retval structure.  */
-	movq %rax, LRV_RAX_OFFSET(%rcx)
-	movq %rdx, LRV_RDX_OFFSET(%rcx)
-
-	MOVXMM %xmm0, LRV_XMM0_OFFSET(%rcx)
-	MOVXMM %xmm1, LRV_XMM1_OFFSET(%rcx)
-
-#ifdef RESTORE_AVX
-	/* This is to support AVX audit modules.  */
-	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
-	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
-
-	/* Save xmm0/xmm1 registers to detect if they are changed
-	   by audit module.  */
-	vmovdqa %xmm0,		  (LRV_SIZE)(%rcx)
-	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-#endif
-
-	fstpt LRV_ST0_OFFSET(%rcx)
-	fstpt LRV_ST1_OFFSET(%rcx)
-
-	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-        movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	call _dl_call_pltexit
-
-	/* Restore return registers.  */
-	movq LRV_RAX_OFFSET(%rsp), %rax
-	movq LRV_RDX_OFFSET(%rsp), %rdx
-
-#ifdef RESTORE_AVX
-	/* Check if xmm0/xmm1 registers are changed by audit module.  */
-	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
-	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 4f
-
-	/* We restore AVX registers only if xmm0/xmm1 registers are
-	   unchanged.  */
-	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm0
-	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm0, %xmm1
-	vpmovmskb %xmm1, %esi
-	cmpl $0xffff, %esi
-	jne 4f
-
-	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
-	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
-	jmp 5f
-
-4:
-	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
-	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
-5:
-#else
-	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
-	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-#endif
-
-	fldt LRV_ST0_OFFSET(%rsp)
-	fldt LRV_ST0_OFFSET(%rsp)
-
-	movq %rbx, %rsp
-	movq (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	retq

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=59cbcac015cdd446c346cfd2c2ada3f94ef540b2

commit 59cbcac015cdd446c346cfd2c2ada3f94ef540b2
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Wed Jul 15 08:27:19 2009 -0700

    Fix build issue with modules for audit test on machines != x86-64.

diff --git a/ChangeLog b/ChangeLog
index e90d19f..bece41b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-07-07  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* elf/Makefile: Don't build modules for tst-audit4 and tst-audit5
+	for anything but x86-64 targets.
+
+2009-07-07  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* config.h.in: Add HAVE_AVX_SUPPORT entry.
 	* config.make.in: Add config-cflags-avx entry.
 	* configure.in: Substitute libc_cv_cc_avx.
diff --git a/elf/Makefile b/elf/Makefile
index e4b977e..21d131e 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -239,9 +239,6 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
 		$(modules-execstack-$(have-z-execstack)) \
 		tst-dlopenrpathmod tst-deep1mod1 tst-deep1mod2 tst-deep1mod3 \
 		tst-dlmopen1mod tst-auditmod1 \
-		tst-auditmod3a tst-auditmod3b \
-		tst-auditmod4a tst-auditmod4b \
-		tst-auditmod5a tst-auditmod5b \
 		unload3mod1 unload3mod2 unload3mod3 unload3mod4 \
 		unload4mod1 unload4mod2 unload4mod3 unload4mod4 \
 		unload6mod1 unload6mod2 unload6mod3 \
@@ -255,6 +252,11 @@ endif
 ifeq (yesyes,$(have-fpie)$(build-shared))
 modules-names += tst-piemod1
 endif
+ifeq (x86_64,$(config-machine))
+modules-names += tst-auditmod3a tst-auditmod3b \
+		tst-auditmod4a tst-auditmod4b \
+		tst-auditmod5a tst-auditmod5b
+endif
 modules-execstack-yes = tst-execstack-mod
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
 # We need this variable to be sure the test modules get the right CPPFLAGS.

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b0ecde3a63fd3e987137aa9eb76da3b556b14559

commit b0ecde3a63fd3e987137aa9eb76da3b556b14559
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Fri Jul 10 12:04:14 2009 -0700

    Add AVX support to ld.so auditing for x86-64.

diff --git a/ChangeLog b/ChangeLog
index 04760b0..e90d19f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2009-07-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config.h.in: Add HAVE_AVX_SUPPORT entry.
+	* config.make.in: Add config-cflags-avx entry.
+	* configure.in: Substitute libc_cv_cc_avx.
+	* elf/Makefile: Add rules to build and run tst-audit4 and tst-audit5.
+	* elf/tst-audit4.c: New file.
+	* elf/tst-audit5.c: New file.
+	* elf/tst-auditmod4a.c: New file.
+	* elf/tst-auditmod4b.c: New file.
+	* elf/tst-auditmod5a.c: New file.
+	* elf/tst-auditmod5b.c: New file.
+	* sysdeps/x86_64/Makefile (gen-as-const-headers): Add
+	link-defines.sym.
+	* sysdeps/x86_64/bits/link.h (La_x86_64_ymm): New.
+	(La_x86_64_vector): Likewise.
+	(La_x86_64_regs): Append lr_vector.
+	(La_x86_64_retval): Append lr_vector0/lrv_vector1.
+	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Move
+	saving and restoring SSE registers to ...
+	* sysdeps/x86_64/dl-trampoline.h: This.  New file.
+	* sysdeps/x86_64/dl-trampoline.S: Include <config.h> and
+	<link-defines.h>.
+	(_dl_runtime_profile): Use LR_SIZE to allocate space for
+	La_x86_64_regs.  Allocate extra space and jump to memory at
+	save_and_restore_vector if HAVE_AVX_SUPPORT is defined.
+	(save_and_restore_vector_sse): New.
+	(save_and_restore_vector_avx): Likewise.
+	(check_avx): Likewise.
+	(save_and_restore_vector): Likewise.
+	* sysdeps/x86_64/elf/configure.in: Set libc_cv_cc_avx and
+	HAVE_AVX_SUPPORT.
+	* sysdeps/x86_64/link-defines.sym: New file.
+
 2009-07-10  Ulrich Drepper  <drepper@redhat.com>
 
 	* elf/do-lookup.h: Removed after folding content into...
diff --git a/config.h.in b/config.h.in
index 5f16874..18bf01a 100644
--- a/config.h.in
+++ b/config.h.in
@@ -135,6 +135,9 @@
 /* Define if gcc supports SSE4.  */
 #undef	HAVE_SSE4_SUPPORT
 
+/* Define if gcc supports AVX.  */
+#undef	HAVE_AVX_SUPPORT
+
 /* Define if the compiler's exception support is based on libunwind.  */
 #undef	HAVE_CC_WITH_LIBUNWIND
 
diff --git a/config.make.in b/config.make.in
index 5fb5c81..d65706c 100644
--- a/config.make.in
+++ b/config.make.in
@@ -35,6 +35,7 @@ cflags-cpu = @libc_cv_cc_submachine@
 asflags-cpu = @libc_cv_cc_submachine@
 
 config-cflags-sse4 = @libc_cv_cc_sse4@
+config-cflags-avx = @libc_cv_cc_avx@
 
 defines = @DEFINES@
 sysincludes = @SYSINCLUDES@
diff --git a/configure b/configure
index 4e49f70..48e6952 100755
--- a/configure
+++ b/configure
@@ -657,6 +657,7 @@ xcoff
 elf
 ldd_rewrite_script
 use_ldconfig
+libc_cv_cc_avx
 libc_cv_cc_sse4
 libc_cv_cpp_asm_debuginfo
 libc_cv_forced_unwind
@@ -8772,6 +8773,7 @@ fi
 
 
 
+
 if test $elf = yes; then
   cat >>confdefs.h <<\_ACEOF
 #define HAVE_ELF 1
diff --git a/configure.in b/configure.in
index 61c8741..4584afe 100644
--- a/configure.in
+++ b/configure.in
@@ -2277,6 +2277,7 @@ AC_SUBST(libc_cv_forced_unwind)
 dnl sysdeps/CPU/configure.in checks set this via arch-specific asm tests
 AC_SUBST(libc_cv_cpp_asm_debuginfo)
 AC_SUBST(libc_cv_cc_sse4)
+AC_SUBST(libc_cv_cc_avx)
 
 AC_SUBST(use_ldconfig)
 AC_SUBST(ldd_rewrite_script)
diff --git a/elf/Makefile b/elf/Makefile
index 3e656ae..e4b977e 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -89,8 +89,10 @@ distribute	:= rtld-Rules \
 		   unload4mod1.c unload4mod2.c unload4mod3.c unload4mod4.c \
 		   unload6mod1.c unload6mod2.c unload6mod3.c \
 		   unload7mod1.c unload7mod2.c \
-		   tst-audit1.c tst-audit2.c tst-audit3.c \
+		   tst-audit1.c tst-audit2.c tst-audit3.c tst-audit4.c \
 		   tst-auditmod1.c tst-auditmod3a.c tst-auditmod3b.c \
+		   tst-auditmod4a.c tst-auditmod4b.c \
+		   tst-audit5.c tst-auditmod5a.c tst-auditmod5b.c \
 		   order2mod1.c order2mod2.c order2mod3.c order2mod4.c \
 		   tst-stackguard1.c tst-stackguard1-static.c \
 		   tst-array5.c tst-array5-static.c tst-array5dep.c \
@@ -198,7 +200,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
 test-srcs = tst-pathopt
 tests-execstack-yes = tst-execstack tst-execstack-needed tst-execstack-prog
 ifeq (x86_64,$(config-machine))
-tests += tst-audit3
+tests += tst-audit3 tst-audit4 tst-audit5
 endif
 endif
 ifeq (yesyes,$(have-fpie)$(build-shared))
@@ -238,6 +240,8 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
 		tst-dlopenrpathmod tst-deep1mod1 tst-deep1mod2 tst-deep1mod3 \
 		tst-dlmopen1mod tst-auditmod1 \
 		tst-auditmod3a tst-auditmod3b \
+		tst-auditmod4a tst-auditmod4b \
+		tst-auditmod5a tst-auditmod5b \
 		unload3mod1 unload3mod2 unload3mod3 unload3mod4 \
 		unload4mod1 unload4mod2 unload4mod3 unload4mod4 \
 		unload6mod1 unload6mod2 unload6mod3 \
@@ -973,6 +977,14 @@ $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
 $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
 tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
 
+$(objpfx)tst-audit4: $(objpfx)tst-auditmod4a.so
+$(objpfx)tst-audit4.out: $(objpfx)tst-auditmod4b.so
+tst-audit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod4b.so
+
+$(objpfx)tst-audit5: $(objpfx)tst-auditmod5a.so
+$(objpfx)tst-audit5.out: $(objpfx)tst-auditmod5b.so
+tst-audit5-ENV = LD_AUDIT=$(objpfx)tst-auditmod5b.so
+
 $(objpfx)tst-global1: $(libdl)
 $(objpfx)tst-global1.out: $(objpfx)testobj6.so $(objpfx)testobj2.so
 
@@ -1115,3 +1127,9 @@ $(objpfx)tst-unique1.out: $(objpfx)tst-unique1mod1.so \
 
 $(objpfx)tst-unique2: $(libdl) $(objpfx)tst-unique2mod1.so
 $(objpfx)tst-unique2.out: $(objpfx)tst-unique2mod2.so
+
+ifeq (yes,$(config-cflags-avx))
+CFLAGS-tst-audit4.c += -mavx
+CFLAGS-tst-auditmod4a.c += -mavx
+CFLAGS-tst-auditmod4b.c += -mavx
+endif
diff --git a/elf/tst-audit4.c b/elf/tst-audit4.c
new file mode 100644
index 0000000..b17d4a6
--- /dev/null
+++ b/elf/tst-audit4.c
@@ -0,0 +1,35 @@
+/* Test case for x86-64 preserved registers in dynamic linker.  */
+
+#ifdef __AVX__
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <immintrin.h>
+
+extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
+			   __m256i, __m256i, __m256i, __m256i);
+int
+main (void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  /* Run AVX test only if AVX is supported.  */
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+      && (ecx & bit_AVX))
+    {
+      __m256i ymm = _mm256_setzero_si256 ();
+      __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
+
+      ymm =  _mm256_set1_epi32 (0x12349876);
+      if (memcmp (&ymm, &ret, sizeof (ret)))
+	abort ();
+    }
+  return 0;
+}
+#else
+int
+main (void)
+{
+  return 0;
+}
+#endif
diff --git a/elf/tst-audit5.c b/elf/tst-audit5.c
new file mode 100644
index 0000000..0094fee
--- /dev/null
+++ b/elf/tst-audit5.c
@@ -0,0 +1,21 @@
+/* Test case for x86-64 preserved registers in dynamic linker.  */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <emmintrin.h>
+
+extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
+			   __m128i, __m128i, __m128i, __m128i);
+int
+main (void)
+{
+  __m128i xmm = _mm_setzero_si128 ();
+  __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
+
+  xmm = _mm_set1_epi32 (0x12349876);
+  if (memcmp (&xmm, &ret, sizeof (ret)))
+    abort ();
+
+  return 0;
+}
diff --git a/elf/tst-auditmod4a.c b/elf/tst-auditmod4a.c
new file mode 100644
index 0000000..c9c24c0
--- /dev/null
+++ b/elf/tst-auditmod4a.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved registers in dynamic linker.  */
+
+#ifdef __AVX__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m256i
+audit_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3,
+	    __m256i x4, __m256i x5, __m256i x6, __m256i x7)
+{
+  __m256i ymm;
+
+  ymm = _mm256_set1_epi32 (1);
+  if (memcmp (&ymm, &x0, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (2);
+  if (memcmp (&ymm, &x1, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (3);
+  if (memcmp (&ymm, &x2, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (4);
+  if (memcmp (&ymm, &x3, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (5);
+  if (memcmp (&ymm, &x4, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (6);
+  if (memcmp (&ymm, &x5, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (7);
+  if (memcmp (&ymm, &x6, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (8);
+  if (memcmp (&ymm, &x7, sizeof (ymm)))
+    abort ();
+
+  return _mm256_setzero_si256 ();
+}
+#endif
diff --git a/elf/tst-auditmod4b.c b/elf/tst-auditmod4b.c
new file mode 100644
index 0000000..a6d3c6a
--- /dev/null
+++ b/elf/tst-auditmod4b.c
@@ -0,0 +1,206 @@
+/* Verify that changing AVX registers in audit library won't affect
+   function parameter passing/return.  */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+  setlinebuf (stdout);
+
+  printf ("version: %u\n", v);
+
+  char buf[20];
+  sprintf (buf, "%u", v);
+
+  return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+  if (flag == LA_ACT_CONSISTENT)
+    printf ("activity: consistent\n");
+  else if (flag == LA_ACT_ADD)
+    printf ("activity: add\n");
+  else if (flag == LA_ACT_DELETE)
+    printf ("activity: delete\n");
+  else
+    printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+  char buf[100];
+  const char *flagstr;
+  if (flag == LA_SER_ORIG)
+    flagstr = "LA_SET_ORIG";
+  else if (flag == LA_SER_LIBPATH)
+    flagstr = "LA_SER_LIBPATH";
+  else if (flag == LA_SER_RUNPATH)
+    flagstr = "LA_SER_RUNPATH";
+  else if (flag == LA_SER_CONFIG)
+    flagstr = "LA_SER_CONFIG";
+  else if (flag == LA_SER_DEFAULT)
+    flagstr = "LA_SER_DEFAULT";
+  else if (flag == LA_SER_SECURE)
+    flagstr = "LA_SER_SECURE";
+  else
+    {
+       sprintf (buf, "unknown flag %d", flag);
+       flagstr = buf;
+    }
+  printf ("objsearch: %s, %s\n", name, flagstr);
+
+  return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+  printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+  return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+  printf ("preinit\n");
+}
+
+unsigned int
+la_objclose  (uintptr_t *cookie)
+{
+  printf ("objclose\n");
+  return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+	      uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+  printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+  return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+  if (avx == -1)
+    {
+      unsigned int eax, ebx, ecx, edx;
+
+      if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+	  && (ecx & bit_AVX))
+	avx = 1;
+      else
+	avx = 0;
+    }
+  return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+	  uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+	  const char *symname, long int *framesizep)
+{
+  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+  if (check_avx () && strcmp (symname, "audit_test") == 0)
+    {
+      __m256i zero = _mm256_setzero_si256 ();
+      if (memcmp (&regs->lr_vector[0], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[1], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[2], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[3], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[4], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[5], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[6], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_vector[7], &zero, sizeof (zero)))
+	abort ();
+
+      for (int i = 0; i < 8; i++)
+	regs->lr_vector[i].ymm[0]
+	  = (La_x86_64_ymm) _mm256_set1_epi32 (i + 1);
+
+      __m256i ymm = _mm256_set1_epi32 (-1);
+      asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+      asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+      asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+      asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+      asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+      asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+      asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+      asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+      *framesizep = 1024;
+    }
+#endif
+
+  return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+	 uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+	 const char *symname)
+{
+  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+	  symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+  if (check_avx () && strcmp (symname, "audit_test") == 0)
+    {
+      __m256i zero = _mm256_setzero_si256 ();
+      if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero)))
+	abort ();
+
+      for (int i = 0; i < 8; i++)
+	{
+	  __m256i ymm = _mm256_set1_epi32 (i + 1);
+	  if (memcmp (&inregs->lr_vector[i], &ymm, sizeof (ymm)) != 0)
+	    abort ();
+	}
+
+      outregs->lrv_vector0.ymm[0]
+	= (La_x86_64_ymm) _mm256_set1_epi32 (0x12349876);
+
+      __m256i ymm = _mm256_set1_epi32 (-1);
+      asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+      asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+    }
+#endif
+
+  return 0;
+}
diff --git a/elf/tst-auditmod5a.c b/elf/tst-auditmod5a.c
new file mode 100644
index 0000000..8511a70
--- /dev/null
+++ b/elf/tst-auditmod5a.c
@@ -0,0 +1,46 @@
+/* Test case for x86-64 preserved registers in dynamic linker.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <emmintrin.h>
+
+__m128i
+audit_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3,
+	    __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+  __m128i xmm;
+
+  xmm =  _mm_set1_epi32 (1);
+  if (memcmp (&xmm, &x0, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (2);
+  if (memcmp (&xmm, &x1, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (3);
+  if (memcmp (&xmm, &x2, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (4);
+  if (memcmp (&xmm, &x3, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (5);
+  if (memcmp (&xmm, &x4, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (6);
+  if (memcmp (&xmm, &x5, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (7);
+  if (memcmp (&xmm, &x6, sizeof (xmm)))
+    abort ();
+
+  xmm =  _mm_set1_epi32 (8);
+  if (memcmp (&xmm, &x7, sizeof (xmm)))
+    abort ();
+
+  return _mm_setzero_si128 ();
+}
diff --git a/elf/tst-auditmod5b.c b/elf/tst-auditmod5b.c
new file mode 100644
index 0000000..7e1e941
--- /dev/null
+++ b/elf/tst-auditmod5b.c
@@ -0,0 +1,178 @@
+/* Verify that changing xmm registers in audit library won't affect
+   function parameter passing/return.  */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+#include <emmintrin.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+  setlinebuf (stdout);
+
+  printf ("version: %u\n", v);
+
+  char buf[20];
+  sprintf (buf, "%u", v);
+
+  return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+  if (flag == LA_ACT_CONSISTENT)
+    printf ("activity: consistent\n");
+  else if (flag == LA_ACT_ADD)
+    printf ("activity: add\n");
+  else if (flag == LA_ACT_DELETE)
+    printf ("activity: delete\n");
+  else
+    printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+  char buf[100];
+  const char *flagstr;
+  if (flag == LA_SER_ORIG)
+    flagstr = "LA_SET_ORIG";
+  else if (flag == LA_SER_LIBPATH)
+    flagstr = "LA_SER_LIBPATH";
+  else if (flag == LA_SER_RUNPATH)
+    flagstr = "LA_SER_RUNPATH";
+  else if (flag == LA_SER_CONFIG)
+    flagstr = "LA_SER_CONFIG";
+  else if (flag == LA_SER_DEFAULT)
+    flagstr = "LA_SER_DEFAULT";
+  else if (flag == LA_SER_SECURE)
+    flagstr = "LA_SER_SECURE";
+  else
+    {
+       sprintf (buf, "unknown flag %d", flag);
+       flagstr = buf;
+    }
+  printf ("objsearch: %s, %s\n", name, flagstr);
+
+  return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+  printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+  return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+  printf ("preinit\n");
+}
+
+unsigned int
+la_objclose  (uintptr_t *cookie)
+{
+  printf ("objclose\n");
+  return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+	      uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+  printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+  return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+	  uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+	  const char *symname, long int *framesizep)
+{
+  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+  __m128i minusone = _mm_set1_epi32 (-1);
+
+  if (strcmp (symname, "audit_test") == 0)
+    {
+      __m128i zero = _mm_setzero_si128 ();
+      if (memcmp (&regs->lr_xmm[0], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[1], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[2], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[3], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[4], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[5], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[6], &zero, sizeof (zero))
+	  || memcmp (&regs->lr_xmm[7], &zero, sizeof (zero)))
+	abort ();
+
+      for (int i = 0; i < 8; i++)
+	regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 1);
+
+      *framesizep = 1024;
+    }
+
+  asm volatile ("movdqa %0, %%xmm0" : : "x" (minusone) : "xmm0" );
+  asm volatile ("movdqa %0, %%xmm1" : : "x" (minusone) : "xmm1" );
+  asm volatile ("movdqa %0, %%xmm2" : : "x" (minusone) : "xmm2" );
+  asm volatile ("movdqa %0, %%xmm3" : : "x" (minusone) : "xmm3" );
+  asm volatile ("movdqa %0, %%xmm4" : : "x" (minusone) : "xmm4" );
+  asm volatile ("movdqa %0, %%xmm5" : : "x" (minusone) : "xmm5" );
+  asm volatile ("movdqa %0, %%xmm6" : : "x" (minusone) : "xmm6" );
+  asm volatile ("movdqa %0, %%xmm7" : : "x" (minusone) : "xmm7" );
+
+  return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+	 uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+	 const char *symname)
+{
+  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+	  symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+  __m128i xmm;
+
+  if (strcmp (symname, "audit_test") == 0)
+    {
+      __m128i zero = _mm_setzero_si128 ();
+      if (memcmp (&outregs->lrv_xmm0, &zero, sizeof (zero)))
+	abort ();
+
+      for (int i = 0; i < 8; i++)
+	{
+	  xmm = _mm_set1_epi32 (i + 1);
+	  if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm)) != 0)
+	    abort ();
+	}
+
+      outregs->lrv_xmm0 = (La_x86_64_xmm) _mm_set1_epi32 (0x12349876);
+    }
+
+  xmm = _mm_set1_epi32 (-1);
+  asm volatile ("movdqa %0, %%xmm0" : : "x" (xmm) : "xmm0" );
+  asm volatile ("movdqa %0, %%xmm1" : : "x" (xmm) : "xmm1" );
+
+  return 0;
+}
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index da82093..78fdb04 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -4,6 +4,7 @@ long-double-fcts = yes
 ifeq ($(subdir),csu)
 sysdep_routines += hp-timing
 elide-routines.os += hp-timing
+gen-as-const-headers += link-defines.sym
 endif
 
 ifeq ($(subdir),gmon)
diff --git a/sysdeps/x86_64/bits/link.h b/sysdeps/x86_64/bits/link.h
index 5676b78..643a293 100644
--- a/sysdeps/x86_64/bits/link.h
+++ b/sysdeps/x86_64/bits/link.h
@@ -65,10 +65,19 @@ __END_DECLS
 /* Registers for entry into PLT on x86-64.  */
 # if __GNUC_PREREQ (4,0)
 typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
+typedef float La_x86_64_ymm __attribute__ ((__vector_size__ (32)));
 # else
 typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
 # endif
 
+typedef union
+{
+# if __GNUC_PREREQ (4,0)
+  La_x86_64_ymm ymm[2];
+# endif
+  La_x86_64_xmm xmm[4];
+} La_x86_64_vector __attribute__ ((aligned(16)));
+
 typedef struct La_x86_64_regs
 {
   uint64_t lr_rdx;
@@ -80,6 +89,7 @@ typedef struct La_x86_64_regs
   uint64_t lr_rbp;
   uint64_t lr_rsp;
   La_x86_64_xmm lr_xmm[8];
+  La_x86_64_vector lr_vector[8];
 } La_x86_64_regs;
 
 /* Return values for calls from PLT on x86-64.  */
@@ -91,6 +101,8 @@ typedef struct La_x86_64_retval
   La_x86_64_xmm lrv_xmm1;
   long double lrv_st0;
   long double lrv_st1;
+  La_x86_64_vector lrv_vector0;
+  La_x86_64_vector lrv_vector1;
 } La_x86_64_retval;
 
 
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 33e6115..f605351 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -17,7 +17,9 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <config.h>
 #include <sysdep.h>
+#include <link-defines.h>
 
 	.text
 	.globl _dl_runtime_resolve
@@ -89,135 +91,64 @@ _dl_runtime_profile:
 
 	/* Actively align the La_x86_64_regs structure.  */
 	andq $0xfffffffffffffff0, %rsp
-	subq $192, %rsp		# sizeof(La_x86_64_regs)
+# ifdef HAVE_AVX_SUPPORT
+	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
+	   to detect if any xmm0-xmm7 registers are changed by audit
+	   module.  */
+	subq $(LR_SIZE + XMM_SIZE*8), %rsp
+#else
+	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
+#endif
 	movq %rsp, 24(%rbx)
 
-	movq %rdx,   (%rsp)	# Fill the La_x86_64_regs structure.
-	movq %r8,   8(%rsp)
-	movq %r9,  16(%rsp)
-	movq %rcx, 24(%rsp)
-	movq %rsi, 32(%rsp)
-	movq %rdi, 40(%rsp)
-	movq %rbp, 48(%rsp)
-	leaq 48(%rbx), %rax
-	movq %rax, 56(%rsp)
-	movaps %xmm0,  64(%rsp)
-	movaps %xmm1,  80(%rsp)
-	movaps %xmm2,  96(%rsp)
-	movaps %xmm3, 112(%rsp)
-	movaps %xmm4, 128(%rsp)
-	movaps %xmm5, 144(%rsp)
-	movaps %xmm6, 160(%rsp)
-	movaps %xmm7, 176(%rsp)
-
-	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
-	movq 48(%rbx), %rdx	# Load return address if needed.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	leaq 16(%rbx), %r8
-	call _dl_profile_fixup	# Call resolver.
-
-	movq %rax, %r11		# Save return value.
-
-	movq 8(%rbx), %rax	# Get back register content.
-	movq      (%rsp), %rdx
-	movq     8(%rsp), %r8
-	movq    16(%rsp), %r9
-	movaps  64(%rsp), %xmm0
-	movaps  80(%rsp), %xmm1
-	movaps  96(%rsp), %xmm2
-	movaps 112(%rsp), %xmm3
-	movaps 128(%rsp), %xmm4
-	movaps 144(%rsp), %xmm5
-	movaps 160(%rsp), %xmm6
-	movaps 176(%rsp), %xmm7
-
-	movq 16(%rbx), %r10	# Anything in framesize?
-	testq %r10, %r10
-	jns 1f
-
-	/* There's nothing in the frame size, so there
-	   will be no call to the _dl_call_pltexit. */
-
-	movq 24(%rsp), %rcx	# Get back registers content.
-	movq 32(%rsp), %rsi
-	movq 40(%rsp), %rdi
-
-	movq %rbx, %rsp
-	movq (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	jmp *%r11		# Jump to function address.
+	/* Fill the La_x86_64_regs structure.  */
+	movq %rdx, LR_RDX_OFFSET(%rsp)
+	movq %r8,  LR_R8_OFFSET(%rsp)
+	movq %r9,  LR_R9_OFFSET(%rsp)
+	movq %rcx, LR_RCX_OFFSET(%rsp)
+	movq %rsi, LR_RSI_OFFSET(%rsp)
+	movq %rdi, LR_RDI_OFFSET(%rsp)
+	movq %rbp, LR_RBP_OFFSET(%rsp)
 
-1:
-	cfi_adjust_cfa_offset(48)
-	cfi_rel_offset(%rbx, 0)
-	cfi_def_cfa_register(%rbx)
+# ifdef HAVE_AVX_SUPPORT
+	jmp *L(save_and_restore_vector)(%rip)
 
-	/* At this point we need to prepare new stack for the function
-	   which has to be called.  We copy the original stack to a
-	   temporary buffer of the size specified by the 'framesize'
-	   returned from _dl_profile_fixup */
-
-	leaq 56(%rbx), %rsi	# stack
-	addq $8, %r10
-	andq $0xfffffffffffffff0, %r10
-	movq %r10, %rcx
-	subq %r10, %rsp
-	movq %rsp, %rdi
-	shrq $3, %rcx
-	rep
-	movsq
-
-	movq 24(%rdi), %rcx	# Get back register content.
-	movq 32(%rdi), %rsi
-	movq 40(%rdi), %rdi
-
-	call *%r11
-
-	mov 24(%rbx), %rsp	# Drop the copied stack content
-
-	/* Now we have to prepare the La_x86_64_retval structure for the
-	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
-	   so we just need to allocate the sizeof(La_x86_64_retval) space on
-	   the stack, since the alignment has already been taken care of. */
-
-	subq $80, %rsp		# sizeof(La_x86_64_retval)
-	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.
-
-	movq %rax, (%rcx)	# Fill in the La_x86_64_retval structure.
-	movq %rdx, 8(%rcx)
-	movaps %xmm0, 16(%rcx)
-	movaps %xmm1, 32(%rcx)
-	fstpt 48(%rcx)
-	fstpt 64(%rcx)
-
-	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-        movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	call _dl_call_pltexit
-
-	movq  (%rsp), %rax	# Restore return registers.
-	movq 8(%rsp), %rdx
-	movaps 16(%rsp), %xmm0
-	movaps 32(%rsp), %xmm1
-	fldt 64(%rsp)
-	fldt 48(%rsp)
-
-	movq %rbx, %rsp
-	movq  (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	retq
+	.align 16
+L(save_and_restore_vector_sse):
+# endif
+
+# define MOVXMM movaps
+# include "dl-trampoline.h"
+
+# ifdef HAVE_AVX_SUPPORT
+#  undef  MOVXMM
+#  define MOVXMM vmovdqa
+#  define RESTORE_AVX
+	.align 16
+L(save_and_restore_vector_avx):
+#  include "dl-trampoline.h"
+# endif
 
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
+
+# ifdef HAVE_AVX_SUPPORT
+L(check_avx):
+	mov	%rbx,%r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	mov	%r11,%rbx		# Restore rbx
+	leaq    L(save_and_restore_vector_sse)(%rip), %rax
+	andl	$(1 << 28), %ecx	# Check if AVX is available.
+	jz	L(ret)
+	leaq    L(save_and_restore_vector_avx)(%rip), %rax
+L(ret):
+	movq	%rax,L(save_and_restore_vector)(%rip)
+	jmp	*%rax
+
+	.section .data.rel.local,"aw",@progbits
+	.align	8
+L(save_and_restore_vector):
+	.quad L(check_avx)
+# endif
 #endif
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
new file mode 100644
index 0000000..d63b7d0
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -0,0 +1,291 @@
+/* Partial PLT profile trampoline to save and restore x86-64 vector
+   registers.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+	leaq 48(%rbx), %rax
+	movq %rax, LR_RSP_OFFSET(%rsp)
+
+	/* This is to provide backward binary compatility for existing
+	   audit modules.  */
+	MOVXMM %xmm0,		   (LR_XMM_OFFSET)(%rsp)
+	MOVXMM %xmm1, (LR_XMM_OFFSET +   XMM_SIZE)(%rsp)
+	MOVXMM %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+	MOVXMM %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+	MOVXMM %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+	MOVXMM %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+	MOVXMM %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+	MOVXMM %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+
+#ifdef RESTORE_AVX
+	/* This is to support AVX audit modules.  */
+	vmovdqu %ymm0,		      (LR_VECTOR_OFFSET)(%rsp)
+	vmovdqu %ymm1, (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
+	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+
+	/* Save xmm0-xmm7 registers to detect if any of them are
+	   changed by audit module.  */
+	vmovdqa %xmm0,		    (LR_SIZE)(%rsp)
+	vmovdqa %xmm1, (LR_SIZE +   XMM_SIZE)(%rsp)
+	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
+	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
+	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
+	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
+	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
+	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
+#endif
+
+	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
+	movq 48(%rbx), %rdx	# Load return address if needed.
+	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
+	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
+	leaq 16(%rbx), %r8
+	call _dl_profile_fixup	# Call resolver.
+
+	movq %rax, %r11		# Save return value.
+
+	movq 8(%rbx), %rax	# Get back register content.
+	movq LR_RDX_OFFSET(%rsp), %rdx
+	movq  LR_R8_OFFSET(%rsp), %r8
+	movq  LR_R9_OFFSET(%rsp), %r9
+
+#ifdef RESTORE_AVX
+	/* Check if any xmm0-xmm7 registers are changed by audit
+	   module.  */
+	vmovdqa (LR_XMM_OFFSET)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm0
+	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 1f
+
+	/* We restore AVX registers only if xmm0-xmm7 registers are
+	   unchanged.  */
+	vmovdqu			(LR_VECTOR_OFFSET)(%rsp), %ymm0
+	vmovdqu	  (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+	jmp 2f
+
+1:
+	vmovdqa		     (LR_XMM_OFFSET)(%rsp), %xmm0
+	vmovdqa   (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+
+2:
+#else
+	movaps		    (LR_XMM_OFFSET)(%rsp), %xmm0
+	movaps	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+#endif
+
+	movq 16(%rbx), %r10	# Anything in framesize?
+	testq %r10, %r10
+	jns 3f
+
+	/* There's nothing in the frame size, so there
+	   will be no call to the _dl_call_pltexit. */
+
+	/* Get back registers content.  */
+	movq LR_RCX_OFFSET(%rsp), %rcx
+	movq LR_RSI_OFFSET(%rsp), %rsi
+	movq LR_RDI_OFFSET(%rsp), %rdi
+
+	movq %rbx, %rsp
+	movq (%rsp), %rbx
+	cfi_restore(rbx)
+	cfi_def_cfa_register(%rsp)
+
+	addq $48, %rsp		# Adjust the stack to the return value
+				# (eats the reloc index and link_map)
+	cfi_adjust_cfa_offset(-48)
+	jmp *%r11		# Jump to function address.
+
+3:
+	cfi_adjust_cfa_offset(48)
+	cfi_rel_offset(%rbx, 0)
+	cfi_def_cfa_register(%rbx)
+
+	/* At this point we need to prepare new stack for the function
+	   which has to be called.  We copy the original stack to a
+	   temporary buffer of the size specified by the 'framesize'
+	   returned from _dl_profile_fixup */
+
+	leaq LR_RSP_OFFSET(%rbx), %rsi	# stack
+	addq $8, %r10
+	andq $0xfffffffffffffff0, %r10
+	movq %r10, %rcx
+	subq %r10, %rsp
+	movq %rsp, %rdi
+	shrq $3, %rcx
+	rep
+	movsq
+
+	movq 24(%rdi), %rcx	# Get back register content.
+	movq 32(%rdi), %rsi
+	movq 40(%rdi), %rdi
+
+	call *%r11
+
+	mov 24(%rbx), %rsp	# Drop the copied stack content
+
+	/* Now we have to prepare the La_x86_64_retval structure for the
+	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
+	   so we just need to allocate the sizeof(La_x86_64_retval) space on
+	   the stack, since the alignment has already been taken care of. */
+#ifdef RESTORE_AVX
+	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
+	   registers to detect if xmm0/xmm1 registers are changed
+	   by audit module.  */
+	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
+#else
+	subq $LRV_SIZE, %rsp	# sizeof(La_x86_64_retval)
+#endif
+	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.
+
+	/* Fill in the La_x86_64_retval structure.  */
+	movq %rax, LRV_RAX_OFFSET(%rcx)
+	movq %rdx, LRV_RDX_OFFSET(%rcx)
+
+	MOVXMM %xmm0, LRV_XMM0_OFFSET(%rcx)
+	MOVXMM %xmm1, LRV_XMM1_OFFSET(%rcx)
+
+#ifdef RESTORE_AVX
+	/* This is to support AVX audit modules.  */
+	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
+	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+
+	/* Save xmm0/xmm1 registers to detect if they are changed
+	   by audit module.  */
+	vmovdqa %xmm0,		  (LRV_SIZE)(%rcx)
+	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+#endif
+
+	fstpt LRV_ST0_OFFSET(%rcx)
+	fstpt LRV_ST1_OFFSET(%rcx)
+
+	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
+	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
+        movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
+	call _dl_call_pltexit
+
+	/* Restore return registers.  */
+	movq LRV_RAX_OFFSET(%rsp), %rax
+	movq LRV_RDX_OFFSET(%rsp), %rdx
+
+#ifdef RESTORE_AVX
+	/* Check if xmm0/xmm1 registers are changed by audit module.  */
+	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
+	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 4f
+
+	/* We restore AVX registers only if xmm0/xmm1 registers are
+	   unchanged.  */
+	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm0
+	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm0, %xmm1
+	vpmovmskb %xmm1, %esi
+	cmpl $0xffff, %esi
+	jne 4f
+
+	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+	jmp 5f
+
+4:
+	vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
+	vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
+5:
+#else
+	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+#endif
+
+	fldt LRV_ST0_OFFSET(%rsp)
+	fldt LRV_ST0_OFFSET(%rsp)
+
+	movq %rbx, %rsp
+	movq (%rsp), %rbx
+	cfi_restore(rbx)
+	cfi_def_cfa_register(%rsp)
+
+	addq $48, %rsp		# Adjust the stack to the return value
+				# (eats the reloc index and link_map)
+	cfi_adjust_cfa_offset(-48)
+	retq
diff --git a/sysdeps/x86_64/elf/configure b/sysdeps/x86_64/elf/configure
index 7746549..221e74c 100755
--- a/sysdeps/x86_64/elf/configure
+++ b/sysdeps/x86_64/elf/configure
@@ -79,3 +79,28 @@ cat >>confdefs.h <<\_ACEOF
 #define PI_STATIC_AND_HIDDEN 1
 _ACEOF
 
+
+{ $as_echo "$as_me:$LINENO: checking for AVX support" >&5
+$as_echo_n "checking for AVX support... " >&6; }
+if test "${libc_cv_cc_avx+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  libc_cv_cc_avx=yes
+else
+  libc_cv_cc_avx=no
+fi
+fi
+{ $as_echo "$as_me:$LINENO: result: $libc_cv_cc_avx" >&5
+$as_echo "$libc_cv_cc_avx" >&6; }
+if test $libc_cv_cc_avx = yes; then
+  cat >>confdefs.h <<\_ACEOF
+#define HAVE_AVX_SUPPORT 1
+_ACEOF
+
+fi
diff --git a/sysdeps/x86_64/elf/configure.in b/sysdeps/x86_64/elf/configure.in
index 9cb59d0..14d1875 100644
--- a/sysdeps/x86_64/elf/configure.in
+++ b/sysdeps/x86_64/elf/configure.in
@@ -32,3 +32,14 @@ fi
 dnl It is always possible to access static and hidden symbols in an
 dnl position independent way.
 AC_DEFINE(PI_STATIC_AND_HIDDEN)
+
+dnl Check if -mavx works.
+AC_CACHE_CHECK(for AVX support, libc_cv_cc_avx, [dnl
+if AC_TRY_COMMAND([${CC-cc} -mavx -xc /dev/null -S -o /dev/null]); then
+  libc_cv_cc_avx=yes
+else
+  libc_cv_cc_avx=no
+fi])
+if test $libc_cv_cc_avx = yes; then
+  AC_DEFINE(HAVE_AVX_SUPPORT)
+fi
diff --git a/sysdeps/x86_64/link-defines.sym b/sysdeps/x86_64/link-defines.sym
new file mode 100644
index 0000000..1694d88
--- /dev/null
+++ b/sysdeps/x86_64/link-defines.sym
@@ -0,0 +1,28 @@
+#include "link.h"
+#include <stddef.h>
+
+--
+VECTOR_SIZE		sizeof (La_x86_64_vector)
+XMM_SIZE		sizeof (La_x86_64_xmm)
+
+LR_SIZE			sizeof (struct La_x86_64_regs)
+LR_RDX_OFFSET		offsetof (struct La_x86_64_regs, lr_rdx)
+LR_R8_OFFSET		offsetof (struct La_x86_64_regs, lr_r8)
+LR_R9_OFFSET		offsetof (struct La_x86_64_regs, lr_r9)
+LR_RCX_OFFSET		offsetof (struct La_x86_64_regs, lr_rcx)
+LR_RSI_OFFSET		offsetof (struct La_x86_64_regs, lr_rsi)
+LR_RDI_OFFSET		offsetof (struct La_x86_64_regs, lr_rdi)
+LR_RBP_OFFSET		offsetof (struct La_x86_64_regs, lr_rbp)
+LR_RSP_OFFSET		offsetof (struct La_x86_64_regs, lr_rsp)
+LR_XMM_OFFSET		offsetof (struct La_x86_64_regs, lr_xmm)
+LR_VECTOR_OFFSET	offsetof (struct La_x86_64_regs, lr_vector)
+
+LRV_SIZE		sizeof (struct La_x86_64_retval)
+LRV_RAX_OFFSET		offsetof (struct La_x86_64_retval, lrv_rax)
+LRV_RDX_OFFSET		offsetof (struct La_x86_64_retval, lrv_rdx)
+LRV_XMM0_OFFSET		offsetof (struct La_x86_64_retval, lrv_xmm0)
+LRV_XMM1_OFFSET		offsetof (struct La_x86_64_retval, lrv_xmm1)
+LRV_ST0_OFFSET		offsetof (struct La_x86_64_retval, lrv_st0)
+LRV_ST1_OFFSET		offsetof (struct La_x86_64_retval, lrv_st1)
+LRV_VECTOR0_OFFSET	offsetof (struct La_x86_64_retval, lrv_vector0)
+LRV_VECTOR1_OFFSET	offsetof (struct La_x86_64_retval, lrv_vector1)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                       |   40 ++++++
 config.h.in                     |    3 +
 config.make.in                  |    1 +
 configure                       |    2 +
 configure.in                    |    1 +
 elf/Makefile                    |   26 +++-
 elf/tst-audit4.c                |   35 +++++
 elf/tst-audit5.c                |   21 +++
 elf/tst-auditmod4a.c            |   48 +++++++
 elf/tst-auditmod4b.c            |  206 ++++++++++++++++++++++++++++
 elf/tst-auditmod5a.c            |   46 +++++++
 elf/tst-auditmod5b.c            |  178 +++++++++++++++++++++++++
 sysdeps/x86_64/Makefile         |    1 +
 sysdeps/x86_64/bits/link.h      |   12 ++
 sysdeps/x86_64/dl-trampoline.S  |  280 ++++++++++++++++++++++++++++++++-------
 sysdeps/x86_64/elf/configure    |   25 ++++
 sysdeps/x86_64/elf/configure.in |   11 ++
 sysdeps/x86_64/link-defines.sym |   28 ++++
 18 files changed, 910 insertions(+), 54 deletions(-)
 create mode 100644 elf/tst-audit4.c
 create mode 100644 elf/tst-audit5.c
 create mode 100644 elf/tst-auditmod4a.c
 create mode 100644 elf/tst-auditmod4b.c
 create mode 100644 elf/tst-auditmod5a.c
 create mode 100644 elf/tst-auditmod5b.c
 create mode 100644 sysdeps/x86_64/link-defines.sym


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]