[PATCH] unstrip: Call adjust_relocs no more than once per section.

Mark Wielaard mark@klomp.org
Tue Feb 6 15:36:11 GMT 2024


Hi Aaron,

On Mon, 2024-02-05 at 18:11 -0500, Aaron Merey wrote:
> During symtab merging, adjust_relocs might be called multiple times on
> some SHT_REL/SHT_RELA sections.  In these cases it is possible for a
> relocation's symbol index to be correctly mapped from X to Y during the
> first call to adjust_relocs but then wrongly remapped from Y to Z during
> the second call.
> 
> Fix this by adjusting relocation symbol indices just once per section.
> 
> Also add stable sorting for symbols during symtab merging so that the
> symbol order in the output file's symtab does not depend on undefined
> behaviour in qsort.
> 
> Note that adjust_relocs still might be called a second time on a section
> during add_new_section_symbols.  However since add_new_section_symbols
> generates its own distinct symbol index map, this should not trigger the
> bug described above.
> 
> https://sourceware.org/bugzilla/show_bug.cgi?id=31097
> 
> Signed-off-by: Aaron Merey <amerey@redhat.com>
> ---
> 
> I added some tests to run-unstrip-test.sh to verify that strip+unstrip
> does not alter the symbols referred to by relocations.  I tried to use
> eu-elfcmp for this purpose.  However for a pair of i386 ET_REL binaries
> I did some testing with, eu-elfcmp skips checking the SHT_REL/SHT_RELA
> sections because the sh_flags do not contain SHF_ALLOC (the flags only
> contain SHF_INFO_LINK in this case).
> 
> I'm not sure if this eu-elfcmp behaviour is intentional or if eu-elfcmp
> should start comparing REL/RELA sections even if they aren't allocated.

So it looks like elfcmp explicitly checks ebl_section_strip_p and
doesn't compare sections that are strippable. Maybe we should add an
eu-elfcmp --all-sections flag?

We should probably also check that it handles the new SHT_RELR sections
correctly. I see it only checks for SHT_REL and SHT_RELA in the code.
Although RELR is really simple and doesn't have symbol references. So
it is probably fine.

>  src/unstrip.c                |  81 ++++++++++++++++----
>  tests/.gitignore             |   1 +
>  tests/Makefile.am            |   3 +-
>  tests/elf-print-reloc-syms.c | 144 +++++++++++++++++++++++++++++++++++
>  tests/run-unstrip-test.sh    |   8 ++
>  5 files changed, 221 insertions(+), 16 deletions(-)
>  create mode 100644 tests/elf-print-reloc-syms.c
> 
> diff --git a/src/unstrip.c b/src/unstrip.c
> index d5bd1821..f37d6c58 100644
> --- a/src/unstrip.c
> +++ b/src/unstrip.c
> @@ -598,21 +598,30 @@ adjust_relocs (Elf_Scn *outscn, Elf_Scn *inscn, const GElf_Shdr *shdr,
>  /* Adjust all the relocation sections in the file.  */
>  static void
>  adjust_all_relocs (Elf *elf, Elf_Scn *symtab, const GElf_Shdr *symshdr,
> -		   size_t map[], size_t map_size)
> +		   size_t map[], size_t map_size, bool *scn_filter)
>  {

Maybe bool scn_filter[] since it really is an array?

>    size_t new_sh_link = elf_ndxscn (symtab);
>    Elf_Scn *scn = NULL;
>    while ((scn = elf_nextscn (elf, scn)) != NULL)
>      if (scn != symtab)
>        {
> +	if (scn_filter != NULL)
> +	  {
> +	    size_t ndx = elf_ndxscn (scn);
> +
> +	    /* Skip relocations that were already mapped during adjust_relocs
> +	       for the stripped symtab.  This is to avoid mapping a relocation's
> +	       symbol index from X to Y during the first adjust_relocs and then
> +	       wrongly remapping it from Y to Z during the second call.  */
> +	    if (scn_filter[ndx])
> +	      continue;
> +	  }
> +
>  	GElf_Shdr shdr_mem;
>  	GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
>  	ELF_CHECK (shdr != NULL, _("cannot get section header: %s"));
> -	/* Don't redo SHT_GROUP, groups are in both the stripped and debug,
> -	   it will already have been done by adjust_relocs for the
> -	   stripped_symtab.  */
> -	if (shdr->sh_type != SHT_NOBITS && shdr->sh_type != SHT_GROUP
> -	    && shdr->sh_link == new_sh_link)
> +
> +	if (shdr->sh_type != SHT_NOBITS && shdr->sh_link == new_sh_link)
>  	  adjust_relocs (scn, scn, shdr, map, map_size, symshdr);
>        }
>  }

OK.

> @@ -697,7 +706,7 @@ add_new_section_symbols (Elf_Scn *old_symscn, size_t old_shnum,
>      }
>  
>    /* Adjust any relocations referring to the old symbol table.  */
> -  adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1);
> +  adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1, NULL);
>  
>    return symdata;
>  }

OK.

> @@ -874,6 +883,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
>        s->shndx = shndx;
>        s->info.info = sym->st_info;
>        s->info.other = sym->st_other;
> +      s->duplicate = NULL;
>  
>        if (scnmap != NULL && shndx != SHN_UNDEF && shndx < SHN_LORESERVE)
>  	s->shndx = scnmap[shndx - 1];
> @@ -903,8 +913,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
>    if (s1->value > s2->value)						      \
>      return 1
>  
> -/* Compare symbols with a consistent ordering,
> -   but one only meaningful for equality.  */
> +/* Symbol comparison used to sort symbols in preparation for deduplication.  */
>  static int
>  compare_symbols (const void *a, const void *b)
>  {
> @@ -915,6 +924,38 @@ compare_symbols (const void *a, const void *b)
>    CMP (size);
>    CMP (shndx);
>  
> +  int res = s1->compare - s2->compare;
> +  if (res != 0)
> +    return res;
> +
> +  res = strcmp (s1->name, s2->name);
> +  if (res != 0)
> +    return res;
> +
> +  /* Duplicates still have distinct positions in the symbol index map.
> +     Compare map positions to ensure that duplicate symbols are ordered
> +     consistently even if the sort function is unstable.  */
> +  CMP (map);
> +  error_exit (0, _("found two identical index map positions."));
> +}
>
> +/* Symbol comparison used to deduplicate symbols found in both the stripped
> +   and unstripped input files.
> +
> +   Similar to compare_symbols, but does not differentiate symbols based
> +   on their position in the symbol index map.  Duplicates can't be found
> +   by comparing index map postions because they always have distinct
> +   positions in the map.  */
> +static int
> +compare_symbols_duplicate (const void *a, const void *b)
> +{
> +  const struct symbol *s1 = a;
> +  const struct symbol *s2 = b;
> +
> +  CMP (value);
> +  CMP (size);
> +  CMP (shndx);
> +
>    return (s1->compare - s2->compare) ?: strcmp (s1->name, s2->name);
>  }
> 

Right, this new compare_symbols_duplicate is what compare_symbols did
before, which is not a stable sort.
 
> @@ -946,13 +987,13 @@ compare_symbols_output (const void *a, const void *b)
>  	  /* binutils always puts section symbols in section index order.  */
>  	  CMP (shndx);
>  	  else if (s1 != s2)
> -	    error_exit (0, "section symbols in unexpected order");
> +	    error_exit (0, _("section symbols in unexpected order"));
>  	}
> 

Yes, this is a translatable user visible string.

>  
>        /* Nothing really matters, so preserve the original order.  */
>        CMP (map);
>        else if (s1 != s2)
> -	error_exit (0, "found two identical symbols");
> +	error_exit (0, _("found two identical symbols"));
>      }
>  
>    return cmp;

Likewise.

> @@ -1855,7 +1896,8 @@ more sections in stripped file than debug file -- arguments reversed?"));
>  	    }
>  
>  	  struct symbol *n = s;
> -	  while (n + 1 < &symbols[total_syms] && !compare_symbols (s, n + 1))
> +	  while (n + 1 < &symbols[total_syms]
> +		 && !compare_symbols_duplicate (s, n + 1))
>  	    ++n;

Right, because compare_symbols_duplicate does what compare_symbols use
to do.

>  
>  	  while (s < n)
> @@ -1992,6 +2034,11 @@ more sections in stripped file than debug file -- arguments reversed?"));
>        elf_flagdata (symdata, ELF_C_SET, ELF_F_DIRTY);
>        update_shdr (unstripped_symtab, shdr);
>  
> +      /* Track which sections are adjusted during the first round
> +	 of calls to adjust_relocs.  */
> +      bool scn_adjusted[unstripped_shnum];
> +      memset (scn_adjusted, 0, sizeof scn_adjusted);
> +
>        if (stripped_symtab != NULL)
>  	{
>  	  /* Adjust any relocations referring to the old symbol table.  */
> @@ -2000,14 +2047,18 @@ more sections in stripped file than debug file -- arguments reversed?"));
>  	       sec < &sections[stripped_shnum - 1];
>  	       ++sec)
>  	    if (sec->outscn != NULL && sec->shdr.sh_link == old_sh_link)
> -	      adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
> -			     symndx_map, total_syms, shdr);
> +	      {
> +		adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
> +			       symndx_map, total_syms, shdr);
> +		scn_adjusted[elf_ndxscn (sec->outscn)] = true;
> +	      }
>  	}
> 

OK.

>        /* Also adjust references to the other old symbol table.  */
>        adjust_all_relocs (unstripped, unstripped_symtab, shdr,
>  			 &symndx_map[stripped_nsym - 1],
> -			 total_syms - (stripped_nsym - 1));
> +			 total_syms - (stripped_nsym - 1),
> +			 scn_adjusted);
>  
>        free (symbols);
>        free (symndx_map);

OK.

> diff --git a/tests/.gitignore b/tests/.gitignore
> index 5bebb2c4..d00a883e 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -63,6 +63,7 @@
>  /elfshphehdr
>  /elfstrmerge
>  /elfstrtab
> +/elf-print-reloc-syms
>  /emptyfile
>  /fillfile
>  /find-prologues

OK.

> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 2373c980..13bd9d56 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -62,7 +62,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
>  		  dwelf_elf_e_machine_string \
>  		  getphdrnum leb128 read_unaligned \
>  		  msg_tst system-elf-libelf-test system-elf-gelf-test \
> -		  nvidia_extended_linemap_libdw \
> +		  nvidia_extended_linemap_libdw elf-print-reloc-syms \
>  		  $(asm_TESTS)
> 

OK.

>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> @@ -810,6 +810,7 @@ getphdrnum_LDADD = $(libelf) $(libdw)
>  leb128_LDADD = $(libelf) $(libdw)
>  read_unaligned_LDADD = $(libelf) $(libdw)
>  nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw)
> +elf_print_reloc_syms_LDADD = $(libelf)
>  
>  # We want to test the libelf headers against the system elf.h header.
>  # Don't include any -I CPPFLAGS. Except when we install our own elf.h.

OK.

> diff --git a/tests/elf-print-reloc-syms.c b/tests/elf-print-reloc-syms.c
> new file mode 100644
> index 00000000..d6b7867d
> --- /dev/null
> +++ b/tests/elf-print-reloc-syms.c
> @@ -0,0 +1,144 @@
> +/* Copyright (C) 2024 Red Hat, Inc.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <libelf.h>
> +#include <gelf.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <assert.h>
> +
> +static void
> +print_reloc_symnames (Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, size_t sh_entsize)
> +{
> +  int nentries = shdr->sh_size / sh_entsize;
> +
> +  /* Get the data of the section.  */
> +  Elf_Data *data = elf_getdata (scn, NULL);
> +  assert (data != NULL);
> +
> +  /* Get the symbol table information.  */
> +  Elf_Scn *symscn = elf_getscn (elf, shdr->sh_link);
> +  GElf_Shdr symshdr_mem;
> +  GElf_Shdr *symshdr = gelf_getshdr (symscn, &symshdr_mem);
> +  Elf_Data *symdata = elf_getdata (symscn, NULL);
> +  assert (symshdr != NULL);
> +  assert (symdata != NULL);
> +
> +  /* Search for the optional extended section index table.  */
> +  Elf_Data *xndxdata = NULL;
> +  int xndxscnidx = elf_scnshndx (scn);
> +  if (xndxscnidx)
> +    xndxdata = elf_getdata (elf_getscn (elf, xndxscnidx), NULL);
> +
> +  /* Get the section header string table index.  */
> +  size_t shstrndx;
> +  assert (elf_getshdrstrndx (elf, &shstrndx) >= 0);
> +
> +  printf("Section: %s\n", elf_strptr (elf, shstrndx, shdr->sh_name));
> +  for (int cnt = 0; cnt < nentries; ++cnt)
> +    {
> +      GElf_Rel relmem;
> +      GElf_Rel *rel = gelf_getrel (data, cnt, &relmem);
> +
> +
> +      if (likely (rel != NULL))
> +        {
> +          GElf_Sym symmem;
> +          Elf32_Word xndx;
> +          GElf_Sym *sym = gelf_getsymshndx (symdata, xndxdata,
> +                                            GELF_R_SYM (rel->r_info),
> +                                            &symmem, &xndx);
> +
> +	  if (sym == NULL)
> +	    {
> +	      printf ("<SYM NOT FOUND>\n");
> +	      continue;
> +	    }
> +
> +          if (GELF_ST_TYPE (sym->st_info) != STT_SECTION)
> +            printf ("%s\n", elf_strptr (elf, symshdr->sh_link, sym->st_name));
> +          else
> +            {
> +              /* This is a relocation against a STT_SECTION symbol.  */
> +              GElf_Shdr secshdr_mem;
> +              GElf_Shdr *secshdr;
> +              secshdr = gelf_getshdr (elf_getscn (elf,
> +                                                  sym->st_shndx == SHN_XINDEX
> +                                                  ? xndx : sym->st_shndx),
> +                                      &secshdr_mem);
> +
> +	      if (secshdr == NULL)
> +		printf("<SECTION NOT FOUND>\n");
> +              else
> +                printf ("%s\n",
> +			elf_strptr (elf, shstrndx, secshdr->sh_name));
> +            }
> +        }
> +    }
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  if (argc != 2)
> +    {
> +      printf ("Usage: elf_print_reloc_syms FILE\n");
> +      return -1;
> +    }
> +
> +  elf_version (EV_CURRENT);
> +
> +  int fd = open(argv[1], O_RDONLY);
> +  assert (fd != -1);
> +
> +  Elf *elf = elf_begin (fd, ELF_C_READ, NULL);
> +  assert (elf != NULL);
> +
> +  size_t shnums;
> +  assert (elf_getshdrnum (elf, &shnums) >= 0);
> +
> +  Elf_Scn *scn = NULL;
> +  while ((scn = elf_nextscn (elf, scn)) != NULL)
> +    {
> +      GElf_Shdr shdr_mem;
> +      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
> +
> +      if (shdr != NULL)
> +	{
> +	  /* Print the names of symbols referred to by relocations.  */
> +	  if (shdr->sh_type == SHT_REL)
> +	    {
> +	      size_t sh_entsize = gelf_fsize (elf, ELF_T_REL, 1, EV_CURRENT);
> +	      print_reloc_symnames (elf, scn, shdr, sh_entsize);
> +	    }
> +	  else if (shdr->sh_type == SHT_RELA)
> +	    {
> +	      size_t sh_entsize = gelf_fsize (elf, ELF_T_RELA, 1, EV_CURRENT);
> +	      print_reloc_symnames (elf, scn, shdr, sh_entsize);
> +	    }
> +	}
> +    }
> +}

It is just a testcase, but cleaning up would be nice.

  elf_end(elf);
  close (fd);

(See also below for runtest support)

> diff --git a/tests/run-unstrip-test.sh b/tests/run-unstrip-test.sh
> index dc7d3a42..03373b3f 100755
> --- a/tests/run-unstrip-test.sh
> +++ b/tests/run-unstrip-test.sh
> @@ -33,6 +33,14 @@ testrun ${abs_top_builddir}/src/unstrip -o testfile.unstrip $stripped $debugfile
>  
>  testrun ${abs_top_builddir}/src/elfcmp --hash-inexact $original testfile.unstrip
>  
> +tempfiles syms-orig syms-testfile
> +
> +# Check whether relocated symbols changed.
> +${abs_top_builddir}/tests/elf-print-reloc-syms $original > syms-orig
> +${abs_top_builddir}/tests/elf-print-reloc-syms testfile.unstrip > syms-testfile
> 

These really should run under "testrun" so they will use the just build
libelf (and run under valgrind with --enable-valgrind, which would have
caught that the testcase doesn't clean up all resources it allocates).

> +testrun diff syms-orig syms-testfile
> +
>  # Also test modifying the file in place.
>  
>  rm -f testfile.inplace

Looks good in general. Just clean up in the new testcase and run it
with testrun.

Thanks,

Mark


More information about the Elfutils-devel mailing list