[PATCH v3 4/4] libdw: Handle overflowed DW_SECT_INFO offsets in DWARF package file indexes
Mark Wielaard
mark@klomp.org
Fri Mar 1 14:59:22 GMT 2024
Hi Omar,
On Mon, 2024-02-26 at 11:32 -0800, Omar Sandoval wrote:
> Meta uses DWARF package files for our large, statically-linked C++
> applications. Some of our largest applications have more than 4GB in
> .debug_info.dwo, but the section offsets in .debug_cu_index and
> .debug_tu_index are 32 bits; see the discussion here [1]. We
> implemented a workaround/extension for this in LLVM. Implement the
> equivalent in libdw.
>
> To test this, we need files with more than 4GB in .debug_info.dwo. I
> created these artificially by editing GCC's assembly output. They
> compress down to 6KB. I test them from run-large-elf-file.sh to take
> advantage of the existing checks for large file support.
>
> 1: https://discourse.llvm.org/t/dwarf-dwp-4gb-limit/63902.
>
> * libdw/dwarf_end.c (dwarf_package_index_free): New function.
> * tests/testfile-dwp-4-cu-index-overflow.bz2: New test file.
> * tests/testfile-dwp-4-cu-index-overflow.dwp.bz2: New test file.
> * tests/testfile-dwp-5-cu-index-overflow.bz2: New test file.
> * tests/testfile-dwp-5-cu-index-overflow.dwp.bz2: New test file.
> * tests/testfile-dwp-cu-index-overflow.source: New file.
> * tests/run-large-elf-file.sh: Check
> testfile-dwp-5-cu-index-overflow and
> testfile-dwp-4-cu-index-overflow.
The hack is kind of horrible, but given that this doesn't really
impacts "normal" dwp files and it does work with clang/lldb, lets just
support it too.
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
> libdw/dwarf_cu_dwp_section_info.c | 147 ++++++++++++++-
> libdw/dwarf_end.c | 15 +-
> libdw/libdwP.h | 3 +
> tests/Makefile.am | 7 +-
> tests/run-large-elf-file.sh | 174 ++++++++++++++++++
> tests/testfile-dwp-4-cu-index-overflow.bz2 | Bin 0 -> 4490 bytes
> .../testfile-dwp-4-cu-index-overflow.dwp.bz2 | Bin 0 -> 5584 bytes
> tests/testfile-dwp-5-cu-index-overflow.bz2 | Bin 0 -> 4544 bytes
> .../testfile-dwp-5-cu-index-overflow.dwp.bz2 | Bin 0 -> 5790 bytes
> tests/testfile-dwp-cu-index-overflow.source | 86 +++++++++
> 10 files changed, 426 insertions(+), 6 deletions(-)
> create mode 100755 tests/testfile-dwp-4-cu-index-overflow.bz2
> create mode 100644 tests/testfile-dwp-4-cu-index-overflow.dwp.bz2
> create mode 100755 tests/testfile-dwp-5-cu-index-overflow.bz2
> create mode 100644 tests/testfile-dwp-5-cu-index-overflow.dwp.bz2
> create mode 100644 tests/testfile-dwp-cu-index-overflow.source
>
> diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c
> index 298f36f9..3d11c87a 100644
> --- a/libdw/dwarf_cu_dwp_section_info.c
> +++ b/libdw/dwarf_cu_dwp_section_info.c
> @@ -30,6 +30,8 @@
> # include <config.h>
> #endif
>
> +#include <assert.h>
> +
> #include "libdwP.h"
>
> static Dwarf_Package_Index *
> @@ -110,7 +112,9 @@ __libdw_read_package_index (Dwarf *dbg, bool tu)
>
> index->dbg = dbg;
> /* Set absent sections to UINT32_MAX. */
> - memset (index->sections, 0xff, sizeof (index->sections));
> + for (size_t i = 0;
> + i < sizeof (index->sections) / sizeof (index->sections[0]); i++)
> + index->sections[i] = UINT32_MAX;
> for (size_t i = 0; i < section_count; i++)
> {
> uint32_t section = read_4ubyte_unaligned (dbg, sections + i * 4);
> @@ -161,6 +165,7 @@ __libdw_read_package_index (Dwarf *dbg, bool tu)
> index->indices = indices;
> index->section_offsets = section_offsets;
> index->section_sizes = section_sizes;
> + index->debug_info_offsets = NULL;
>
> return index;
> }
> @@ -177,6 +182,137 @@ __libdw_package_index (Dwarf *dbg, bool tu)
> if (index == NULL)
> return NULL;
>
> + /* Offsets in the section offset table are 32-bit unsigned integers. In
> + practice, the .debug_info.dwo section for very large executables can be
> + larger than 4GB. GNU dwp as of binutils 2.41 and llvm-dwp before LLVM 15
> + both accidentally truncate offsets larger than 4GB.
> +
> + LLVM 15 detects the overflow and errors out instead; see LLVM commit
> + f8df8114715b ("[DWP][DWARF] Detect and error on debug info offset
> + overflow"). However, lldb in LLVM 16 supports using dwp files with
> + truncated offsets by recovering them directly from the unit headers in the
> + .debug_info.dwo section; see LLVM commit c0db06227721 ("[DWARFLibrary] Add
> + support to re-construct cu-index"). Since LLVM 17, the overflow error can
> + be turned into a warning instead; see LLVM commit 53a483cee801 ("[DWP] add
> + overflow check for llvm-dwp tools if offset overflow").
> +
> + LLVM's support for > 4GB offsets is effectively an extension to the DWARF
> + package file format, which we implement here. The strategy is to walk the
> + unit headers in .debug_info.dwo in lockstep with the DW_SECT_INFO columns
> + in the section offset tables. As long as they are in the same order
> + (which they are in practice for both GNU dwp and llvm-dwp), we can
> + correlate the truncated offset and produce a corrected array of offsets.
> +
> + Note that this will be fixed properly in DWARF 6:
> + https://dwarfstd.org/issues/220708.2.html. */
> + if (index->sections[DW_SECT_INFO - 1] != UINT32_MAX
> + && dbg->sectiondata[IDX_debug_info]->d_size > UINT32_MAX)
> + {
> + Dwarf_Package_Index *cu_index, *tu_index = NULL;
> + if (tu)
> + {
> + tu_index = index;
> + assert (dbg->cu_index == NULL);
> + cu_index = __libdw_read_package_index (dbg, false);
> + if (cu_index == NULL)
> + {
> + free(index);
> + return NULL;
> + }
> + }
> + else
> + {
> + cu_index = index;
> + if (dbg->sectiondata[IDX_debug_tu_index] != NULL
> + && dbg->sectiondata[IDX_debug_types] == NULL)
> + {
> + assert (dbg->tu_index == NULL);
> + tu_index = __libdw_read_package_index (dbg, true);
> + if (tu_index == NULL)
> + {
> + free(index);
> + return NULL;
> + }
> + }
> + }
> +
> + cu_index->debug_info_offsets = malloc (cu_index->unit_count
> + * sizeof (Dwarf_Off));
> + if (cu_index->debug_info_offsets == NULL)
> + {
> + free (tu_index);
> + free (cu_index);
> + __libdw_seterrno (DWARF_E_NOMEM);
> + return NULL;
> + }
> + if (tu_index != NULL)
> + {
> + tu_index->debug_info_offsets = malloc (tu_index->unit_count
> + * sizeof (Dwarf_Off));
> + if (tu_index->debug_info_offsets == NULL)
> + {
> + free (tu_index);
> + free (cu_index->debug_info_offsets);
> + free (cu_index);
> + __libdw_seterrno (DWARF_E_NOMEM);
> + return NULL;
> + }
> + }
> +
> + Dwarf_Off off = 0;
> + uint32_t cui = 0, tui = 0;
> + uint32_t cu_count = cu_index->unit_count;
> + const unsigned char *cu_offset
> + = cu_index->section_offsets + cu_index->sections[DW_SECT_INFO - 1] * 4;
> + uint32_t tu_count = 0;
> + const unsigned char *tu_offset;
> + if (tu_index != NULL)
> + {
> + tu_count = tu_index->unit_count;
> + tu_offset = tu_index->section_offsets
> + + tu_index->sections[DW_SECT_INFO - 1] * 4;
> + }
> + while (cui < cu_count || tui < tu_count)
> + {
> + Dwarf_Off next_off;
> + uint8_t unit_type;
> + if (__libdw_next_unit (dbg, false, off, &next_off, NULL, NULL,
> + &unit_type, NULL, NULL, NULL, NULL, NULL)
> + != 0)
> + {
> + not_sorted:
> + free (cu_index->debug_info_offsets);
> + cu_index->debug_info_offsets = NULL;
> + if (tu_index != NULL)
> + {
> + free (tu_index->debug_info_offsets);
> + tu_index->debug_info_offsets = NULL;
> + }
> + break;
> + }
> + if (unit_type != DW_UT_split_type && cui < cu_count)
> + {
> + if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, cu_offset))
> + goto not_sorted;
> + cu_index->debug_info_offsets[cui++] = off;
> + cu_offset += cu_index->section_count * 4;
> + }
> + else if (unit_type == DW_UT_split_type && tui < tu_count)
> + {
> + if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset))
> + goto not_sorted;
> + tu_index->debug_info_offsets[tui++] = off;
> + tu_offset += tu_index->section_count * 4;
> + }
> + off = next_off;
> + }
> +
> + if (tu)
> + dbg->cu_index = cu_index;
> + else if (tu_index != NULL)
> + dbg->tu_index = tu_index;
> + }
> +
> if (tu)
> dbg->tu_index = index;
> else
This looks correct, but gcc noticed a path to use tu_offset (and
tu_index) if they weren't initialized or NULL:
In file included from /home/mark/src/elfutils/libdw/libdwP.h:684,
from /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:35:
In function ‘read_4ubyte_unaligned_1’,
inlined from ‘__libdw_package_index’ at /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:302:34:
/home/mark/src/elfutils/libdw/memory-access.h:291:12: error: ‘tu_offset’ may be used uninitialized [-Werror=maybe-uninitialized]
291 | return up->u4;
| ~~^~~~
/home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c: In function ‘__libdw_package_index’:
/home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:268:28: note: ‘tu_offset’ was declared here
268 | const unsigned char *tu_offset;
| ^~~~~~~~~
cc1: all warnings being treated as errors
I couldn't immediately disprove gcc here, so I think it is a good idea
to add an explicit check for tu_index != NULL.
diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c
index 3d11c87a..9fdc15bf 100644
--- a/libdw/dwarf_cu_dwp_section_info.c
+++ b/libdw/dwarf_cu_dwp_section_info.c
@@ -297,7 +297,8 @@ __libdw_package_index (Dwarf *dbg, bool tu)
cu_index->debug_info_offsets[cui++] = off;
cu_offset += cu_index->section_count * 4;
}
- else if (unit_type == DW_UT_split_type && tui < tu_count)
+ else if (unit_type == DW_UT_split_type && tu_index != NULL
+ && tui < tu_count)
{
if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset))
goto not_sorted;
Which makes gcc happy again.
> @@ -244,8 +380,13 @@ __libdw_dwp_section_info (Dwarf_Package_Index *index, uint32_t unit_row,
> size_t i = (size_t)(unit_row - 1) * index->section_count
> + index->sections[section - 1];
> if (offsetp != NULL)
> - *offsetp = read_4ubyte_unaligned (index->dbg,
> - index->section_offsets + i * 4);
> + {
> + if (section == DW_SECT_INFO && index->debug_info_offsets != NULL)
> + *offsetp = index->debug_info_offsets[unit_row - 1];
> + else
> + *offsetp = read_4ubyte_unaligned (index->dbg,
> + index->section_offsets + i * 4);
> + }
> if (sizep != NULL)
> *sizep = read_4ubyte_unaligned (index->dbg,
> index->section_sizes + i * 4);
OK.
> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index 78224ddb..ed8d27be 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -40,6 +40,17 @@
> #include "cfi.h"
>
>
> +static void
> +dwarf_package_index_free (Dwarf_Package_Index *index)
> +{
> + if (index != NULL)
> + {
> + free (index->debug_info_offsets);
> + free (index);
> + }
> +}
> +
> +
> static void
> noop_free (void *arg __attribute__ ((unused)))
> {
> @@ -79,8 +90,8 @@ dwarf_end (Dwarf *dwarf)
> {
> if (dwarf != NULL)
> {
> - free (dwarf->tu_index);
> - free (dwarf->cu_index);
> + dwarf_package_index_free (dwarf->tu_index);
> + dwarf_package_index_free (dwarf->cu_index);
>
> if (dwarf->cfi != NULL)
> /* Clean up the CFI cache. */
OK.
> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index 1a0a4df3..6018399c 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -371,6 +371,9 @@ typedef struct Dwarf_Package_Index_s
> const unsigned char *indices;
> const unsigned char *section_offsets;
> const unsigned char *section_sizes;
> + /* If DW_SECT_INFO section offsets were truncated to 32 bits, recovered
> + 64-bit offsets. */
> + Dwarf_Off *debug_info_offsets;
> } Dwarf_Package_Index;
>
> /* CU representation. */
OK.
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 3f80c451..98131a6b 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -641,7 +641,12 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
> testfile-dwp-4.bz2 testfile-dwp-4.dwp.bz2 \
> testfile-dwp-4-strict.bz2 testfile-dwp-4-strict.dwp.bz2 \
> testfile-dwp-5.bz2 testfile-dwp-5.dwp.bz2 testfile-dwp.source \
> - run-cu-dwp-section-info.sh run-declfiles.sh
> + run-cu-dwp-section-info.sh run-declfiles.sh \
> + testfile-dwp-5-cu-index-overflow \
> + testfile-dwp-5-cu-index-overflow.dwp \
> + testfile-dwp-4-cu-index-overflow \
> + testfile-dwp-4-cu-index-overflow.dwp \
> + testfile-dwp-cu-index-overflow.source
>
>
> if USE_VALGRIND
You mean the .bz2 files here. Fixed.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 98131a6b..9141074f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -642,10 +642,10 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
testfile-dwp-4-strict.bz2 testfile-dwp-4-strict.dwp.bz2 \
testfile-dwp-5.bz2 testfile-dwp-5.dwp.bz2 testfile-dwp.source \
run-cu-dwp-section-info.sh run-declfiles.sh \
- testfile-dwp-5-cu-index-overflow \
- testfile-dwp-5-cu-index-overflow.dwp \
- testfile-dwp-4-cu-index-overflow \
- testfile-dwp-4-cu-index-overflow.dwp \
+ testfile-dwp-5-cu-index-overflow.bz2 \
+ testfile-dwp-5-cu-index-overflow.dwp.bz2 \
+ testfile-dwp-4-cu-index-overflow.bz2 \
+ testfile-dwp-4-cu-index-overflow.dwp.bz2 \
testfile-dwp-cu-index-overflow.source
Thanks for the new tests.
Pushed with the fixlets above.
Cheers,
Mark
More information about the Elfutils-devel
mailing list