This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
RFC: shrink partial_symbol and minimal_symbol
- From: Tom Tromey <tromey at redhat dot com>
- To: gdb-patches at sourceware dot org
- Date: Fri, 06 Nov 2009 15:16:35 -0700
- Subject: RFC: shrink partial_symbol and minimal_symbol
- Reply-to: tromey at redhat dot com
I was curious about shrinking the symbol structs a little, because they
are major space users.
This patch reduces both partial_symbol and minimal_symbol by a word (on
a 32-bit machine, but there should also be a saving on 64-bit). It
does this by shrinking some bit fields and then moving them into struct
general_symbol_info. This required no changes outside symtab.h, just some
rearrangements in the header file and updates to the accessor macros.
There are a couple of minor drawbacks to this approach:
* We lose a bit of type safety (see the domain_or_type field).
* We also lose a bit of clarity in that some fields are used for
different purposes by different subtypes (something that has been a
source of some difficulty in GCC).
I think the savings are worth these problems, because these are
space-critical structures. (I would oppose a similar change in a less
important area.)
I have a couple of ideas for other space savings:
* general_symbol_info:obj_section seems redundant given the 'section'
field. If so then removing it would shave a word from all symbol
types.
* I'd like to resurrect my old patch to remove the two 'next' fields
from minimal_symbol. At least the demangled_hash_next field should be
removed, as it is often NULL (e.g., in all C programs). Both could be
removed if we used some other data structure -- these days I'm
thinking just a plain sorted list, searched with bsearch, but I'm open
to ideas.
On my test case this saves 3.8M as compared to HEAD.
That works out to 1.7% of the heap in this case.
Built and regtested on x86-64 (compile farm).
Tom
2009-11-06 Tom Tromey <tromey@redhat.com>
* symtab.h (enum minimal_symbol_type): Move earlier.
(enum domain_enum_tag): Likewise.
(enum address_class): Likewise.
(struct general_symbol_info) <language>: Shrink to 4 bits.
<domain, type>: Comment out.
<domain_or_type>: New field.
<aclass>: New field.
<flag_1, flag_2>: New fields.
(struct minimal_symbol) <type, target_flag_1, target_flag_2>:
Remove.
(MSYMBOL_TARGET_FLAG_1): Update.
(MSYMBOL_TARGET_FLAG_2): Update.
(MSYMBOL_TYPE): Update.
(struct symbol) <domain, aclass, is_argument, is_inlined>:
Remove.
(SYMBOL_DOMAIN): Update.
(SYMBOL_CLASS): Update.
(SYMBOL_IS_ARGUMENT): Update.
(SYMBOL_INLINED): Update.
(struct partial_symbol) <domain, aclass>: Remove.
(PSYMBOL_DOMAIN): Update.
(PSYMBOL_CLASS): Update.
diff --git a/gdb/symtab.h b/gdb/symtab.h
index acb8510..db80286 100644
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -76,6 +76,169 @@ struct program_space;
--chastain 2003-08-21 */
+/* Classification types for a minimal symbol. These should be taken as
+ "advisory only", since if gdb can't easily figure out a
+ classification it simply selects mst_unknown. It may also have to
+ guess when it can't figure out which is a better match between two
+ types (mst_data versus mst_bss) for example. Since the minimal
+ symbol info is sometimes derived from the BFD library's view of a
+ file, we need to live with what information bfd supplies. Values are
+ stored in the 4-bit general_symbol_info.domain_or_type field. */
+enum minimal_symbol_type
+{
+ mst_unknown = 0, /* Unknown type, the default */
+ mst_text, /* Generally executable instructions */
+ mst_data, /* Generally initialized data */
+ mst_bss, /* Generally uninitialized data */
+ mst_abs, /* Generally absolute (nonrelocatable) */
+ /* GDB uses mst_solib_trampoline for the start address of a shared
+ library trampoline entry. Breakpoints for shared library functions
+ are put there if the shared library is not yet loaded.
+ After the shared library is loaded, lookup_minimal_symbol will
+ prefer the minimal symbol from the shared library (usually
+ a mst_text symbol) over the mst_solib_trampoline symbol, and the
+ breakpoints will be moved to their true address in the shared
+ library via breakpoint_re_set. */
+ mst_solib_trampoline, /* Shared library trampoline code */
+ /* For the mst_file* types, the names are only guaranteed to be unique
+ within a given .o file. */
+ mst_file_text, /* Static version of mst_text */
+ mst_file_data, /* Static version of mst_data */
+ mst_file_bss /* Static version of mst_bss */
+};
+
+/* Different name domains for symbols. Looking up a symbol specifies a
+ domain and ignores symbol definitions in other name domains. Values are
+ stored in the 4-bit general_symbol_info.domain_or_type field. */
+typedef enum domain_enum_tag
+{
+ /* UNDEF_DOMAIN is used when a domain has not been discovered or
+ none of the following apply. This usually indicates an error either
+ in the symbol information or in gdb's handling of symbols. */
+
+ UNDEF_DOMAIN,
+
+ /* VAR_DOMAIN is the usual domain. In C, this contains variables,
+ function names, typedef names and enum type values. */
+
+ VAR_DOMAIN,
+
+ /* STRUCT_DOMAIN is used in C to hold struct, union and enum type names.
+ Thus, if `struct foo' is used in a C program, it produces a symbol named
+ `foo' in the STRUCT_DOMAIN. */
+
+ STRUCT_DOMAIN,
+
+ /* LABEL_DOMAIN may be used for names of labels (for gotos);
+ currently it is not used and labels are not recorded at all. */
+
+ LABEL_DOMAIN,
+
+ /* Searching domains. These overlap with VAR_DOMAIN, providing
+ some granularity with the search_symbols function. */
+
+ /* Everything in VAR_DOMAIN minus FUNCTIONS_DOMAIN, TYPES_DOMAIN and
+ METHODS_DOMAIN. */
+ VARIABLES_DOMAIN,
+
+ /* All functions -- for some reason not methods, though. */
+ FUNCTIONS_DOMAIN,
+
+ /* All defined types */
+ TYPES_DOMAIN,
+ /* All class methods -- why is this separated out? NOTE(review): the copy
+ of this enum deleted later in this patch lacks METHODS_DOMAIN; confirm. */
+ METHODS_DOMAIN
+}
+domain_enum;
+
+/* An address-class says where to find the value of a symbol. It is stored
+ in the 4-bit general_symbol_info.aclass field: at most 16 values fit. */
+enum address_class
+{
+ /* Not used; catches errors */
+
+ LOC_UNDEF,
+
+ /* Value is constant int SYMBOL_VALUE, host byteorder */
+
+ LOC_CONST,
+
+ /* Value is at fixed address SYMBOL_VALUE_ADDRESS */
+
+ LOC_STATIC,
+
+ /* Value is in register. SYMBOL_VALUE is the register number in the
+ original debug format; SYMBOL_REGISTER_OPS maps it to the target's.
+ For some symbol formats (stabs, for some compilers at least),
+ the compiler generates two symbols, an argument and a register.
+ In some cases we combine them to a single LOC_REGISTER in symbol
+ reading, but currently not for all cases (e.g. it's passed on the
+ stack and then loaded into a register). */
+
+ LOC_REGISTER,
+
+ /* It's an argument; the value is at SYMBOL_VALUE offset in arglist. */
+
+ LOC_ARG,
+
+ /* Value address is at SYMBOL_VALUE offset in arglist. */
+
+ LOC_REF_ARG,
+
+ /* Value is in specified register. Just like LOC_REGISTER except the
+ register holds the address of the argument instead of the argument
+ itself. This is currently used for the passing of structs and unions
+ on sparc and hppa. It is also used for call by reference where the
+ address is in a register, at least by mipsread.c. */
+
+ LOC_REGPARM_ADDR,
+
+ /* Value is a local variable at SYMBOL_VALUE offset in stack frame. */
+
+ LOC_LOCAL,
+
+ /* Value not used; definition in SYMBOL_TYPE. Symbols in the domain
+ STRUCT_DOMAIN all have this class. */
+
+ LOC_TYPEDEF,
+
+ /* Value is address SYMBOL_VALUE_ADDRESS in the code */
+
+ LOC_LABEL,
+
+ /* In a symbol table, value is SYMBOL_BLOCK_VALUE of a `struct block'.
+ In a partial symbol table, SYMBOL_VALUE_ADDRESS is the start address
+ of the block. Function names have this class. */
+
+ LOC_BLOCK,
+
+ /* Value is a constant byte-sequence pointed to by SYMBOL_VALUE_BYTES, in
+ target byte order. */
+
+ LOC_CONST_BYTES,
+
+ /* Value is at fixed address, but the address of the variable has
+ to be determined from the minimal symbol table whenever the
+ variable is referenced.
+ This happens if debugging information for a global symbol is
+ emitted and the corresponding minimal symbol is defined
+ in another object file or runtime common storage.
+ The linker might even remove the minimal symbol if the global
+ symbol is never referenced, in which case the symbol remains
+ unresolved. See the testcase `gdb.dwarf2/dw2-unresolved.exp'. */
+
+ LOC_UNRESOLVED,
+
+ /* The variable does not actually exist in the program.
+ The value is ignored. */
+
+ LOC_OPTIMIZED_OUT,
+
+ /* The variable's address is computed by a set of location
+ functions (see "struct symbol_computed_ops" below). */
+ LOC_COMPUTED,
+};
/* Define a structure for the information that is common to all symbol types,
including minimal symbols, partial symbols, and full symbols. In a
@@ -136,8 +299,34 @@ struct general_symbol_info
/* Record the source code language that applies to this symbol.
This is used to select one of the fields from the language specific
union above. */
+ ENUM_BITFIELD(language) language : 4;
- ENUM_BITFIELD(language) language : 8;
+ /* This field is used to share storage between the 'domain' and
+ 'type' fields. 'domain' is used by symbols and psymbols, and
+ 'type' is used by minimal symbols. */
+ /* Domain code. */
+ /* ENUM_BITFIELD(domain_enum_tag) domain : 3; */
+ /* Classification type for this minimal symbol. */
+ /* ENUM_BITFIELD(minimal_symbol_type) type : 4; */
+ unsigned domain_or_type : 4;
+
+ /* Address class */
+ /* NOTE: cagney/2003-11-02: The fields "aclass" and "ops" contain
+ overlapping information. By creating a per-aclass ops vector, or
+ using the aclass as an index into an ops table, the aclass and
+ ops fields can be merged. The latter, for instance, would shave
+ 32-bits from each symbol (relative to a symbol lookup, any table
+ index overhead would be in the noise). */
+
+ ENUM_BITFIELD(address_class) aclass : 4;
+
+ /* For minimal symbols, two flag bits provided for the use of the
+ target.
+ For full symbols, flag_1 is true if this is an argument; flag_2
+ is true if this is an inlined function (class LOC_BLOCK only). */
+
+ unsigned flag_1 : 1;
+ unsigned flag_2 : 1;
/* Which section is this symbol in? This is an index into
section_offsets for this objfile. Negative means that the symbol
@@ -267,37 +456,6 @@ extern char *symbol_search_name (const struct general_symbol_info *);
#define SYMBOL_MATCHES_SEARCH_NAME(symbol, name) \
(strcmp_iw (SYMBOL_SEARCH_NAME (symbol), (name)) == 0)
-/* Classification types for a minimal symbol. These should be taken as
- "advisory only", since if gdb can't easily figure out a
- classification it simply selects mst_unknown. It may also have to
- guess when it can't figure out which is a better match between two
- types (mst_data versus mst_bss) for example. Since the minimal
- symbol info is sometimes derived from the BFD library's view of a
- file, we need to live with what information bfd supplies. */
-
-enum minimal_symbol_type
-{
- mst_unknown = 0, /* Unknown type, the default */
- mst_text, /* Generally executable instructions */
- mst_data, /* Generally initialized data */
- mst_bss, /* Generally uninitialized data */
- mst_abs, /* Generally absolute (nonrelocatable) */
- /* GDB uses mst_solib_trampoline for the start address of a shared
- library trampoline entry. Breakpoints for shared library functions
- are put there if the shared library is not yet loaded.
- After the shared library is loaded, lookup_minimal_symbol will
- prefer the minimal symbol from the shared library (usually
- a mst_text symbol) over the mst_solib_trampoline symbol, and the
- breakpoints will be moved to their true address in the shared
- library via breakpoint_re_set. */
- mst_solib_trampoline, /* Shared library trampoline code */
- /* For the mst_file* types, the names are only guaranteed to be unique
- within a given .o file. */
- mst_file_text, /* Static version of mst_text */
- mst_file_data, /* Static version of mst_data */
- mst_file_bss /* Static version of mst_bss */
-};
-
/* Define a simple structure used to hold some very basic information about
all defined global symbols (text, data, bss, abs, etc). The only required
information is the general_symbol_info.
@@ -329,14 +487,6 @@ struct minimal_symbol
/* Which source file is this symbol in? Only relevant for mst_file_*. */
char *filename;
- /* Classification type for this minimal symbol. */
-
- ENUM_BITFIELD(minimal_symbol_type) type : 8;
-
- /* Two flag bits provided for the use of the target. */
- unsigned int target_flag_1 : 1;
- unsigned int target_flag_2 : 1;
-
/* Minimal symbols with the same hash key are kept on a linked
list. This is the link. */
@@ -348,155 +498,15 @@ struct minimal_symbol
struct minimal_symbol *demangled_hash_next;
};
-#define MSYMBOL_TARGET_FLAG_1(msymbol) (msymbol)->target_flag_1
-#define MSYMBOL_TARGET_FLAG_2(msymbol) (msymbol)->target_flag_2
+#define MSYMBOL_TARGET_FLAG_1(msymbol) (msymbol)->ginfo.flag_1
+#define MSYMBOL_TARGET_FLAG_2(msymbol) (msymbol)->ginfo.flag_2
#define MSYMBOL_SIZE(msymbol) (msymbol)->size
-#define MSYMBOL_TYPE(msymbol) (msymbol)->type
+#define MSYMBOL_TYPE(msymbol) (msymbol)->ginfo.domain_or_type
/* Represent one symbol name; a variable, constant, function or typedef. */
-/* Different name domains for symbols. Looking up a symbol specifies a
- domain and ignores symbol definitions in other name domains. */
-
-typedef enum domain_enum_tag
-{
- /* UNDEF_DOMAIN is used when a domain has not been discovered or
- none of the following apply. This usually indicates an error either
- in the symbol information or in gdb's handling of symbols. */
-
- UNDEF_DOMAIN,
-
- /* VAR_DOMAIN is the usual domain. In C, this contains variables,
- function names, typedef names and enum type values. */
-
- VAR_DOMAIN,
-
- /* STRUCT_DOMAIN is used in C to hold struct, union and enum type names.
- Thus, if `struct foo' is used in a C program, it produces a symbol named
- `foo' in the STRUCT_DOMAIN. */
-
- STRUCT_DOMAIN,
-
- /* LABEL_DOMAIN may be used for names of labels (for gotos);
- currently it is not used and labels are not recorded at all. */
-
- LABEL_DOMAIN,
-
- /* Searching domains. These overlap with VAR_DOMAIN, providing
- some granularity with the search_symbols function. */
-
- /* Everything in VAR_DOMAIN minus FUNCTIONS_DOMAIN and
- TYPES_DOMAIN. */
- VARIABLES_DOMAIN,
-
- /* All functions -- for some reason not methods, though. */
- FUNCTIONS_DOMAIN,
-
- /* All defined types */
- TYPES_DOMAIN
-}
-domain_enum;
-
-/* An address-class says where to find the value of a symbol. */
-
-enum address_class
-{
- /* Not used; catches errors */
-
- LOC_UNDEF,
-
- /* Value is constant int SYMBOL_VALUE, host byteorder */
-
- LOC_CONST,
-
- /* Value is at fixed address SYMBOL_VALUE_ADDRESS */
-
- LOC_STATIC,
-
- /* Value is in register. SYMBOL_VALUE is the register number
- in the original debug format. SYMBOL_REGISTER_OPS holds a
- function that can be called to transform this into the
- actual register number this represents in a specific target
- architecture (gdbarch).
-
- For some symbol formats (stabs, for some compilers at least),
- the compiler generates two symbols, an argument and a register.
- In some cases we combine them to a single LOC_REGISTER in symbol
- reading, but currently not for all cases (e.g. it's passed on the
- stack and then loaded into a register). */
-
- LOC_REGISTER,
-
- /* It's an argument; the value is at SYMBOL_VALUE offset in arglist. */
-
- LOC_ARG,
-
- /* Value address is at SYMBOL_VALUE offset in arglist. */
-
- LOC_REF_ARG,
-
- /* Value is in specified register. Just like LOC_REGISTER except the
- register holds the address of the argument instead of the argument
- itself. This is currently used for the passing of structs and unions
- on sparc and hppa. It is also used for call by reference where the
- address is in a register, at least by mipsread.c. */
-
- LOC_REGPARM_ADDR,
-
- /* Value is a local variable at SYMBOL_VALUE offset in stack frame. */
-
- LOC_LOCAL,
-
- /* Value not used; definition in SYMBOL_TYPE. Symbols in the domain
- STRUCT_DOMAIN all have this class. */
-
- LOC_TYPEDEF,
-
- /* Value is address SYMBOL_VALUE_ADDRESS in the code */
-
- LOC_LABEL,
-
- /* In a symbol table, value is SYMBOL_BLOCK_VALUE of a `struct block'.
- In a partial symbol table, SYMBOL_VALUE_ADDRESS is the start address
- of the block. Function names have this class. */
-
- LOC_BLOCK,
-
- /* Value is a constant byte-sequence pointed to by SYMBOL_VALUE_BYTES, in
- target byte order. */
-
- LOC_CONST_BYTES,
-
- /* Value is at fixed address, but the address of the variable has
- to be determined from the minimal symbol table whenever the
- variable is referenced.
- This happens if debugging information for a global symbol is
- emitted and the corresponding minimal symbol is defined
- in another object file or runtime common storage.
- The linker might even remove the minimal symbol if the global
- symbol is never referenced, in which case the symbol remains
- unresolved.
-
- GDB would normally find the symbol in the minimal symbol table if it will
- not find it in the full symbol table. But a reference to an external
- symbol in a local block shadowing other definition requires full symbol
- without possibly having its address available for LOC_STATIC. Testcase
- is provided as `gdb.dwarf2/dw2-unresolved.exp'. */
-
- LOC_UNRESOLVED,
-
- /* The variable does not actually exist in the program.
- The value is ignored. */
-
- LOC_OPTIMIZED_OUT,
-
- /* The variable's address is computed by a set of location
- functions (see "struct symbol_computed_ops" below). */
- LOC_COMPUTED,
-};
-
/* The methods needed to implement LOC_COMPUTED. These methods can
use the symbol's .aux_value for additional per-symbol information.
@@ -556,27 +566,6 @@ struct symbol
associated with LINE. */
struct symtab *symtab;
- /* Domain code. */
-
- ENUM_BITFIELD(domain_enum_tag) domain : 6;
-
- /* Address class */
- /* NOTE: cagney/2003-11-02: The fields "aclass" and "ops" contain
- overlapping information. By creating a per-aclass ops vector, or
- using the aclass as an index into an ops table, the aclass and
- ops fields can be merged. The latter, for instance, would shave
- 32-bits from each symbol (relative to a symbol lookup, any table
- index overhead would be in the noise). */
-
- ENUM_BITFIELD(address_class) aclass : 6;
-
- /* Whether this is an argument. */
-
- unsigned is_argument : 1;
-
- /* Whether this is an inlined function (class LOC_BLOCK only). */
- unsigned is_inlined : 1;
-
/* Line number of this symbol's definition, except for inlined
functions. For an inlined function (class LOC_BLOCK and
SYMBOL_INLINED set) this is the line number of the function's call
@@ -620,10 +609,10 @@ struct symbol
};
-#define SYMBOL_DOMAIN(symbol) (symbol)->domain
-#define SYMBOL_CLASS(symbol) (symbol)->aclass
-#define SYMBOL_IS_ARGUMENT(symbol) (symbol)->is_argument
-#define SYMBOL_INLINED(symbol) (symbol)->is_inlined
+#define SYMBOL_DOMAIN(symbol) (symbol)->ginfo.domain_or_type
+#define SYMBOL_CLASS(symbol) (symbol)->ginfo.aclass
+#define SYMBOL_IS_ARGUMENT(symbol) (symbol)->ginfo.flag_1
+#define SYMBOL_INLINED(symbol) (symbol)->ginfo.flag_2
#define SYMBOL_TYPE(symbol) (symbol)->type
#define SYMBOL_LINE(symbol) (symbol)->line
#define SYMBOL_SYMTAB(symbol) (symbol)->symtab
@@ -647,18 +636,10 @@ struct partial_symbol
struct general_symbol_info ginfo;
- /* Name space code. */
-
- ENUM_BITFIELD(domain_enum_tag) domain : 6;
-
- /* Address class (for info_symbols) */
-
- ENUM_BITFIELD(address_class) aclass : 6;
-
};
-#define PSYMBOL_DOMAIN(psymbol) (psymbol)->domain
-#define PSYMBOL_CLASS(psymbol) (psymbol)->aclass
+#define PSYMBOL_DOMAIN(psymbol) (psymbol)->ginfo.domain_or_type
+#define PSYMBOL_CLASS(psymbol) (psymbol)->ginfo.aclass
/* Each item represents a line-->pc (or the reverse) mapping. This is