This is the mail archive of the binutils@sources.redhat.com mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: x86 - linker optimization of "call *constant_variable" -feasible?


Alan Modra <amodra@bigpond.net.au> writes:

> Add R_386_something to include/elf/i386.h
> Add BFD_RELOC_something to bfd/reloc.c if no existing reloc will do.
> Add reloc howto, and code to handle reloc in reloc_type_lookup and other
> functions in bfd/elf32-i386.c.  Most places it should behave exactly as
> R_386_32.
> Generate the reloc in gas/config/tc-i386.c, most likely spot is fudging
> reloc_type in output_disp as done for got relocs.

I've got a patch now that does almost all of this.  I can mark
optimization candidates in GAS and carry them all the way through to
elf_i386_relocate_section, but there I am stuck on actually
implementing the optimization.  If you recall, the desired effect is:
given

        .text
a:
        call *var

        .rodata
var:
        .long func

convert the call instruction to a direct call to func.  (The same can
usefully be done for jmp instructions too, but that's a detail.)  In
elf_i386_relocate_section, I can overwrite the instruction with the
direct form easily.  What I don't know how to do is look up the target
address of the relocation being applied, determine whether it's in a
read-only segment (otherwise the optimization is invalid), and then
look up the relocation *for that address* and use it to determine the
appropriate value for the displacement field of the call instruction.

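For demonstration purposes I've made ld generate a direct branch to
the address of the variable itself; this is only a placeholder, since
the branch should really go to the function address stored there.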

Another problem is that I don't want to have to teach ld.so about the
new R_386_32_GNU_INDJUMP relocation, so I need to make sure that if
the optimization does _not_ apply, the relocation gets converted to a
plain old R_386_32 in the output.  I do not know how to do that, either.

Appended is what I've got now.  Any further advice would be
appreciated.

zw

===================================================================
Index: bfd/elf32-i386.c
--- bfd/elf32-i386.c	11 Aug 2003 14:26:10 -0000	1.110
+++ bfd/elf32-i386.c	16 Aug 2003 07:50:27 -0000
@@ -158,9 +158,17 @@ static reloc_howto_type elf_howto_table[
 	 FALSE,			/* partial_inplace */
 	 0,			/* src_mask */
 	 0,			/* dst_mask */
-	 FALSE)			/* pcrel_offset */
+	 FALSE),		/* pcrel_offset */
 
-#define R_386_vt (R_386_GNU_VTENTRY + 1 - R_386_vt_offset)
+/* GNU extension to record use of an indirect call of a variable,
+   which can be optimized by the linker if the variable turns out
+   to be a constant.  */
+  HOWTO (R_386_32_GNU_INDJUMP,
+	 0, 2, 32, FALSE, 0, complain_overflow_bitfield,
+	 bfd_elf_generic_reloc, "R_386_32_GNU_INDJUMP",
+	 TRUE, 0xffffffff, 0xffffffff, FALSE),
+
+#define R_386_last (R_386_32_GNU_INDJUMP + 1 - R_386_vt_offset)
 
 };
 
@@ -299,6 +307,10 @@ elf_i386_reloc_type_lookup (bfd *abfd AT
       TRACE ("BFD_RELOC_VTABLE_ENTRY");
       return &elf_howto_table[R_386_GNU_VTENTRY - R_386_vt_offset];
 
+    case BFD_RELOC_386_32_GNU_INDJUMP:
+      TRACE ("BFD_RELOC_386_32_GNU_INDJUMP");
+      return &elf_howto_table[R_386_32_GNU_INDJUMP - R_386_vt_offset];
+
     default:
       break;
     }
@@ -321,7 +333,7 @@ elf_i386_info_to_howto_rel (bfd *abfd AT
       && ((indx = r_type - R_386_tls_offset) - R_386_ext
 	  >= R_386_tls - R_386_ext)
       && ((indx = r_type - R_386_vt_offset) - R_386_tls
-	  >= R_386_vt - R_386_tls))
+	  >= R_386_last - R_386_tls))
     {
       (*_bfd_error_handler) (_("%s: invalid relocation type %d"),
 			     bfd_archive_filename (abfd), (int) r_type);
@@ -2021,15 +2033,27 @@ elf_i386_relocate_section (bfd *output_b
 	  || r_type == R_386_GNU_VTENTRY)
 	continue;
 
-      if ((indx = r_type) >= R_386_standard
-	  && ((indx = r_type - R_386_ext_offset) - R_386_standard
-	      >= R_386_ext - R_386_standard)
-	  && ((indx = r_type - R_386_tls_offset) - R_386_ext
-	      >= R_386_tls - R_386_ext))
+      if (r_type < R_386_standard)
+	indx = r_type;
+
+      else if (r_type >= R_386_standard + R_386_ext_offset
+	       && r_type < R_386_ext + R_386_ext_offset)
+	indx = r_type - R_386_ext_offset;
+
+      else if (r_type >= R_386_ext + R_386_tls_offset
+	       && r_type < R_386_tls + R_386_tls_offset)
+	indx = r_type - R_386_tls_offset;
+
+      else if (r_type >= R_386_tls + R_386_vt_offset
+	       && r_type < R_386_last + R_386_vt_offset)
+	indx = r_type - R_386_vt_offset;
+
+      else
 	{
 	  bfd_set_error (bfd_error_bad_value);
 	  return FALSE;
 	}
+
       howto = elf_howto_table + indx;
 
       r_symndx = ELF32_R_SYM (rel->r_info);
@@ -2890,6 +2914,58 @@ elf_i386_relocate_section (bfd *output_b
 	    relocation = tpoff (info, relocation);
 	  else
 	    relocation = -tpoff (info, relocation);
+	  break;
+
+	case R_386_32_GNU_INDJUMP:
+	  /* This is R_386_32, but tagged specially.  We are in a code
+	     segment, and the instruction at r_offset is an indirect jump
+	     or call via an absolute memory location (ff [25|15] disp32).
+	     If that memory location is inside a read-only segment,
+	     we can optimize the instruction into a direct jump or call
+	     to the value of that memory location ([e9|e8] disp32).
+	     The replacement instruction is one byte shorter than the
+	     original, so we have to pad with a NOP (90).  */
+	  {
+	    bfd_byte *addr = contents + rel->r_offset;
+	    bfd_byte modrm = bfd_get_8 (input_bfd, addr - 1);
+	    BFD_ASSERT (bfd_get_8  (input_bfd, addr - 2) == 0xff);
+	    BFD_ASSERT (modrm == 0x25 || modrm == 0x15);
+	    BFD_ASSERT (bfd_get_32 (input_bfd, addr) == 0x0);
+
+	    /* ??? At this point we need to look up the target
+	       address of the relocation, find out if it's in a
+	       read-only data segment, and if it is, clone the
+	       relocation to be applied to that address into
+	       the current relocation, converting to R_386_PC32.
+	       How to do these things?  */
+	    if (1)
+	      {
+		/* Overwrite the instruction with a direct call or
+		   jump. The processor interprets the destination
+		   address relative to the end of the instruction, but
+		   the generic reloc handling wants to apply the
+		   addend relative to the first byte of the offset
+		   field.  Thus, we set the offset field to -4 (fc ff
+		   ff ff) which will produce the correct value after
+		   relocation.  */
+		addr[-2] = (modrm == 0x25) ? '\xe9' : '\xe8';
+		memcpy (addr - 1, "\xfc\xff\xff\xff\x90", 5);
+
+		/* Update the relocation entry.  r_offset now points one
+		   byte higher in memory than it should; furthermore,
+		   the address is now PC-relative, not absolute, and the
+		   relocation addend must be adjusted accordingly.  */
+		rel->r_offset -= 1;
+		relocation -= (input_section->output_section->vma
+			       + rel->r_offset);
+		r_type = R_386_PC32;
+	      }
+	    else
+	      /* ??? How to ensure that the output BFD gets a plain
+		 R_386_32 reloc, so that the dynamic linker never sees
+		 R_386_32_GNU_INDJUMP?  */
+	      r_type = R_386_32;
+	  }
 	  break;
 
 	default:
===================================================================
Index: bfd/reloc.c
--- bfd/reloc.c	10 Jul 2003 03:20:00 -0000	1.90
+++ bfd/reloc.c	16 Aug 2003 07:50:29 -0000
@@ -2220,6 +2220,8 @@ ENUMX
   BFD_RELOC_386_TLS_DTPOFF32
 ENUMX
   BFD_RELOC_386_TLS_TPOFF32
+ENUMX
+  BFD_RELOC_386_32_GNU_INDJUMP
 ENUMDOC
   i386/elf relocations
 
===================================================================
Index: gas/config/tc-i386.c
--- gas/config/tc-i386.c	14 Aug 2003 08:05:44 -0000	1.146
+++ gas/config/tc-i386.c	16 Aug 2003 07:50:29 -0000
@@ -3323,6 +3323,15 @@ output_disp (insn_start_frag, insn_start
 		  reloc_type = BFD_RELOC_386_GOTPC;
 		  i.op[n].disps->X_add_number += add;
 		}
+	      /* Detect "jmp/call *symbol" and mark it specially.  */
+	      else if (reloc_type == BFD_RELOC_32
+		       && i.tm.base_opcode == JUMP_INDIRECT
+		       && (i.tm.extension_opcode == 0x04     /* jmp */
+			   || i.tm.extension_opcode == 0x02) /* call */
+		       && i.base_reg == 0
+		       && i.index_reg == 0)
+		reloc_type = BFD_RELOC_386_32_GNU_INDJUMP;
+		       
 	      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
 			   i.op[n].disps, pcrel, reloc_type);
 	    }
@@ -5139,6 +5148,7 @@ tc_gen_reloc (section, fixp)
     case BFD_RELOC_RVA:
     case BFD_RELOC_VTABLE_ENTRY:
     case BFD_RELOC_VTABLE_INHERIT:
+    case BFD_RELOC_386_32_GNU_INDJUMP:
       code = fixp->fx_r_type;
       break;
     default:
===================================================================
Index: include/elf/i386.h
--- include/elf/i386.h	19 Sep 2002 19:01:14 -0000	1.7
+++ include/elf/i386.h	16 Aug 2003 07:50:30 -0000
@@ -65,6 +65,9 @@ START_RELOC_NUMBERS (elf_i386_reloc_type
      /* These are GNU extensions to enable C++ vtable garbage collection.  */
      RELOC_NUMBER (R_386_GNU_VTINHERIT, 250)
      RELOC_NUMBER (R_386_GNU_VTENTRY, 251)
+
+     /* These are GNU extensions to flag code for link-time optimizations.  */
+     RELOC_NUMBER (R_386_32_GNU_INDJUMP, 252)
 END_RELOC_NUMBERS (R_386_max)
 
 #endif
===================================================================
Index: include/opcode/i386.h
--- include/opcode/i386.h	23 Jun 2003 20:15:33 -0000	1.40
+++ include/opcode/i386.h	16 Aug 2003 07:50:30 -0000
@@ -371,6 +371,7 @@ static const template i386_optab[] = {
 
 /* Control transfer instructions.  */
 {"call",   1,	0xe8, X, 0,	 wlq_Suf|JumpDword|DefaultSize,	{ Disp16|Disp32, 0, 0} },
+#define JUMP_INDIRECT 0xff
 {"call",   1,	0xff, 2, CpuNo64, wl_Suf|Modrm|DefaultSize,	{ WordReg|WordMem|JumpAbsolute, 0, 0} },
 {"call",   1,	0xff, 2, Cpu64,	 wq_Suf|Modrm|DefaultSize|NoRex64,{ WordReg|WordMem|JumpAbsolute, 0, 0} },
 /* Intel Syntax */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]