This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[GOLD] PowerPC tls_get_addr_optimize


From 10cd12ba1510ea3317b9f2724946ba4e904f88e4 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@gmail.com>
Date: Wed, 12 Jul 2017 20:54:13 +0930
Subject: [PATCH 4/4] 

This implements the special __tls_get_addr_opt call stub for powerpc
gold that returns __thread variable addresses without actually making
a call to __tls_get_addr in most cases.  Shared libraries that are
loaded at program load time (i.e. dlopen is not used) have a known
layout for their __thread variables, and thus DTPMOD64/DTPREL64 pairs
describing those variables can be set up by ld.so for the
__tls_get_addr_opt fast exit.
Ref https://sourceware.org/ml/libc-alpha/2015-03/msg00626.html

I really, really wish I'd used a differently versioned __tls_get_addr
symbol than the base symbol to indicate glibc support for the
optimized call, rather than having glibc export __tls_get_addr_opt.  A
lot of the messing around here, flipping symbols from __tls_get_addr
to __tls_get_addr_opt, is caused by that decision.  About the only
benefit is that a user can see at a glance that their disassembled
code is calling __tls_get_addr via the fancy call stub.  Anyway, we
need references to __tls_get_addr to seem like they were to
__tls_get_addr_opt, and in cases like the tsan interceptor, a
definition of __tls_get_addr to seem like one of __tls_get_addr_opt
as well.  That's the reason for Symbol::clear_in_reg and
Symbol_table::clone, and why symbols are substituted in Scan::global
and other places dealing with dynamic linking.  I decided to not
template clone because treating its arguments as Sized_symbol isn't
correct given that mips derives its own symbol class.  Passing in a
size at least gives a nod towards supporting clone on derived
classes.

OK?

elfcpp/
	* elfcpp.h (DT_PPC_OPT): Define.
	* powerpc.h (PPC_OPT_TLS): Define.
gold/
	* options.h (tls_get_addr_optimize): New option.
	* symtab.h (Symbol::clear_in_reg, clone): New functions.
	(Symbol_table::clone): New function.
	* resolve.cc (Symbol::clone): New function.
	* powerpc.cc (Target_powerpc::has_tls_get_addr_opt_,
	tls_get_addr_, tls_get_addr_opt_): New vars.
	(Target_powerpc::tls_get_addr_opt, tls_get_addr,
	is_tls_get_addr_opt, replace_tls_get_addr,
	set_has_tls_get_addr_opt, stk_linker): New functions.
	(Target_powerpc::Track_tls::maybe_skip_tls_get_addr_call): Add
	target param.  Update callers.  Compare symbols rather than names.
	(Target_powerpc::do_define_standard_symbols): Init tls_get_addr_
	and tls_get_addr_opt_.
	(Target_powerpc::Branch_info::mark_pltcall): Translate tls_get_addr
	sym to tls_get_addr_opt.
	(Target_powerpc::Branch_info::make_stub): Likewise.
	(Stub_table::define_stub_syms): Likewise.
	(Target_powerpc::Scan::global): Likewise.
	(Target_powerpc::Relocate::relocate): Likewise.
	(add_3_12_2, add_3_12_13, bctrl, beqlr, cmpdi_11_0, cmpwi_11_0,
	ld_11_1, ld_11_3, ld_12_3, lwz_11_3, lwz_12_3, mr_0_3, mr_3_0,
	mtlr_11, std_11_1): New constants.
	(Stub_table::eh_frame_added_): Delete.
	(Stub_table::tls_get_addr_opt_bctrl_, plt_fde_len_, plt_fde_): New vars.
	(Stub_table::init_plt_fde): New function.
	(Stub_table::add_eh_frame, remove_eh_frame): Move definition out
	of line.  Init and use plt_fde_.
	(Stub_table::plt_call_size): Return size for tls_get_addr stub.
	Extract alignment code to..
	(Stub_table::plt_call_align): ..this new function.  Adjust all callers.
	(Stub_table::add_plt_call_entry): Set has_tls_get_addr_opt and
	tls_get_addr_opt_bctrl, and align after that.
	(Stub_table::do_write): Write out tls_get_addr stub.
	(Target_powerpc::do_finalize_sections): Emit DT_PPC_OPT
	PPC_OPT_TLS/PPC64_OPT_TLS bit.
	(Target_powerpc::Relocate::relocate): Don't check for or modify
	nop following bl for tls_get_addr stub.

diff --git a/elfcpp/elfcpp.h b/elfcpp/elfcpp.h
index cccec4c..1f629ce 100644
--- a/elfcpp/elfcpp.h
+++ b/elfcpp/elfcpp.h
@@ -768,6 +768,9 @@ enum DT
   // Specify the value of _GLOBAL_OFFSET_TABLE_.
   DT_PPC_GOT = 0x70000000,
 
+  // Specify whether various optimisations are possible.
+  DT_PPC_OPT = 0x70000001,
+
   // Specify the start of the .glink section.
   DT_PPC64_GLINK = 0x70000000,
 
diff --git a/elfcpp/powerpc.h b/elfcpp/powerpc.h
index 3dc0828..71cac5e 100644
--- a/elfcpp/powerpc.h
+++ b/elfcpp/powerpc.h
@@ -228,6 +228,12 @@ enum
   EF_PPC64_ABI = 3
 };
 
+// DT_PPC_OPT bits
+enum
+{
+  PPC_OPT_TLS = 1
+};
+
 // DT_PPC64_OPT bits
 enum
 {
diff --git a/gold/options.h b/gold/options.h
index 7be15a5..2e2037d 100644
--- a/gold/options.h
+++ b/gold/options.h
@@ -1293,6 +1293,9 @@ class General_options
   DEFINE_bool(tls_optimize, options::TWO_DASHES, '\0', true,
 	      N_("(PowerPC/64 only) Optimize GD/LD/IE code to IE/LE"),
 	      N_("(PowerPC/64 only) Don'\''t try to optimize TLS accesses"));
+  DEFINE_bool(tls_get_addr_optimize, options::TWO_DASHES, '\0', true,
+	      N_("(PowerPC/64 only) Use a special __tls_get_addr call"),
+	      N_("(PowerPC/64 only) Don't use a special __tls_get_addr call"));
 
   DEFINE_bool(toc_optimize, options::TWO_DASHES, '\0', true,
 	      N_("(PowerPC64 only) Optimize TOC code sequences"),
diff --git a/gold/powerpc.cc b/gold/powerpc.cc
index c29850b..b9f0012 100644
--- a/gold/powerpc.cc
+++ b/gold/powerpc.cc
@@ -613,8 +613,10 @@ class Target_powerpc : public Sized_target<size, big_endian>
       stub_tables_(), branch_lookup_table_(), branch_info_(), tocsave_loc_(),
       plt_thread_safe_(false), plt_localentry0_(false),
       plt_localentry0_init_(false), has_localentry0_(false),
+      has_tls_get_addr_opt_(false),
       relax_failed_(false), relax_fail_count_(0),
-      stub_group_size_(0), savres_section_(0)
+      stub_group_size_(0), savres_section_(0),
+      tls_get_addr_(NULL), tls_get_addr_opt_(NULL)
   {
   }
 
@@ -1081,11 +1083,43 @@ class Target_powerpc : public Sized_target<size, big_endian>
     this->set_processor_specific_flags(flags);
   }
 
+  Symbol*
+  tls_get_addr_opt() const
+  { return this->tls_get_addr_opt_; }
+
+  Symbol*
+  tls_get_addr() const
+  { return this->tls_get_addr_; }
+
+  // If optimizing __tls_get_addr calls, whether this is the
+  // "__tls_get_addr" symbol.
+  bool
+  is_tls_get_addr_opt(const Symbol* gsym) const
+  {
+    return this->tls_get_addr_opt_ && (gsym == this->tls_get_addr_
+				       || gsym == this->tls_get_addr_opt_);
+  }
+
+  bool
+  replace_tls_get_addr(const Symbol* gsym) const
+  { return this->tls_get_addr_opt_ && gsym == this->tls_get_addr_; }
+
+  void
+  set_has_tls_get_addr_opt()
+  { this->has_tls_get_addr_opt_ = true; }
+
   // Offset to toc save stack slot
   int
   stk_toc() const
   { return this->abiversion() < 2 ? 40 : 24; }
 
+  // Offset to linker save stack slot.  ELFv2 doesn't have a linker word,
+  // so use the CR save slot.  Used only by __tls_get_addr call stub,
+  // relying on __tls_get_addr not saving CR itself.
+  int
+  stk_linker() const
+  { return this->abiversion() < 2 ? 32 : 8; }
+
  private:
 
   class Track_tls
@@ -1139,12 +1173,14 @@ class Target_powerpc : public Sized_target<size, big_endian>
     {this->tls_get_addr_state_ = SKIP; }
 
     Tls_get_addr
-    maybe_skip_tls_get_addr_call(unsigned int r_type, const Symbol* gsym)
+    maybe_skip_tls_get_addr_call(Target_powerpc<size, big_endian>* target,
+				 unsigned int r_type, const Symbol* gsym)
     {
       bool is_tls_call = ((r_type == elfcpp::R_POWERPC_REL24
 			   || r_type == elfcpp::R_PPC_PLTREL24)
 			  && gsym != NULL
-			  && strcmp(gsym->name(), "__tls_get_addr") == 0);
+			  && (gsym == target->tls_get_addr()
+			      || gsym == target->tls_get_addr_opt()));
       Tls_get_addr last_tls = this->tls_get_addr_state_;
       this->tls_get_addr_state_ = NOT_EXPECTED;
       if (is_tls_call && last_tls != EXPECTED)
@@ -1549,12 +1585,18 @@ class Target_powerpc : public Sized_target<size, big_endian>
   bool plt_localentry0_;
   bool plt_localentry0_init_;
   bool has_localentry0_;
+  bool has_tls_get_addr_opt_;
 
   bool relax_failed_;
   int relax_fail_count_;
   int32_t stub_group_size_;
 
   Output_data_save_res<size, big_endian> *savres_section_;
+
+  // The "__tls_get_addr" symbol, if present
+  Symbol* tls_get_addr_;
+  // If optimizing __tls_get_addr calls, the "__tls_get_addr_opt" symbol.
+  Symbol* tls_get_addr_opt_;
 };
 
 template<>
@@ -2459,6 +2501,31 @@ Target_powerpc<size, big_endian>::do_define_standard_symbols(
 					false, false);
 	}
     }
+
+  this->tls_get_addr_ = symtab->lookup("__tls_get_addr");
+  if (parameters->options().tls_get_addr_optimize()
+      && this->tls_get_addr_ != NULL
+      && this->tls_get_addr_->in_reg())
+    this->tls_get_addr_opt_ = symtab->lookup("__tls_get_addr_opt");
+  if (this->tls_get_addr_opt_ != NULL)
+    {
+      if (this->tls_get_addr_->is_undefined()
+	  || this->tls_get_addr_->is_from_dynobj())
+	{
+	  // Make it seem as if references to __tls_get_addr are
+	  // really to __tls_get_addr_opt, so the latter symbol is
+	  // made dynamic, not the former.
+	  this->tls_get_addr_->clear_in_reg();
+	  this->tls_get_addr_opt_->set_in_reg();
+	}
+      // We have a non-dynamic definition for __tls_get_addr.
+      // Make __tls_get_addr_opt the same, if it does not already have
+      // a non-dynamic definition.
+      else if (this->tls_get_addr_opt_->is_undefined()
+	       || this->tls_get_addr_opt_->is_from_dynobj())
+	symtab->clone(this->tls_get_addr_opt_, this->tls_get_addr_,
+		      sizeof (Sized_symbol<size>));
+    }
 }
 
 // Set up PowerPC target specific relobj.
@@ -3030,6 +3097,8 @@ Target_powerpc<size, big_endian>::Branch_info::mark_pltcall(
     return false;
 
   Symbol* sym = this->object_->global_symbol(this->r_sym_);
+  if (target->replace_tls_get_addr(sym))
+    sym = target->tls_get_addr_opt();
   if (sym != NULL && sym->is_forwarder())
     sym = symtab->resolve_forwards(sym);
   const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym);
@@ -3055,12 +3124,14 @@ Target_powerpc<size, big_endian>::Branch_info::make_stub(
     Symbol_table* symtab) const
 {
   Symbol* sym = this->object_->global_symbol(this->r_sym_);
-  if (sym != NULL && sym->is_forwarder())
-    sym = symtab->resolve_forwards(sym);
-  const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym);
   Target_powerpc<size, big_endian>* target =
     static_cast<Target_powerpc<size, big_endian>*>(
       parameters->sized_target<size, big_endian>());
+  if (target->replace_tls_get_addr(sym))
+    sym = target->tls_get_addr_opt();
+  if (sym != NULL && sym->is_forwarder())
+    sym = symtab->resolve_forwards(sym);
+  const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym);
   bool ok = true;
 
   if (gsym != NULL
@@ -3675,6 +3746,8 @@ static const uint32_t add_2_2_11	= 0x7c425a14;
 static const uint32_t add_2_2_12	= 0x7c426214;
 static const uint32_t add_3_3_2		= 0x7c631214;
 static const uint32_t add_3_3_13	= 0x7c636a14;
+static const uint32_t add_3_12_2	= 0x7c6c1214;
+static const uint32_t add_3_12_13	= 0x7c6c6a14;
 static const uint32_t add_11_0_11	= 0x7d605a14;
 static const uint32_t add_11_2_11	= 0x7d625a14;
 static const uint32_t add_11_11_2	= 0x7d6b1214;
@@ -3696,10 +3769,14 @@ static const uint32_t addis_12_12	= 0x3d8c0000;
 static const uint32_t b			= 0x48000000;
 static const uint32_t bcl_20_31		= 0x429f0005;
 static const uint32_t bctr		= 0x4e800420;
+static const uint32_t bctrl		= 0x4e800421;
+static const uint32_t beqlr		= 0x4d820020;
 static const uint32_t blr		= 0x4e800020;
 static const uint32_t bnectr_p4		= 0x4ce20420;
 static const uint32_t cmpld_7_12_0	= 0x7fac0040;
 static const uint32_t cmpldi_2_0	= 0x28220000;
+static const uint32_t cmpdi_11_0	= 0x2c2b0000;
+static const uint32_t cmpwi_11_0	= 0x2c0b0000;
 static const uint32_t cror_15_15_15	= 0x4def7b82;
 static const uint32_t cror_31_31_31	= 0x4ffffb82;
 static const uint32_t ld_0_1		= 0xe8010000;
@@ -3708,9 +3785,12 @@ static const uint32_t ld_2_1		= 0xe8410000;
 static const uint32_t ld_2_2		= 0xe8420000;
 static const uint32_t ld_2_11		= 0xe84b0000;
 static const uint32_t ld_2_12		= 0xe84c0000;
+static const uint32_t ld_11_1		= 0xe9610000;
 static const uint32_t ld_11_2		= 0xe9620000;
+static const uint32_t ld_11_3		= 0xe9630000;
 static const uint32_t ld_11_11		= 0xe96b0000;
 static const uint32_t ld_12_2		= 0xe9820000;
+static const uint32_t ld_12_3		= 0xe9830000;
 static const uint32_t ld_12_11		= 0xe98b0000;
 static const uint32_t ld_12_12		= 0xe98c0000;
 static const uint32_t lfd_0_1		= 0xc8010000;
@@ -3722,17 +3802,22 @@ static const uint32_t lis_11		= 0x3d600000;
 static const uint32_t lis_12		= 0x3d800000;
 static const uint32_t lvx_0_12_0	= 0x7c0c00ce;
 static const uint32_t lwz_0_12		= 0x800c0000;
+static const uint32_t lwz_11_3		= 0x81630000;
 static const uint32_t lwz_11_11		= 0x816b0000;
 static const uint32_t lwz_11_30		= 0x817e0000;
+static const uint32_t lwz_12_3		= 0x81830000;
 static const uint32_t lwz_12_12		= 0x818c0000;
 static const uint32_t lwzu_0_12		= 0x840c0000;
 static const uint32_t mflr_0		= 0x7c0802a6;
 static const uint32_t mflr_11		= 0x7d6802a6;
 static const uint32_t mflr_12		= 0x7d8802a6;
+static const uint32_t mr_0_3		= 0x7c601b78;
+static const uint32_t mr_3_0		= 0x7c030378;
 static const uint32_t mtctr_0		= 0x7c0903a6;
 static const uint32_t mtctr_11		= 0x7d6903a6;
 static const uint32_t mtctr_12		= 0x7d8903a6;
 static const uint32_t mtlr_0		= 0x7c0803a6;
+static const uint32_t mtlr_11		= 0x7d6803a6;
 static const uint32_t mtlr_12		= 0x7d8803a6;
 static const uint32_t nop		= 0x60000000;
 static const uint32_t ori_0_0_0		= 0x60000000;
@@ -3740,6 +3825,7 @@ static const uint32_t srdi_0_0_2	= 0x7800f082;
 static const uint32_t std_0_1		= 0xf8010000;
 static const uint32_t std_0_12		= 0xf80c0000;
 static const uint32_t std_2_1		= 0xf8410000;
+static const uint32_t std_11_1		= 0xf9610000;
 static const uint32_t stfd_0_1		= 0xd8010000;
 static const uint32_t stvx_0_12_0	= 0x7c0c01ce;
 static const uint32_t sub_11_11_12	= 0x7d6c5850;
@@ -4102,7 +4188,8 @@ class Stub_table : public Output_relaxed_input_section
       orig_data_size_(owner->current_data_size()),
       plt_size_(0), last_plt_size_(0),
       branch_size_(0), last_branch_size_(0), min_size_threshold_(0),
-      eh_frame_added_(false), need_save_res_(false), uniq_(id)
+      need_save_res_(false), uniq_(id), tls_get_addr_opt_bctrl_(-1u),
+      plt_fde_len_(0)
   {
     this->set_output_section(output_section);
 
@@ -4263,48 +4350,17 @@ class Stub_table : public Output_relaxed_input_section
     return false;
   }
 
-  // Add .eh_frame info for this stub section.  Unlike other linker
-  // generated .eh_frame this is added late in the link, because we
-  // only want the .eh_frame info if this particular stub section is
-  // non-empty.
+  // Generate a suitable FDE to describe code in this stub group.
   void
-  add_eh_frame(Layout* layout)
-  {
-    if (!parameters->options().ld_generated_unwind_info())
-      return;
-
-    // Since we add stub .eh_frame info late, it must be placed
-    // after all other linker generated .eh_frame info so that
-    // merge mapping need not be updated for input sections.
-    // There is no provision to use a different CIE to that used
-    // by .glink.
-    if (!this->targ_->has_glink())
-      return;
+  init_plt_fde();
 
-    if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0)
-      return;
-
-    layout->add_eh_frame_for_plt(this,
-				 Eh_cie<size>::eh_frame_cie,
-				 sizeof (Eh_cie<size>::eh_frame_cie),
-				 default_fde,
-				 sizeof (default_fde));
-    this->eh_frame_added_ = true;
-  }
+  // Add .eh_frame info for this stub section.
+  void
+  add_eh_frame(Layout* layout);
 
+  // Remove .eh_frame info for this stub section.
   void
-  remove_eh_frame(Layout* layout)
-  {
-    if (this->eh_frame_added_)
-      {
-	layout->remove_eh_frame_for_plt(this,
-					Eh_cie<size>::eh_frame_cie,
-					sizeof (Eh_cie<size>::eh_frame_cie),
-					default_fde,
-					sizeof (default_fde));
-	this->eh_frame_added_ = false;
-      }
-  }
+  remove_eh_frame(Layout* layout);
 
   Target_powerpc<size, big_endian>*
   targ() const
@@ -4356,7 +4412,12 @@ class Stub_table : public Output_relaxed_input_section
   plt_call_size(typename Plt_stub_entries::const_iterator p) const
   {
     if (size == 32)
-      return 16;
+      {
+	const Symbol* gsym = p->first.sym_;
+	if (this->targ_->is_tls_get_addr_opt(gsym))
+	  return 12 * 4;
+	return 4 * 4;
+      }
 
     bool is_iplt;
     Address plt_addr = this->plt_off(p, &is_iplt);
@@ -4370,6 +4431,9 @@ class Stub_table : public Output_relaxed_input_section
     got_addr += ppcobj->toc_base_offset();
     Address off = plt_addr - got_addr;
     unsigned int bytes = 4 * 4 + 4 * (ha(off) != 0);
+    const Symbol* gsym = p->first.sym_;
+    if (this->targ_->is_tls_get_addr_opt(gsym))
+      bytes += 13 * 4;
     if (this->targ_->abiversion() < 2)
       {
 	bool static_chain = parameters->options().plt_static_chain();
@@ -4379,6 +4443,12 @@ class Stub_table : public Output_relaxed_input_section
 		  + 8 * thread_safe
 		  + 4 * (ha(off + 8 + 8 * static_chain) != ha(off)));
       }
+    return bytes;
+  }
+
+  unsigned int
+  plt_call_align(unsigned int bytes) const
+  {
     unsigned int align = 1 << parameters->options().plt_align();
     if (align > 1)
       bytes = (bytes + align - 1) & -align;
@@ -4518,13 +4588,16 @@ class Stub_table : public Output_relaxed_input_section
   // a stub table, it is zero for the first few iterations, then
   // increases monotonically.
   Address min_size_threshold_;
-  // Whether .eh_frame info has been created for this stub section.
-  bool eh_frame_added_;
   // Set if this stub group needs a copy of out-of-line register
   // save/restore functions.
   bool need_save_res_;
   // Per stub table unique identifier.
   uint32_t uniq_;
+  // The bctrl in the __tls_get_addr_opt stub, if present.
+  unsigned int tls_get_addr_opt_bctrl_;
+  // FDE unwind info for this stub group.
+  unsigned int plt_fde_len_;
+  unsigned char plt_fde_[20];
 };
 
 // Add a plt call stub, if we do not already have one for this
@@ -4553,6 +4626,12 @@ Stub_table<size, big_endian>::add_plt_call_entry(
 	  p.first->second.localentry0_ = 1;
 	  this->targ_->set_has_localentry0();
 	}
+      if (this->targ_->is_tls_get_addr_opt(gsym))
+	{
+	  this->targ_->set_has_tls_get_addr_opt();
+	  this->tls_get_addr_opt_bctrl_ = this->plt_size_ - 5 * 4;
+	}
+      this->plt_size_ = this->plt_call_align(this->plt_size_);
     }
   if (size == 64
       && !tocsave
@@ -4578,6 +4657,7 @@ Stub_table<size, big_endian>::add_plt_call_entry(
   if (p.second)
     {
       this->plt_size_ = ent.off_ + this->plt_call_size(p.first);
+      this->plt_size_ = this->plt_call_align(this->plt_size_);
       if (size == 64
 	  && this->targ_->is_elfv2_localentry0(object, locsym_index))
 	{
@@ -4697,6 +4777,94 @@ Stub_table<size, big_endian>::find_long_branch_entry(
   return p->second;
 }
 
+// Generate a suitable FDE to describe code in this stub group.
+// The __tls_get_addr_opt call stub needs to describe where it saves
+// LR, to support exceptions that might be thrown from __tls_get_addr.
+
+template<int size, bool big_endian>
+void
+Stub_table<size, big_endian>::init_plt_fde()
+{
+  unsigned char* p = this->plt_fde_;
+  // offset pcrel sdata4, size udata4, and augmentation size byte.
+  memset (p, 0, 9);
+  p += 9;
+  if (this->tls_get_addr_opt_bctrl_ != -1u)
+    {
+      unsigned int to_bctrl = this->tls_get_addr_opt_bctrl_ / 4;
+      if (to_bctrl < 64)
+	*p++ = elfcpp::DW_CFA_advance_loc + to_bctrl;
+      else if (to_bctrl < 256)
+	{
+	  *p++ = elfcpp::DW_CFA_advance_loc1;
+	  *p++ = to_bctrl;
+	}
+      else if (to_bctrl < 65536)
+	{
+	  *p++ = elfcpp::DW_CFA_advance_loc2;
+	  elfcpp::Swap<16, big_endian>::writeval(p, to_bctrl);
+	  p += 2;
+	}
+      else
+	{
+	  *p++ = elfcpp::DW_CFA_advance_loc4;
+	  elfcpp::Swap<32, big_endian>::writeval(p, to_bctrl);
+	  p += 4;
+	}
+      *p++ = elfcpp::DW_CFA_offset_extended_sf;
+      *p++ = 65;
+      *p++ = -(this->targ_->stk_linker() / 8) & 0x7f;
+      *p++ = elfcpp::DW_CFA_advance_loc + 4;
+      *p++ = elfcpp::DW_CFA_restore_extended;
+      *p++ = 65;
+    }
+  this->plt_fde_len_ = p - this->plt_fde_;
+}
+
+// Add .eh_frame info for this stub section.  Unlike other linker
+// generated .eh_frame this is added late in the link, because we
+// only want the .eh_frame info if this particular stub section is
+// non-empty.
+
+template<int size, bool big_endian>
+void
+Stub_table<size, big_endian>::add_eh_frame(Layout* layout)
+{
+  if (!parameters->options().ld_generated_unwind_info())
+    return;
+
+  // Since we add stub .eh_frame info late, it must be placed
+  // after all other linker generated .eh_frame info so that
+  // merge mapping need not be updated for input sections.
+  // There is no provision to use a different CIE to that used
+  // by .glink.
+  if (!this->targ_->has_glink())
+    return;
+
+  if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0)
+    return;
+
+  this->init_plt_fde();
+  layout->add_eh_frame_for_plt(this,
+			       Eh_cie<size>::eh_frame_cie,
+			       sizeof (Eh_cie<size>::eh_frame_cie),
+			       this->plt_fde_, this->plt_fde_len_);
+}
+
+template<int size, bool big_endian>
+void
+Stub_table<size, big_endian>::remove_eh_frame(Layout* layout)
+{
+  if (this->plt_fde_len_ != 0)
+    {
+      layout->remove_eh_frame_for_plt(this,
+				      Eh_cie<size>::eh_frame_cie,
+				      sizeof (Eh_cie<size>::eh_frame_cie),
+				      this->plt_fde_, this->plt_fde_len_);
+      this->plt_fde_len_ = 0;
+    }
+}
+
 // A class to handle .glink.
 
 template<int size, bool big_endian>
@@ -4896,13 +5064,15 @@ Stub_table<size, big_endian>::define_stub_syms(Symbol_table* symtab)
 	      sprintf(localname, "%x", cs->first.locsym_);
 	      symname = localname;
 	    }
+	  else if (this->targ_->is_tls_get_addr_opt(cs->first.sym_))
+	    symname = this->targ_->tls_get_addr_opt()->name();
 	  else
 	    symname = cs->first.sym_->name();
 	  char* name = new char[8 + 10 + strlen(obj) + strlen(symname) + strlen(add) + 1];
 	  sprintf(name, "%08x.plt_call.%s%s%s", this->uniq_, obj, symname, add);
 	  Address value
 	    = this->stub_address() - this->address() + cs->second.off_;
-	  unsigned int stub_size = this->plt_call_size(cs);
+	  unsigned int stub_size = this->plt_call_align(this->plt_call_size(cs));
 	  this->targ_->define_local(symtab, name, this, value, stub_size);
 	}
     }
@@ -5011,6 +5181,33 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 		}
 
 	      p = oview + cs->second.off_;
+	      const Symbol* gsym = cs->first.sym_;
+	      if (this->targ_->is_tls_get_addr_opt(gsym))
+		{
+		  write_insn<big_endian>(p, ld_11_3 + 0);
+		  p += 4;
+		  write_insn<big_endian>(p, ld_12_3 + 8);
+		  p += 4;
+		  write_insn<big_endian>(p, mr_0_3);
+		  p += 4;
+		  write_insn<big_endian>(p, cmpdi_11_0);
+		  p += 4;
+		  write_insn<big_endian>(p, add_3_12_13);
+		  p += 4;
+		  write_insn<big_endian>(p, beqlr);
+		  p += 4;
+		  write_insn<big_endian>(p, mr_3_0);
+		  p += 4;
+		  if (!cs->second.localentry0_)
+		    {
+		      write_insn<big_endian>(p, mflr_11);
+		      p += 4;
+		      write_insn<big_endian>(p, (std_11_1
+						 + this->targ_->stk_linker()));
+		      p += 4;
+		    }
+		  use_fake_dep = thread_safe;
+		}
 	      if (ha(off) != 0)
 		{
 		  if (cs->second.r2save_)
@@ -5097,7 +5294,20 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 		      p += 4;
 		    }
 		}
-	      if (thread_safe && !use_fake_dep)
+	      if (!cs->second.localentry0_
+		  && this->targ_->is_tls_get_addr_opt(gsym))
+		{
+		  write_insn<big_endian>(p, bctrl);
+		  p += 4;
+		  write_insn<big_endian>(p, ld_2_1 + this->targ_->stk_toc());
+		  p += 4;
+		  write_insn<big_endian>(p, ld_11_1 + this->targ_->stk_linker());
+		  p += 4;
+		  write_insn<big_endian>(p, mtlr_11);
+		  p += 4;
+		  write_insn<big_endian>(p, blr);
+		}
+	      else if (thread_safe && !use_fake_dep)
 		{
 		  write_insn<big_endian>(p, cmpldi_2_0);
 		  p += 4;
@@ -5173,6 +5383,26 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 		plt_addr += plt_base;
 
 	      p = oview + cs->second.off_;
+	      const Symbol* gsym = cs->first.sym_;
+	      if (this->targ_->is_tls_get_addr_opt(gsym))
+		{
+		  write_insn<big_endian>(p, lwz_11_3 + 0);
+		  p += 4;
+		  write_insn<big_endian>(p, lwz_12_3 + 4);
+		  p += 4;
+		  write_insn<big_endian>(p, mr_0_3);
+		  p += 4;
+		  write_insn<big_endian>(p, cmpwi_11_0);
+		  p += 4;
+		  write_insn<big_endian>(p, add_3_12_2);
+		  p += 4;
+		  write_insn<big_endian>(p, beqlr);
+		  p += 4;
+		  write_insn<big_endian>(p, mr_3_0);
+		  p += 4;
+		  write_insn<big_endian>(p, nop);
+		  p += 4;
+		}
 	      if (parameters->options().output_is_position_independent())
 		{
 		  Address got_addr;
@@ -6225,7 +6455,7 @@ Target_powerpc<size, big_endian>::Scan::local(
     const elfcpp::Sym<size, big_endian>& lsym,
     bool is_discarded)
 {
-  this->maybe_skip_tls_get_addr_call(r_type, NULL);
+  this->maybe_skip_tls_get_addr_call(target, r_type, NULL);
 
   if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD)
       || (size == 32 && r_type == elfcpp::R_PPC_TLSGD))
@@ -6772,9 +7002,15 @@ Target_powerpc<size, big_endian>::Scan::global(
     unsigned int r_type,
     Symbol* gsym)
 {
-  if (this->maybe_skip_tls_get_addr_call(r_type, gsym) == Track_tls::SKIP)
+  if (this->maybe_skip_tls_get_addr_call(target, r_type, gsym)
+      == Track_tls::SKIP)
     return;
 
+  if (target->replace_tls_get_addr(gsym))
+    // Change a __tls_get_addr reference to __tls_get_addr_opt
+    // so dynamic relocs are emitted against the latter symbol.
+    gsym = target->tls_get_addr_opt();
+
   if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD)
       || (size == 32 && r_type == elfcpp::R_PPC_TLSGD))
     {
@@ -7904,6 +8140,8 @@ Target_powerpc<size, big_endian>::do_finalize_sections(
 	      odyn->add_section_plus_offset(elfcpp::DT_PPC_GOT,
 					    this->got_, this->got_->g_o_t());
 	    }
+	  if (this->has_tls_get_addr_opt_)
+	    odyn->add_constant(elfcpp::DT_PPC_OPT, elfcpp::PPC_OPT_TLS);
 	}
       else
 	{
@@ -7915,9 +8153,12 @@ Target_powerpc<size, big_endian>::do_finalize_sections(
 					    (this->glink_->pltresolve_size
 					     - 32));
 	    }
-	  if (this->has_localentry0_)
+	  if (this->has_localentry0_ || this->has_tls_get_addr_opt_)
 	    odyn->add_constant(elfcpp::DT_PPC64_OPT,
-			       elfcpp::PPC64_OPT_LOCALENTRY);
+			       ((this->has_localentry0_
+				 ? elfcpp::PPC64_OPT_LOCALENTRY : 0)
+				| (this->has_tls_get_addr_opt_
+				   ? elfcpp::PPC64_OPT_TLS : 0)));
 	}
     }
 
@@ -8035,9 +8276,12 @@ Target_powerpc<size, big_endian>::Relocate::relocate(
   if (view == NULL)
     return true;
 
+  if (target->replace_tls_get_addr(gsym))
+    gsym = static_cast<const Sized_symbol<size>*>(target->tls_get_addr_opt());
+
   const elfcpp::Rela<size, big_endian> rela(preloc);
   unsigned int r_type = elfcpp::elf_r_type<size>(rela.get_r_info());
-  switch (this->maybe_skip_tls_get_addr_call(r_type, gsym))
+  switch (this->maybe_skip_tls_get_addr_call(target, r_type, gsym))
     {
     case Track_tls::NOT_EXPECTED:
       gold_error_at_location(relinfo, relnum, rela.get_r_offset(),
@@ -8162,8 +8406,8 @@ Target_powerpc<size, big_endian>::Relocate::relocate(
 	{
 	  typedef typename elfcpp::Swap<32, big_endian>::Valtype Valtype;
 	  Valtype* wv = reinterpret_cast<Valtype*>(view);
-	  bool can_plt_call = localentry0;
-	  if (!localentry0 && rela.get_r_offset() + 8 <= view_size)
+	  bool can_plt_call = localentry0 || target->is_tls_get_addr_opt(gsym);
+	  if (!can_plt_call && rela.get_r_offset() + 8 <= view_size)
 	    {
 	      Valtype insn = elfcpp::Swap<32, big_endian>::readval(wv);
 	      Valtype insn2 = elfcpp::Swap<32, big_endian>::readval(wv + 1);
diff --git a/gold/resolve.cc b/gold/resolve.cc
index 7c22606..5a35f34 100644
--- a/gold/resolve.cc
+++ b/gold/resolve.cc
@@ -917,6 +917,31 @@ Symbol_table::report_resolve_problem(bool is_error, const char* msg,
   gold_info("%s: %s: previous definition here", program_name, objname);
 }
 
+// Completely override existing symbol.  Everything bar name_,
+// version_, and is_forced_local_ flag are copied.  version_ is
+// cleared if from->version_ is clear.  Returns true if this symbol
+// should be forced local.
+bool
+Symbol::clone(const Symbol* from, size_t symbol_size)
+{
+  // Don't allow cloning after dynamic linking info is attached to symbols.
+  // We aren't prepared to merge such.
+  gold_assert(!this->has_symtab_index() && !from->has_symtab_index());
+  gold_assert(!this->has_dynsym_index() && !from->has_dynsym_index());
+  gold_assert(this->got_offsets_.get_list() == NULL
+	      && from->got_offsets_.get_list() == NULL);
+  gold_assert(!this->has_plt_offset() && !from->has_plt_offset());
+  const char* name = this->name_;
+  const char* version = this->version_;
+  bool was_forced_local = this->is_forced_local_;
+  memcpy (this, from, symbol_size);
+  this->name_ = name;
+  if (from->version_)
+    this->version_ = version;
+  this->is_forced_local_ = was_forced_local;
+  return !was_forced_local && from->is_forced_local_;
+}
+
 // A special case of should_override which is only called for a strong
 // defined symbol from a regular object file.  This is used when
 // defining special symbols.
diff --git a/gold/symtab.h b/gold/symtab.h
index 853c9fe..635405a 100644
--- a/gold/symtab.h
+++ b/gold/symtab.h
@@ -329,6 +329,11 @@ class Symbol
   set_in_reg()
   { this->in_reg_ = true; }
 
+  // Forget this symbol was seen in a regular object.
+  void
+  clear_in_reg()
+  { this->in_reg_ = false; }
+
   // Return whether this symbol has been seen in a dynamic object.
   bool
   in_dyn() const
@@ -893,6 +898,13 @@ class Symbol
   set_non_zero_localentry()
   { this->non_zero_localentry_ = true; }
 
+  // Completely override existing symbol.  Everything bar name_,
+  // version_, and is_forced_local_ flag are copied.  version_ is
+  // cleared if from->version_ is clear.  Returns true if this symbol
+  // should be forced local.
+  bool
+  clone(const Symbol* from, size_t symbol_size);
+
  protected:
   // Instances of this class should always be created at a specific
   // size.
@@ -1690,6 +1702,14 @@ class Symbol_table
   version_script() const
   { return version_script_; }
 
+  // Completely override existing symbol.
+  void
+  clone(Symbol* to, const Symbol* from, size_t symbol_size)
+  {
+    if (to->clone(from, symbol_size))
+      this->force_local(to);
+  }
+
  private:
   Symbol_table(const Symbol_table&);
   Symbol_table& operator=(const Symbol_table&);

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]