This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SPU overlay manager fix and new ld param


This patch
a) replaces the fixed 2000-byte red zone stack allowance for
   --auto-overlay with a linker command line parameter
   (--extra-stack-space),
b) makes the overlay manager interrupt safe, and
c) no longer attempts to put functions called by the spu
   interrupt handler into non-overlay local store, since we can't do
   that reliably in the presence of calls via function pointer.

bfd/
	* elf32-spu.c (struct spu_link_hash_table): Add extra_stack_space.
	(spu_elf_check_vma): Add extra_stack_space param, copy to htab.
	(spu_elf_auto_overlay): Use it.
	(RECURSE_UNMARK): Define as 0.
	(unmark_overlay_section): Heed RECURSE_UNMARK.
	* elf32-spu.h (spu_elf_check_vma): Update prototype.
ld/
	* emultempl/spuelf.em (extra_stack_space): New variable.
	(gld${EMULATION_NAME}_finish): Pass it to spu_elf_check_vma.
	(PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
	PARSE_AND_LIST_ARGS_CASES): Handle --extra-stack-space.
	* emultempl/spu_ovl.S: Mask interrupts during dma and update of
	overlay manager structures.
	* emultempl/spu_ovl.o: Regenerate.

diff -urp src.old/bfd/elf32-spu.c src/bfd/elf32-spu.c
--- src.old/bfd/elf32-spu.c	2008-06-07 21:50:20.000000000 +0930
+++ src/bfd/elf32-spu.c	2008-06-12 15:41:02.000000000 +0930
@@ -325,6 +325,11 @@ struct spu_link_hash_table
   unsigned int overlay_fixed;
   /* Local store --auto-overlay should reserve for stack and heap.  */
   unsigned int reserved;
+  /* If reserved is not specified, stack analysis will calculate a value
+     for the stack.  This parameter adjusts that value to allow for
+     negative sp access (the ABI says 2000 bytes below sp are valid,
+     and the overlay manager uses some of this area).  */
+  int extra_stack_space;
   /* Count of overlay stubs needed in non-overlay area.  */
   unsigned int non_ovly_stub;
 
@@ -1548,6 +1553,7 @@ spu_elf_check_vma (struct bfd_link_info 
 		   unsigned int hi,
 		   unsigned int overlay_fixed,
 		   unsigned int reserved,
+		   int extra_stack_space,
 		   void (*spu_elf_load_ovl_mgr) (void),
 		   FILE *(*spu_elf_open_overlay_script) (void),
 		   void (*spu_elf_relink) (void))
@@ -1562,6 +1568,7 @@ spu_elf_check_vma (struct bfd_link_info 
   htab->local_store = hi + 1 - lo;
   htab->overlay_fixed = overlay_fixed;
   htab->reserved = reserved;
+  htab->extra_stack_space = extra_stack_space;
   htab->spu_elf_load_ovl_mgr = spu_elf_load_ovl_mgr;
   htab->spu_elf_open_overlay_script = spu_elf_open_overlay_script;
   htab->spu_elf_relink = spu_elf_relink;
@@ -2923,6 +2930,11 @@ mark_overlay_section (struct function_in
   return TRUE;
 }
 
+/* If non-zero then unmark functions called from those within sections
+   that we need to unmark.  Unfortunately this isn't reliable since the
+   call graph cannot know the destination of function pointer calls.  */
+#define RECURSE_UNMARK 0
+
 struct _uos_param {
   asection *exclude_input_section;
   asection *exclude_output_section;
@@ -2950,9 +2962,10 @@ unmark_overlay_section (struct function_
       || fun->sec->output_section == uos_param->exclude_output_section)
     excluded = 1;
 
-  uos_param->clearing += excluded;
+  if (RECURSE_UNMARK)
+    uos_param->clearing += excluded;
 
-  if (uos_param->clearing)
+  if (RECURSE_UNMARK ? uos_param->clearing : excluded)
     {
       fun->sec->linker_mark = 0;
       if (fun->rodata)
@@ -2963,7 +2976,8 @@ unmark_overlay_section (struct function_
     if (!unmark_overlay_section (call->fun, info, param))
       return FALSE;
 
-  uos_param->clearing -= excluded;
+  if (RECURSE_UNMARK)
+    uos_param->clearing -= excluded;
   return TRUE;
 }
 
@@ -3574,7 +3588,7 @@ spu_elf_auto_overlay (struct bfd_link_in
       sum_stack_param.overall_stack = 0;
       if (!for_each_node (sum_stack, info, &sum_stack_param, TRUE))
 	goto err_exit;
-      htab->reserved = sum_stack_param.overall_stack + 2000;
+      htab->reserved = sum_stack_param.overall_stack + htab->extra_stack_space;
     }
   fixed_size += htab->reserved;
   fixed_size += htab->non_ovly_stub * OVL_STUB_SIZE;
diff -urp src.old/bfd/elf32-spu.h src/bfd/elf32-spu.h
--- src.old/bfd/elf32-spu.h	2008-03-28 17:24:57.000000000 +1030
+++ src/bfd/elf32-spu.h	2008-06-10 17:18:02.000000000 +0930
@@ -62,5 +62,5 @@ extern int spu_elf_size_stubs (struct bf
 extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int);
 extern asection *spu_elf_check_vma (struct bfd_link_info *, int,
 				    unsigned int, unsigned int, unsigned int,
-				    unsigned int, void (*) (void),
+				    unsigned int, int, void (*) (void),
 				    FILE *(*) (void), void (*) (void));
diff -urp src.old/ld/emultempl/spuelf.em src/ld/emultempl/spuelf.em
--- src.old/ld/emultempl/spuelf.em	2008-06-07 11:27:23.000000000 +0930
+++ src/ld/emultempl/spuelf.em	2008-06-12 15:22:57.000000000 +0930
@@ -50,6 +50,7 @@ static int auto_overlay = 0;
 static char *auto_overlay_file = 0;
 static unsigned int auto_overlay_fixed = 0;
 static unsigned int auto_overlay_reserved = 0;
+static int extra_stack_space = 2000;
 int my_argc;
 char **my_argv;
 
@@ -330,6 +331,7 @@ gld${EMULATION_NAME}_finish (void)
 	  s = spu_elf_check_vma (&link_info, auto_overlay,
 				 local_store_lo, local_store_hi,
 				 auto_overlay_fixed, auto_overlay_reserved,
+				 extra_stack_space,
 				 spu_elf_load_ovl_mgr,
 				 spu_elf_open_overlay_script,
 				 spu_elf_relink);
@@ -523,7 +525,8 @@ PARSE_AND_LIST_PROLOGUE='
 #define OPTION_SPU_OVERLAY_RODATA	(OPTION_SPU_AUTO_RELINK + 1)
 #define OPTION_SPU_FIXED_SPACE		(OPTION_SPU_OVERLAY_RODATA + 1)
 #define OPTION_SPU_RESERVED_SPACE	(OPTION_SPU_FIXED_SPACE + 1)
-#define OPTION_SPU_NO_AUTO_OVERLAY	(OPTION_SPU_RESERVED_SPACE + 1)
+#define OPTION_SPU_EXTRA_STACK		(OPTION_SPU_RESERVED_SPACE + 1)
+#define OPTION_SPU_NO_AUTO_OVERLAY	(OPTION_SPU_EXTRA_STACK + 1)
 '
 
 PARSE_AND_LIST_LONGOPTS='
@@ -539,6 +542,7 @@ PARSE_AND_LIST_LONGOPTS='
   { "overlay-rodata", no_argument, NULL, OPTION_SPU_OVERLAY_RODATA },
   { "fixed-space", required_argument, NULL, OPTION_SPU_FIXED_SPACE },
   { "reserved-space", required_argument, NULL, OPTION_SPU_RESERVED_SPACE },
+  { "extra-stack-space", required_argument, NULL, OPTION_SPU_EXTRA_STACK },
   { "no-auto-overlay", optional_argument, NULL, OPTION_SPU_NO_AUTO_OVERLAY },
 '
 
@@ -557,7 +561,10 @@ PARSE_AND_LIST_OPTIONS='
   --overlay-rodata            Place read-only data with associated function\n\
                               code in overlays.\n\
   --fixed-space=bytes         Local store for non-overlay code and data.\n\
-  --reserved-space=bytes      Local store for stack and heap.\n"
+  --reserved-space=bytes      Local store for stack and heap.  If not specified\n\
+                              ld will estimate stack size and assume no heap.\n\
+  --extra-stack-space=bytes   Space for negative sp access (default 2000) if\n\
+                              --reserved-space not given.\n"
 		   ));
 '
 
@@ -635,6 +642,15 @@ PARSE_AND_LIST_ARGS_CASES='
       }
       break;
 
+    case OPTION_SPU_EXTRA_STACK:
+      {
+	char *end;
+	extra_stack_space = strtol (optarg, &end, 0);
+	if (*end != 0)
+	  einfo (_("%P%F: invalid --extra-stack-space value `%s'\''\n"), optarg);
+      }
+      break;
+
     case OPTION_SPU_NO_AUTO_OVERLAY:
       auto_overlay = 0;
       if (optarg != NULL)
diff -urp src.old/ld/emultempl/spu_ovl.S src/ld/emultempl/spu_ovl.S
--- src.old/ld/emultempl/spu_ovl.S	2008-02-07 11:56:23.000000000 +1030
+++ src/ld/emultempl/spu_ovl.S	2008-06-12 15:42:54.000000000 +0930
@@ -67,11 +67,13 @@
 #define rv5		reserved4
 #define cgshuf		reserved4
 #define newovl		reserved4
+#define irqtmp1		reserved4
+#define irqtmp2		reserved4
 
 #define reserved5	$79
 #define target		reserved5
 
-#define save1		$72
+#define save1		$74
 #define rv4		save1
 #define rv7		save1
 #define tagid		save1
@@ -87,7 +89,7 @@
 #define oldovl		save2
 #define newvma		save2
 
-#define save3		$74
+#define save3		$72
 #define rv1		save3
 #define ea64		save3
 #define buf3		save3
@@ -95,6 +97,8 @@
 #define newmap		save3
 #define oldmask		save3
 
+#define save4		$71
+#define irq_stat	save4
 
 	.text
 	.align 	4
@@ -140,7 +144,12 @@ __ovly_return:
 #nop; lnop
 #nop
 	lqx	vma, tab1, off1					# 1,6	8
+#ifdef OVLY_IRQ_SAVE
+	nop
+	stqd	save4, -64($sp)					# 1,6	9
+#else
 #nop; lnop
+#endif
 #nop; lnop
 #nop; lnop
 #nop; lnop
@@ -168,7 +177,9 @@ ovly_ret9:
  * On entry $75 points to a word consisting of the overlay index in
  * the top 14 bits, and the target address in the bottom 18 bits.
  *
- * Sets up $lr to return via __ovly_return.
+ * Sets up $lr to return via __ovly_return.  If $lr is already set
+ * to return via __ovly_return, don't change it.  In that case we
+ * have a tail call from one overlay function to another.
  * Updates __ovly_current.
  */
 	.align  3
@@ -245,7 +256,11 @@ __ovly_load:
 	or	rv7, rv4, rv6					# 0,2	16
 	lqd	save2, -32($sp)					# 1,6	16
 	andi	present2, size2, 1				# 0,2	17
+#ifdef OVLY_IRQ_SAVE
+	stqd	save4, -64($sp)					# 1,6	17
+#else
 	lnop							# 1,0	17
+#endif
 	selb	$lr, rv7, $lr, rv5				# 0,2	18
 	lqd	save1, -16($sp)					# 1,6	18
 #nop
@@ -268,9 +283,19 @@ ovly_load9:
 	.type	__ovly_load_event, @function
 __ovly_load_event:
 do_load:
+#ifdef OVLY_IRQ_SAVE
+	ila	irqtmp1, do_load10				# 0,2	-5
+	rotqbyi	sz, vma, 8					# 1,4	-5
 #nop
-	rotqbyi	sz, vma, 8					# 1,4	0
+	rdch	irq_stat, $SPU_RdMachStat			# 1,6	-4
 #nop
+	bid	irqtmp1						# 1,4	-3
+do_load10:
+	nop
+#else
+#nop
+	rotqbyi	sz, vma, 8					# 1,4	0
+#endif
 	rotqbyi	osize, vma, 4					# 1,4	1
 #nop
 	lqa	ea64, _EAR_					# 1,6	2
@@ -410,14 +435,30 @@ __ovly_xfer_loop:
 #nop; lnop
 	andc	pbit, pbit, zovl				# 0,2	74
 	lqd	save2, -32($sp)					# 1,6	74
+#ifdef OVLY_IRQ_SAVE
+	ila	irqtmp2, do_load90				# 0,2	75
+#lnop
+	andi	irq_stat, irq_stat, 1				# 0,2	76
+#lnop
+#else
 #nop; lnop
 #nop; lnop
+#endif
 	andc	oldvma, oldvma, pbit				# 0,2	77
 	lqd	save1, -16($sp)					# 1,6	77
-#nop; lnop
-	nop
+	nop	       						# 0,0	78
+#lnop
+#nop
 	stqx	oldvma, tab5, off5				# 1,6	79
+#nop
+#ifdef OVLY_IRQ_SAVE
+	binze	irq_stat, irqtmp2				# 1,4	80
+do_load90:
+#nop
+	lqd	save4, -64($sp)					# 1,6	84
+#else
 #nop; lnop
+#endif
 
 	.global	_ovly_debug_event
 	.type	_ovly_debug_event, @function
@@ -425,6 +466,6 @@ _ovly_debug_event:
 	nop
 /* Branch to target address. */
 do_load99:
-	bi	target						# 1,4	81
+	bi	target						# 1,4	81/85
 
 	.size	__ovly_load, . - __ovly_load

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]