This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PR gdb/15713 - errors from i386_linux_resume lead to lock-up


Sergio mentioned off list that this worked for him too, so I've
pushed it in.

Thanks,
-- 
Pedro Alves

On 05/21/2014 01:47 PM, Pedro Alves wrote:
> linux_nat_resume is not considering that linux_ops->to_resume may throw:
> 
>   /* Mark LWP as not stopped to prevent it from being continued by
>      linux_nat_resume_callback.  */
>   lp->stopped = 0;
> 
>   if (resume_many)
>     iterate_over_lwps (ptid, linux_nat_resume_callback, NULL);
> 
> If something within linux_nat_resume_callback throws, GDB leaves the
> lwp_info as if the inferior was resumed, while it actually wasn't.
> 
> A couple of examples (there are possibly others):
> 
>  - i386_linux_resume calls target_read which calls QUIT.
>  - if the actual ptrace resumption fails in inf_ptrace_resume,
>    perror_with_name is called.
> 
> If the user tries to kill the inferior at this point (or quit, which
> offers to kill), GDB locks up trying to stop the lwp -- if it is
> already stopped no new waitpid event gets generated for it.
> 
> Fix this by setting the stopped flag earlier, as soon as we collect a
> stop event with waitpid, and by always clearing it only after the lwp
> has been resumed successfully.
> 
> Tested on x86_64 Fedora 20.  Confirmed the lock-up disappears using a
> local hack that forces an error in inf_ptrace_resume.
> 
> Also fixes a little "set debug lin-lwp" annoyance.  Currently we always see:
> 
>  Continuing.
>  LLR: Preparing to resume process 6802, 0, inferior_ptid Thread 0x7ffff7fc7740 (LWP 6802)
>                                                                                 ^^^^^^^^
>  RC: Resuming sibling Thread 0x7ffff77c5700 (LWP 6807), 0, resume
>  RC: Resuming sibling Thread 0x7ffff7fc6700 (LWP 6806), 0, resume
>  RC: Not resuming sibling Thread 0x7ffff7fc7740 (LWP 6802) (not stopped)
>                                                  ^^^^^^^^^^^^^^^^^^^^^^^
>  LLR: PTRACE_CONT process 6802, 0 (resume event thread)
> 
> This patch gets rid of the "Not resuming sibling" line.
> 
> 2014-05-21  Pedro Alves  <palves@redhat.com>
> 
> 	PR gdb/15713
> 	* linux-nat.c (linux_nat_resume_callback): Rename the second
> 	parameter to 'except'.  Skip LP if it points to EXCEPT.
> 	(linux_nat_resume): Don't mark the event lwp as not stopped
> 	before resuming sibling lwps.  Instead ask
> 	linux_nat_resume_callback to skip the event lwp.  Mark it as not
> 	stopped after actually resuming it.
> 	(linux_handle_syscall_trap): Mark the lwp as not stopped after
> 	resuming it.
> 	(wait_lwp): Mark the lwp as stopped here.
> 	(stop_wait_callback): Mark the lwp as not stopped right after
> 	resuming it.  Don't mark lwps as stopped here.
> 	(linux_nat_filter_event): Mark the lwp as stopped earlier.
> 	(linux_nat_wait_1): Don't mark dead lwps as stopped here.
> ---
>  gdb/linux-nat.c | 41 ++++++++++++++++++-----------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)
> 
> diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
> index e84ee95..0d92b25 100644
> --- a/gdb/linux-nat.c
> +++ b/gdb/linux-nat.c
> @@ -1642,13 +1642,17 @@ resume_lwp (struct lwp_info *lp, int step, enum gdb_signal signo)
>      }
>  }
>  
> -/* Resume LWP, with the last stop signal, if it is in pass state.  */
> +/* Callback for iterate_over_lwps.  If LWP is EXCEPT, do nothing.
> +   Resume LWP with the last stop signal, if it is in pass state.  */
>  
>  static int
> -linux_nat_resume_callback (struct lwp_info *lp, void *data)
> +linux_nat_resume_callback (struct lwp_info *lp, void *except)
>  {
>    enum gdb_signal signo = GDB_SIGNAL_0;
>  
> +  if (lp == except)
> +    return 0;
> +
>    if (lp->stopped)
>      {
>        struct thread_info *thread;
> @@ -1764,12 +1768,8 @@ linux_nat_resume (struct target_ops *ops,
>        return;
>      }
>  
> -  /* Mark LWP as not stopped to prevent it from being continued by
> -     linux_nat_resume_callback.  */
> -  lp->stopped = 0;
> -
>    if (resume_many)
> -    iterate_over_lwps (ptid, linux_nat_resume_callback, NULL);
> +    iterate_over_lwps (ptid, linux_nat_resume_callback, lp);
>  
>    /* Convert to something the lower layer understands.  */
>    ptid = pid_to_ptid (ptid_get_lwp (lp->ptid));
> @@ -1778,6 +1778,7 @@ linux_nat_resume (struct target_ops *ops,
>      linux_nat_prepare_to_resume (lp);
>    linux_ops->to_resume (linux_ops, ptid, step, signo);
>    lp->stopped_by_watchpoint = 0;
> +  lp->stopped = 0;
>  
>    if (debug_linux_nat)
>      fprintf_unfiltered (gdb_stdlog,
> @@ -1864,6 +1865,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping)
>  
>        lp->syscall_state = TARGET_WAITKIND_IGNORE;
>        ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0);
> +      lp->stopped = 0;
>        return 1;
>      }
>  
> @@ -1947,6 +1949,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping)
>      linux_nat_prepare_to_resume (lp);
>    linux_ops->to_resume (linux_ops, pid_to_ptid (ptid_get_lwp (lp->ptid)),
>  			lp->step, GDB_SIGNAL_0);
> +  lp->stopped = 0;
>    return 1;
>  }
>  
> @@ -2156,7 +2159,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
>  	  linux_ops->to_resume (linux_ops,
>  				pid_to_ptid (ptid_get_lwp (lp->ptid)),
>  				0, GDB_SIGNAL_0);
> -
> +	  lp->stopped = 0;
>  	  return 1;
>  	}
>  
> @@ -2311,6 +2314,7 @@ wait_lwp (struct lwp_info *lp)
>      }
>  
>    gdb_assert (WIFSTOPPED (status));
> +  lp->stopped = 1;
>  
>    /* Handle GNU/Linux's syscall SIGTRAPs.  */
>    if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
> @@ -2564,6 +2568,7 @@ stop_wait_callback (struct lwp_info *lp, void *data)
>  
>  	  errno = 0;
>  	  ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0);
> +	  lp->stopped = 0;
>  	  if (debug_linux_nat)
>  	    fprintf_unfiltered (gdb_stdlog,
>  				"PTRACE_CONT %s, 0, 0 (%s) "
> @@ -2590,9 +2595,7 @@ stop_wait_callback (struct lwp_info *lp, void *data)
>  
>  	  /* Save the sigtrap event.  */
>  	  lp->status = status;
> -	  gdb_assert (!lp->stopped);
>  	  gdb_assert (lp->signalled);
> -	  lp->stopped = 1;
>  	}
>        else
>  	{
> @@ -2604,8 +2607,6 @@ stop_wait_callback (struct lwp_info *lp, void *data)
>  				"SWC: Delayed SIGSTOP caught for %s.\n",
>  				target_pid_to_str (lp->ptid));
>  
> -	  lp->stopped = 1;
> -
>  	  /* Reset SIGNALLED only after the stop_wait_callback call
>  	     above as it does gdb_assert on SIGNALLED.  */
>  	  lp->signalled = 0;
> @@ -2933,6 +2934,10 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p)
>    if (!WIFSTOPPED (status) && !lp)
>      return NULL;
>  
> +  /* This LWP is stopped now.  (And if dead, this prevents it from
> +     ever being continued.)  */
> +  lp->stopped = 1;
> +
>    /* Handle GNU/Linux's syscall SIGTRAPs.  */
>    if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
>      {
> @@ -2975,7 +2980,6 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p)
>  	 used.  */
>        if (ptid_get_pid (lp->ptid) == ptid_get_lwp (lp->ptid))
>  	{
> -	  lp->stopped = 1;
>  	  iterate_over_lwps (pid_to_ptid (ptid_get_pid (lp->ptid)),
>  			     stop_and_resume_callback, new_pending_p);
>  	}
> @@ -3320,13 +3324,9 @@ retry:
>  				     " cancelled it\n",
>  				     ptid_get_lwp (lp->ptid));
>  			}
> -		      lp->stopped = 1;
>  		    }
>  		  else
> -		    {
> -		      lp->stopped = 1;
> -		      lp->signalled = 0;
> -		    }
> +		    lp->signalled = 0;
>  		}
>  	      else if (WIFEXITED (lp->status) || WIFSIGNALED (lp->status))
>  		{
> @@ -3343,11 +3343,6 @@ retry:
>  		     pending for the next time we're able to report
>  		     it.  */
>  
> -		  /* Prevent trying to stop this thread again.  We'll
> -		     never try to resume it because it has a pending
> -		     status.  */
> -		  lp->stopped = 1;
> -
>  		  /* Dead LWP's aren't expected to reported a pending
>  		     sigstop.  */
>  		  lp->signalled = 0;
> 


-- 
Pedro Alves


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]