Revert "Stop reaping children from SIGCHLD signal handler"

This reverts commit 3fe1069219.

In light of #1768
This commit is contained in:
ridiculousfish 2014-10-21 11:33:22 -07:00
parent 21a751d153
commit 315ff1e712
2 changed files with 43 additions and 96 deletions

138
proc.cpp
View file

@ -569,86 +569,34 @@ io_chain_t job_t::all_io_redirections() const
return result; return result;
} }
typedef unsigned int process_generation_count_t; /* This is called from a signal handler */
/* A static value tracking how many SIGCHLDs we have seen. This is only ever modified from within the SIGCHLD signal handler, and therefore does not need atomics or locks */
static volatile process_generation_count_t s_sigchld_generation_count = 0;
/* If we have received a SIGCHLD signal, process any children. If await is false, this returns immediately if no SIGCHLD has been received. If await is true, this waits for one. Returns true if something was processed. This returns the number of children processed, or -1 on error. */
static int process_mark_finished_children(bool wants_await)
{
ASSERT_IS_MAIN_THREAD();
/* A static value tracking the SIGCHLD gen count at the time we last processed it. When this is different from s_sigchld_generation_count, it indicates there may be unreaped processes. There may not be if we reaped them via the other waitpid path. This is only ever modified from the main thread, and not from a signal handler. */
static process_generation_count_t s_last_processed_sigchld_generation_count = 0;
int processed_count = 0;
bool got_error = false;
/* The critical read. This fetches a value which is only written in the signal handler. This needs to be an atomic read (we'd use sig_atomic_t, if we knew that were unsigned - fortunately aligned unsigned int is atomic on pretty much any modern chip.) It also needs to occur before we start reaping, since the signal handler can be invoked at any point. */
const process_generation_count_t local_count = s_sigchld_generation_count;
/* Compute how many children to process. This is an upper bound - there may be fewer than this, either if someone else called waitpid(), or if some of the SIGCHLDs were from stops instead of deaths. Note that overflow is a remote but real possibility, which is why the gencount must be unsigned, so that the subtraction is always well defined. */
unsigned to_process = local_count - s_last_processed_sigchld_generation_count;
/* If we are awaiting, always process at least one */
if (to_process < 1 && wants_await)
{
to_process = 1;
}
if (to_process > 0)
{
/* Call waitpid that many times, until we get 0/ECHILD. Note that we expect this to be 0 most of the time. Note also that the condition is strictly unnecessary - the WNOHANG ensures that we'll always exit eventually, but the loop allows us to completely avoid system calls in the common case. */
for (unsigned i=0; i < to_process; i++)
{
/* If we wait, it's only on the first iteration. So we want to set NOHANG (don't wait) unless wants_await is true and this is the first iteration. */
int options = WUNTRACED;
if (! (wants_await && i == 0))
{
options |= WNOHANG;
}
int status = -1;
pid_t pid = waitpid(-1, &status, options);
if (pid > 0)
{
/* We got a valid pid */
handle_child_status(pid, status);
processed_count += 1;
}
else if (pid == 0)
{
/* No ready-and-waiting children, we're done */
break;
}
else
{
/* This indicates an error. One likely failure is ECHILD (no children), which we break on, and is not considered an error - that means that we were awaiting, but someone else already reaped the children. The other likely failure is EINTR, which means we got a signal, which is considered an error. Either way we return -1. */
got_error = (errno != ECHILD);
break;
}
}
/* Remember the reaped count unless we errored */
if (! got_error)
{
s_last_processed_sigchld_generation_count = local_count;
}
}
return got_error ? -1 : processed_count;
}
/* This is called from a signal handler. The signal is always SIGCHLD. */
void job_handle_signal(int signal, siginfo_t *info, void *con) void job_handle_signal(int signal, siginfo_t *info, void *con)
{ {
/* This is the only place that this generation count is modified. It's OK if it overflows. */
s_sigchld_generation_count += 1; int status;
pid_t pid;
int errno_old = errno;
got_signal = 1; got_signal = 1;
/* pca: I can't justify this kill() call */ // write( 2, "got signal\n", 11 );
int errno_old = errno;
while (1)
{
switch (pid=waitpid(-1,&status,WUNTRACED|WNOHANG))
{
case 0:
case -1:
{
errno=errno_old;
return;
}
default:
handle_child_status(pid, status);
break;
}
}
kill(0, SIGIO); kill(0, SIGIO);
errno=errno_old; errno=errno_old;
} }
@ -687,19 +635,20 @@ int job_reap(bool interactive)
job_t *jnext; job_t *jnext;
int found=0; int found=0;
/* job_reap may fire an event handler, we do not want to call ourselves recursively (to avoid infinite recursion). */ static int locked = 0;
static bool locked = false;
if (locked) locked++;
{
return 0;
}
locked = true;
process_mark_finished_children(false);
/* Preserve the exit status */ /* Preserve the exit status */
const int saved_status = proc_get_last_status(); const int saved_status = proc_get_last_status();
/*
job_read may fire an event handler, we do not want to call
ourselves recursively (to avoid infinite recursion).
*/
if (locked>1)
return 0;
job_iterator_t jobs; job_iterator_t jobs;
jnext = jobs.next(); jnext = jobs.next();
while (jnext) while (jnext)
@ -810,7 +759,7 @@ int job_reap(bool interactive)
/* Restore the exit status. */ /* Restore the exit status. */
proc_set_last_status(saved_status); proc_set_last_status(saved_status);
locked = false; locked = 0;
return found; return found;
} }
@ -1204,14 +1153,6 @@ void job_continue(job_t *j, bool cont)
case 1: case 1:
{ {
read_try(j); read_try(j);
process_mark_finished_children(false);
break;
}
case 0:
{
/* No FDs are ready. Look for finished processes. */
process_mark_finished_children(false);
break; break;
} }
@ -1225,8 +1166,13 @@ void job_continue(job_t *j, bool cont)
improvement on my 300 MHz machine) on improvement on my 300 MHz machine) on
short-lived jobs. short-lived jobs.
*/ */
int processed = process_mark_finished_children(true); int status;
if (processed < 0) pid_t pid = waitpid(-1, &status, WUNTRACED);
if (pid > 0)
{
handle_child_status(pid, status);
}
else
{ {
/* /*
This probably means we got a This probably means we got a

1
proc.h
View file

@ -599,4 +599,5 @@ void proc_pop_interactive();
*/ */
int proc_format_status(int status); int proc_format_status(int status);
#endif #endif