Revert "Changed how process groups are assigned to child processes"

This reverts commit 25afc9b377.
It was meant for the major branch.
This commit is contained in:
Kurtis Rader 2017-08-13 15:28:48 -07:00
parent 8d53b72e46
commit 9d5990eda7
5 changed files with 125 additions and 120 deletions

View file

@ -21,7 +21,6 @@
#include <algorithm>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <type_traits>
#include <vector>
@ -384,6 +383,21 @@ void exec_job(parser_t &parser, job_t *j) {
return;
}
auto unblock_pid = [] (pid_t blocked_pid) {
//this is correct, except there's a race condition if the child hasn't yet SIGSTOP'd
//in that case, they never receive the SIGCONT
pid_t pid_status{};
if (waitpid(blocked_pid, &pid_status, WUNTRACED) != -1) {
// if (WIFSTOPPED(pid_status)) {
debug(2, L"Unblocking process %d.\n", blocked_pid);
kill(blocked_pid, SIGCONT);
return true;
// }
}
debug(2, L"waitpid call in unblock_pid failed!\n");
return false;
};
debug(4, L"Exec job '%ls' with id %d", j->command_wcstr(), j->job_id);
// Verify that all IO_BUFFERs are output. We used to support a (single, hacked-in) magical input
@ -471,14 +485,14 @@ void exec_job(parser_t &parser, job_t *j) {
if (keepalive.pid == 0) {
// Child
keepalive.pid = getpid();
child_set_group(j, &keepalive);
set_child_group(j, &keepalive, 1);
pause();
exit_without_destructors(0);
} else {
// Parent
debug(2, L"Fork #%d, pid %d: keepalive fork for '%ls'", g_fork_count, keepalive.pid,
j->command_wcstr());
set_child_group(j, keepalive.pid);
set_child_group(j, &keepalive, 0);
}
}
@ -513,8 +527,7 @@ void exec_job(parser_t &parser, job_t *j) {
// See if we need a pipe.
const bool pipes_to_next_command = !p->is_last_in_job;
//set to true if we end up forking for this process
bool child_forked = false;
bool command_blocked = false;
// The pipes the current process write to and read from. Unfortunately these can't be just
// allocated on the stack, since j->io wants shared_ptr.
@ -619,16 +632,12 @@ void exec_job(parser_t &parser, job_t *j) {
// This is the IO buffer we use for storing the output of a block or function when it is in
// a pipeline.
shared_ptr<io_buffer_t> block_output_io_buffer;
auto unblock_previous = [&j] () {
//we've already called waitpid after forking the child, so we've guaranteed that they're
//already SIGSTOP'd. Otherwise we'd be risking a deadlock because we can call SIGCONT before
//they've actually stopped, and they'll remain suspended indefinitely.
auto unblock_previous = [&] () {
if (blocked_pid != -1) {
debug(2, L"Unblocking process %d.\n", blocked_pid);
kill(blocked_pid, SIGCONT);
//now that next command in the chain has been started, unblock the previous command
unblock_pid(blocked_pid);
blocked_pid = -1;
}
blocked_pid = -1;
};
// This is the io_streams we pass to internal builtins.
@ -884,10 +893,14 @@ void exec_job(parser_t &parser, job_t *j) {
// This is the child process. Write out the contents of the pipeline.
p->pid = getpid();
setup_child_process(j, p, process_net_io_chain);
// Make child processes pause after executing setup_child_process() to give down-chain
// commands in the job a chance to join their process group and read their pipes.
// Start child processes that are part of a chain in a stopped state
// to ensure that they are still running when the next command in the
// chain is started.
// The process will be resumed when the next command in the chain is started.
kill(p->pid, SIGSTOP);
// Note that this may span multiple jobs, as jobs can read from each other.
if (pipes_to_next_command) {
kill(p->pid, SIGSTOP);
}
exec_write_and_exit(block_output_io_buffer->fd, buffer, count, status);
} else {
@ -895,9 +908,14 @@ void exec_job(parser_t &parser, job_t *j) {
// possibly give it control over the terminal.
debug(2, L"Fork #%d, pid %d: internal block or function for '%ls'",
g_fork_count, pid, p->argv0());
child_forked = true;
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
if (pipes_to_next_command) {
//it actually blocked itself after forking above, but print in here for output
//synchronization & so we can assign command_blocked in the correct address space
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
command_blocked = true;
}
p->pid = pid;
set_child_group(j, p, 0);
}
} else {
@ -1012,11 +1030,14 @@ void exec_job(parser_t &parser, job_t *j) {
// stdout and stderr, and then exit.
p->pid = getpid();
setup_child_process(j, p, process_net_io_chain);
// Make child processes pause after executing setup_child_process() to give down-chain
// commands in the job a chance to join their process group and read their pipes.
// Start child processes that are part of a chain in a stopped state
// to ensure that they are still running when the next command in the
// chain is started.
// The process will be resumed when the next command in the chain is started.
kill(p->pid, SIGSTOP);
// Note that this may span multiple jobs, as jobs can read from each other.
if (pipes_to_next_command) {
kill(p->pid, SIGSTOP);
}
do_builtin_io(outbuff, outbuff_len, errbuff, errbuff_len);
exit_without_destructors(p->status);
} else {
@ -1024,9 +1045,15 @@ void exec_job(parser_t &parser, job_t *j) {
// possibly give it control over the terminal.
debug(2, L"Fork #%d, pid %d: internal builtin for '%ls'", g_fork_count, pid,
p->argv0());
child_forked = true;
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
if (pipes_to_next_command) {
//it actually blocked itself after forking above, but print in here for output
//synchronization & so we can assign command_blocked in the correct address space
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
command_blocked = true;
}
p->pid = pid;
set_child_group(j, p, 0);
}
}
@ -1100,10 +1127,15 @@ void exec_job(parser_t &parser, job_t *j) {
// This is the child process.
p->pid = getpid();
setup_child_process(j, p, process_net_io_chain);
// Make child processes pause after executing setup_child_process() to give down-chain
// commands in the job a chance to join their process group and read their pipes.
// Start child processes that are part of a chain in a stopped state
// to ensure that they are still running when the next command in the
// chain is started.
// The process will be resumed when the next command in the chain is started.
kill(p->pid, SIGSTOP);
// Note that this may span multiple jobs, as jobs can read from each other.
if (pipes_to_next_command) {
kill(p->pid, SIGSTOP);
}
safe_launch_process(p, actual_cmd, argv, envv);
// safe_launch_process _never_ returns...
@ -1115,14 +1147,19 @@ void exec_job(parser_t &parser, job_t *j) {
job_mark_process_as_failed(j, p);
exec_error = true;
}
child_forked = true;
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
if (pipes_to_next_command) {
//it actually blocked itself after forking above, but print in here for output
//synchronization & so we can assign command_blocked in the correct address space
debug(2, L"Blocking process %d waiting for next command in chain.\n", pid);
command_blocked = true;
}
}
}
// This is the parent process. Store away information on the child, and possibly
// fice it control over the terminal.
p->pid = pid;
set_child_group(j, p, 0);
break;
}
@ -1133,31 +1170,10 @@ void exec_job(parser_t &parser, job_t *j) {
}
}
if (child_forked) {
//we have to wait to ensure the child has set their progress group and is in SIGSTOP state
//otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be blocked indefinitely.
pid_t pid_status{};
if (waitpid(p->pid, &pid_status, WUNTRACED) != -1) {
//the child is SIGSTOP'd and is guaranteed to have called child_set_group() at this point.
set_child_group(j, p->pid); //update our own process group info to match
//we are not unblocking the child via SIGCONT just yet to give the next process a chance to open
//the pipes and join the process group. We only unblock the last process in the job chain because
//no one awaits it.
if (!pipes_to_next_command)
{
kill(p->pid, SIGCONT);
}
}
else {
debug(2, L"waitpid(%d) call in unblock_pid failed:!\n", p->pid);
wperror(L"waitpid");
}
}
//if the command we ran _before_ this one was SIGSTOP'd to let this one catch up, unblock it now.
//this must be after the wait_pid on the process we just started, if any.
unblock_previous();
if (child_forked) {
//if the command we ran before this one was SIGSTOP'd to let this one catch up, unblock it now.
unblock_previous();
if (command_blocked) {
//store the newly-blocked command's PID so that it can be SIGCONT'd once the next process
//in the chain is started. That may be in this job or in another job.
blocked_pid = p->pid;

View file

@ -59,18 +59,14 @@ static void debug_safe_int(int level, const char *format, int val) {
debug_safe(level, format, buff);
}
/// Called only by the child to set its own process group (possibly creating a new group in the
/// process if it is the first in a JOB_CONTROL job. The parent will wait for this to finish.
/// A process that isn't already in control of the terminal can't give itself control of the
/// terminal without hanging, but it's not right for the child to try and give itself control
/// from the very beginning because the parent may not have gotten around to doing so yet. Let
/// the parent figure it out; if the child doesn't have terminal control and it later tries to
/// read from the terminal, the kernel will send it SIGTTIN and it'll hang anyway.
/// The key here is that the parent should transfer control of the terminal (if appropriate)
/// prior to sending the child SIGCONT to wake it up to exec.
/// This function should be called by both the parent process and the child right after fork() has
/// been called. If job control is enabled, the child is put in the jobs group, and if the child is
/// also in the foreground, it is also given control of the terminal. When called in the parent
/// process, this function may fail, since the child might have already finished and called exit.
/// The parent process may safely ignore the exit status of this call.
///
/// Returns true on sucess, false on failiure.
bool child_set_group(job_t *j, process_t *p) {
bool set_child_group(job_t *j, process_t *p, int print_errors) {
bool retval = true;
if (j->get_flag(JOB_CONTROL)) {
@ -78,53 +74,32 @@ bool child_set_group(job_t *j, process_t *p) {
if (j->pgid == -2) {
j->pgid = p->pid;
}
int failure = setpgid(p->pid, j->pgid);
// TODO: Figure out why we're testing whether the pgid is correct after attempting to
// set it failed. This was added in commit 4e912ef8 from 2012-02-27.
failure = failure && getpgid(p->pid) != j->pgid;
if (failure) { //!OCLINT(collapsible if statements)
char pid_buff[128];
char job_id_buff[128];
char getpgid_buff[128];
char job_pgid_buff[128];
char argv0[64];
char command[64];
format_long_safe(pid_buff, p->pid);
format_long_safe(job_id_buff, j->job_id);
format_long_safe(getpgid_buff, getpgid(p->pid));
format_long_safe(job_pgid_buff, j->pgid);
narrow_string_safe(argv0, p->argv0());
narrow_string_safe(command, j->command_wcstr());
if (setpgid(p->pid, j->pgid)) { //!OCLINT(collapsible if statements)
// TODO: Figure out why we're testing whether the pgid is correct after attempting to
// set it failed. This was added in commit 4e912ef8 from 2012-02-27.
if (getpgid(p->pid) != j->pgid && print_errors) {
char pid_buff[128];
char job_id_buff[128];
char getpgid_buff[128];
char job_pgid_buff[128];
char argv0[64];
char command[64];
debug_safe(
1, "Could not send own process %s, '%s' in job %s, '%s' from group %s to group %s",
pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff);
format_long_safe(pid_buff, p->pid);
format_long_safe(job_id_buff, j->job_id);
format_long_safe(getpgid_buff, getpgid(p->pid));
format_long_safe(job_pgid_buff, j->pgid);
narrow_string_safe(argv0, p->argv0());
narrow_string_safe(command, j->command_wcstr());
safe_perror("setpgid");
retval = false;
}
} else {
//this is probably stays unused in the child
j->pgid = getpgrp();
}
debug_safe(
1, "Could not send process %s, '%s' in job %s, '%s' from group %s to group %s",
pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff);
return retval;
}
/// Called only by the parent only after a child forks and successfully calls child_set_group, guaranteeing
/// the job control process group has been created and that the child belongs to the correct process group.
/// Here we can update our job_t structure to reflect the correct process group in the case of JOB_CONTROL,
/// and we can give the new process group control of the terminal if it's to run in the foreground. Note that
/// we can guarantee the child won't try to read from the terminal before we've had a chance to run this code,
/// because we haven't woken them up with a SIGCONT yet.
bool set_child_group(job_t *j, pid_t child_pid) {
bool retval = true;
if (j->get_flag(JOB_CONTROL)) {
// New jobs have the pgid set to -2
if (j->pgid == -2) {
j->pgid = child_pid;
safe_perror("setpgid");
retval = false;
}
}
} else {
j->pgid = getpgrp();
@ -132,16 +107,33 @@ bool set_child_group(job_t *j, pid_t child_pid) {
if (j->get_flag(JOB_TERMINAL) && j->get_flag(JOB_FOREGROUND)) { //!OCLINT(early exit)
if (tcgetpgrp(STDIN_FILENO) == j->pgid) {
//we've already assigned the process group control of the terminal when the first process in the job
//was started. There's no need to do so again, and on some platforms this can cause an EPERM error.
//In addition, if we've given control of the terminal to a process group, attempting to call tcsetpgrp
//from the background will cause SIGTTOU to be sent to everything in our process group (unless we
//handle it)..
debug(4, L"Process group %d already has control of terminal\n", j->pgid);
}
else {
//no need to duplicate the code here, a function already exists that does just this
retval = terminal_give_to_job(j, false /*new job, so not continuing*/);
debug(4, L"Attempting bring process group to foreground via tcsetpgrp for job->pgid %d\n", j->pgid);
debug(4, L"caller session id: %d, pgid %d has session id: %d\n", getsid(0), j->pgid, getsid(j->pgid));
int result = -1;
errno = EINTR;
while (result == -1 && errno == EINTR) {
signal_block(true);
result = tcsetpgrp(STDIN_FILENO, j->pgid);
signal_unblock(true);
}
if (result == -1) {
if (errno == ENOTTY) redirect_tty_output();
if (print_errors) {
char job_id_buff[64];
char command_buff[64];
char job_pgid_buff[128];
format_long_safe(job_id_buff, j->job_id);
narrow_string_safe(command_buff, j->command_wcstr());
format_long_safe(job_pgid_buff, j->pgid);
debug_safe(1, "Could not send job %s ('%s') with pgid %s to foreground", job_id_buff,
command_buff, job_pgid_buff);
safe_perror("tcsetpgrp");
retval = false;
}
}
}
}
@ -250,10 +242,10 @@ static int handle_child_io(const io_chain_t &io_chain) {
}
int setup_child_process(job_t *j, process_t *p, const io_chain_t &io_chain) {
bool ok = false;
bool ok = true;
if (p) {
ok = child_set_group(j, p);
ok = set_child_group(j, p, 1);
}
if (ok) {

View file

@ -18,8 +18,7 @@ class io_chain_t;
class job_t;
class process_t;
bool set_child_group(job_t *j, pid_t child_pid); //called by parent
bool child_set_group(job_t *j, process_t *p); //called by child
bool set_child_group(job_t *j, process_t *p, int print_errors);
/// Initialize a new child process. This should be called right away after forking in the child
/// process. If job control is enabled for this job, the process is put in the process group of the

View file

@ -782,7 +782,7 @@ static void read_try(job_t *j) {
/// \param j The job to give the terminal to.
/// \param cont If this variable is set, we are giving back control to a job that has previously
/// been stopped. In that case, we need to set the terminal attributes to those saved in the job.
bool terminal_give_to_job(job_t *j, int cont) {
static bool terminal_give_to_job(job_t *j, int cont) {
errno = 0;
if (j->pgid == 0) {
debug(2, "terminal_give_to_job() returning early due to no process group");

View file

@ -365,5 +365,3 @@ void proc_pop_interactive();
int proc_format_status(int status);
#endif
bool terminal_give_to_job(job_t *j, int cont);