diff --git a/src/exec.cpp b/src/exec.cpp index a16d6ac28..ab13a5513 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -384,6 +383,21 @@ void exec_job(parser_t &parser, job_t *j) { return; } + auto unblock_pid = [] (pid_t blocked_pid) { + //this is correct, except there's a race condition if the child hasn't yet SIGSTOP'd + //in that case, they never receive the SIGCONT + pid_t pid_status{}; + if (waitpid(blocked_pid, &pid_status, WUNTRACED) != -1) { + // if (WIFSTOPPED(pid_status)) { + debug(2, L"Unblocking process %d.\n", blocked_pid); + kill(blocked_pid, SIGCONT); + return true; + // } + } + debug(2, L"waitpid call in unblock_pid failed!\n"); + return false; + }; + debug(4, L"Exec job '%ls' with id %d", j->command_wcstr(), j->job_id); // Verify that all IO_BUFFERs are output. We used to support a (single, hacked-in) magical input @@ -471,14 +485,14 @@ void exec_job(parser_t &parser, job_t *j) { if (keepalive.pid == 0) { // Child keepalive.pid = getpid(); - child_set_group(j, &keepalive); + set_child_group(j, &keepalive, 1); pause(); exit_without_destructors(0); } else { // Parent debug(2, L"Fork #%d, pid %d: keepalive fork for '%ls'", g_fork_count, keepalive.pid, j->command_wcstr()); - set_child_group(j, keepalive.pid); + set_child_group(j, &keepalive, 0); } } @@ -513,8 +527,7 @@ void exec_job(parser_t &parser, job_t *j) { // See if we need a pipe. const bool pipes_to_next_command = !p->is_last_in_job; - //set to true if we end up forking for this process - bool child_forked = false; + bool command_blocked = false; // The pipes the current process write to and read from. Unfortunately these can't be just // allocated on the stack, since j->io wants shared_ptr. @@ -619,16 +632,12 @@ void exec_job(parser_t &parser, job_t *j) { // This is the IO buffer we use for storing the output of a block or function when it is in // a pipeline. shared_ptr block_output_io_buffer; - - auto unblock_previous = [&j] () { - //we've already called waitpid after forking the child, so we've guaranteed that they're - //already SIGSTOP'd. Otherwise we'd be risking a deadlock because we can call SIGCONT before - //they've actually stopped, and they'll remain suspended indefinitely. + auto unblock_previous = [&] () { if (blocked_pid != -1) { - debug(2, L"Unblocking process %d.\n", blocked_pid); - kill(blocked_pid, SIGCONT); + //now that next command in the chain has been started, unblock the previous command + unblock_pid(blocked_pid); + blocked_pid = -1; } - blocked_pid = -1; }; // This is the io_streams we pass to internal builtins. @@ -884,10 +893,14 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. Write out the contents of the pipeline. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - // Make child processes pause after executing setup_child_process() to give down-chain - // commands in the job a chance to join their process group and read their pipes. + // Start child processes that are part of a chain in a stopped state + // to ensure that they are still running when the next command in the + // chain is started. // The process will be resumed when the next command in the chain is started. - kill(p->pid, SIGSTOP); + // Note that this may span multiple jobs, as jobs can read from each other. + if (pipes_to_next_command) { + kill(p->pid, SIGSTOP); + } exec_write_and_exit(block_output_io_buffer->fd, buffer, count, status); } else { @@ -895,9 +908,14 @@ void exec_job(parser_t &parser, job_t *j) { // possibly give it control over the terminal. debug(2, L"Fork #%d, pid %d: internal block or function for '%ls'", g_fork_count, pid, p->argv0()); - child_forked = true; - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + if (pipes_to_next_command) { + //it actually blocked itself after forking above, but print in here for output + //synchronization & so we can assign command_blocked in the correct address space + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + command_blocked = true; + } p->pid = pid; + set_child_group(j, p, 0); } } else { @@ -1012,11 +1030,14 @@ void exec_job(parser_t &parser, job_t *j) { // stdout and stderr, and then exit. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - // Make child processes pause after executing setup_child_process() to give down-chain - // commands in the job a chance to join their process group and read their pipes. + // Start child processes that are part of a chain in a stopped state + // to ensure that they are still running when the next command in the + // chain is started. // The process will be resumed when the next command in the chain is started. - kill(p->pid, SIGSTOP); - + // Note that this may span multiple jobs, as jobs can read from each other. + if (pipes_to_next_command) { + kill(p->pid, SIGSTOP); + } do_builtin_io(outbuff, outbuff_len, errbuff, errbuff_len); exit_without_destructors(p->status); } else { @@ -1024,9 +1045,15 @@ void exec_job(parser_t &parser, job_t *j) { // possibly give it control over the terminal. debug(2, L"Fork #%d, pid %d: internal builtin for '%ls'", g_fork_count, pid, p->argv0()); - child_forked = true; - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + if (pipes_to_next_command) { + //it actually blocked itself after forking above, but print in here for output + //synchronization & so we can assign command_blocked in the correct address space + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + command_blocked = true; + } p->pid = pid; + + set_child_group(j, p, 0); } } @@ -1100,10 +1127,15 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - // Make child processes pause after executing setup_child_process() to give down-chain - // commands in the job a chance to join their process group and read their pipes. + + // Start child processes that are part of a chain in a stopped state + // to ensure that they are still running when the next command in the + // chain is started. // The process will be resumed when the next command in the chain is started. - kill(p->pid, SIGSTOP); + // Note that this may span multiple jobs, as jobs can read from each other. + if (pipes_to_next_command) { + kill(p->pid, SIGSTOP); + } safe_launch_process(p, actual_cmd, argv, envv); // safe_launch_process _never_ returns... @@ -1115,14 +1147,19 @@ void exec_job(parser_t &parser, job_t *j) { job_mark_process_as_failed(j, p); exec_error = true; } - child_forked = true; - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + if (pipes_to_next_command) { + //it actually blocked itself after forking above, but print in here for output + //synchronization & so we can assign command_blocked in the correct address space + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); + command_blocked = true; + } } } // This is the parent process. Store away information on the child, and possibly // fice it control over the terminal. p->pid = pid; + set_child_group(j, p, 0); break; } @@ -1133,31 +1170,10 @@ void exec_job(parser_t &parser, job_t *j) { } } - if (child_forked) { - //we have to wait to ensure the child has set their progress group and is in SIGSTOP state - //otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be blocked indefinitely. - pid_t pid_status{}; - if (waitpid(p->pid, &pid_status, WUNTRACED) != -1) { - //the child is SIGSTOP'd and is guaranteed to have called child_set_group() at this point. - set_child_group(j, p->pid); //update our own process group info to match - //we are not unblocking the child via SIGCONT just yet to give the next process a chance to open - //the pipes and join the process group. We only unblock the last process in the job chain because - //no one awaits it. - if (!pipes_to_next_command) - { - kill(p->pid, SIGCONT); - } - } - else { - debug(2, L"waitpid(%d) call in unblock_pid failed:!\n", p->pid); - wperror(L"waitpid"); - } - } - //if the command we ran _before_ this one was SIGSTOP'd to let this one catch up, unblock it now. - //this must be after the wait_pid on the process we just started, if any. - unblock_previous(); - if (child_forked) { + //if the command we ran before this one was SIGSTOP'd to let this one catch up, unblock it now. + unblock_previous(); + if (command_blocked) { //store the newly-blocked command's PID so that it can be SIGCONT'd once the next process //in the chain is started. That may be in this job or in another job. blocked_pid = p->pid; diff --git a/src/postfork.cpp b/src/postfork.cpp index 6832d748e..1270dfee0 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -59,18 +59,14 @@ static void debug_safe_int(int level, const char *format, int val) { debug_safe(level, format, buff); } -/// Called only by the child to set its own process group (possibly creating a new group in the -/// process if it is the first in a JOB_CONTROL job. The parent will wait for this to finish. -/// A process that isn't already in control of the terminal can't give itself control of the -/// terminal without hanging, but it's not right for the child to try and give itself control -/// from the very beginning because the parent may not have gotten around to doing so yet. Let -/// the parent figure it out; if the child doesn't have terminal control and it later tries to -/// read from the terminal, the kernel will send it SIGTTIN and it'll hang anyway. -/// The key here is that the parent should transfer control of the terminal (if appropriate) -/// prior to sending the child SIGCONT to wake it up to exec. +/// This function should be called by both the parent process and the child right after fork() has +/// been called. If job control is enabled, the child is put in the jobs group, and if the child is +/// also in the foreground, it is also given control of the terminal. When called in the parent +/// process, this function may fail, since the child might have already finished and called exit. +/// The parent process may safely ignore the exit status of this call. /// /// Returns true on sucess, false on failiure. -bool child_set_group(job_t *j, process_t *p) { +bool set_child_group(job_t *j, process_t *p, int print_errors) { bool retval = true; if (j->get_flag(JOB_CONTROL)) { @@ -78,53 +74,32 @@ bool child_set_group(job_t *j, process_t *p) { if (j->pgid == -2) { j->pgid = p->pid; } - int failure = setpgid(p->pid, j->pgid); - // TODO: Figure out why we're testing whether the pgid is correct after attempting to - // set it failed. This was added in commit 4e912ef8 from 2012-02-27. - failure = failure && getpgid(p->pid) != j->pgid; - if (failure) { //!OCLINT(collapsible if statements) - char pid_buff[128]; - char job_id_buff[128]; - char getpgid_buff[128]; - char job_pgid_buff[128]; - char argv0[64]; - char command[64]; - format_long_safe(pid_buff, p->pid); - format_long_safe(job_id_buff, j->job_id); - format_long_safe(getpgid_buff, getpgid(p->pid)); - format_long_safe(job_pgid_buff, j->pgid); - narrow_string_safe(argv0, p->argv0()); - narrow_string_safe(command, j->command_wcstr()); + if (setpgid(p->pid, j->pgid)) { //!OCLINT(collapsible if statements) + // TODO: Figure out why we're testing whether the pgid is correct after attempting to + // set it failed. This was added in commit 4e912ef8 from 2012-02-27. + if (getpgid(p->pid) != j->pgid && print_errors) { + char pid_buff[128]; + char job_id_buff[128]; + char getpgid_buff[128]; + char job_pgid_buff[128]; + char argv0[64]; + char command[64]; - debug_safe( - 1, "Could not send own process %s, '%s' in job %s, '%s' from group %s to group %s", - pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff); + format_long_safe(pid_buff, p->pid); + format_long_safe(job_id_buff, j->job_id); + format_long_safe(getpgid_buff, getpgid(p->pid)); + format_long_safe(job_pgid_buff, j->pgid); + narrow_string_safe(argv0, p->argv0()); + narrow_string_safe(command, j->command_wcstr()); - safe_perror("setpgid"); - retval = false; - } - } else { - //this is probably stays unused in the child - j->pgid = getpgrp(); - } + debug_safe( + 1, "Could not send process %s, '%s' in job %s, '%s' from group %s to group %s", + pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff); - return retval; -} - -/// Called only by the parent only after a child forks and successfully calls child_set_group, guaranteeing -/// the job control process group has been created and that the child belongs to the correct process group. -/// Here we can update our job_t structure to reflect the correct process group in the case of JOB_CONTROL, -/// and we can give the new process group control of the terminal if it's to run in the foreground. Note that -/// we can guarantee the child won't try to read from the terminal before we've had a chance to run this code, -/// because we haven't woken them up with a SIGCONT yet. -bool set_child_group(job_t *j, pid_t child_pid) { - bool retval = true; - - if (j->get_flag(JOB_CONTROL)) { - // New jobs have the pgid set to -2 - if (j->pgid == -2) { - j->pgid = child_pid; + safe_perror("setpgid"); + retval = false; + } } } else { j->pgid = getpgrp(); @@ -132,16 +107,33 @@ bool set_child_group(job_t *j, pid_t child_pid) { if (j->get_flag(JOB_TERMINAL) && j->get_flag(JOB_FOREGROUND)) { //!OCLINT(early exit) if (tcgetpgrp(STDIN_FILENO) == j->pgid) { - //we've already assigned the process group control of the terminal when the first process in the job - //was started. There's no need to do so again, and on some platforms this can cause an EPERM error. - //In addition, if we've given control of the terminal to a process group, attempting to call tcsetpgrp - //from the background will cause SIGTTOU to be sent to everything in our process group (unless we - //handle it).. debug(4, L"Process group %d already has control of terminal\n", j->pgid); } else { - //no need to duplicate the code here, a function already exists that does just this - retval = terminal_give_to_job(j, false /*new job, so not continuing*/); + debug(4, L"Attempting bring process group to foreground via tcsetpgrp for job->pgid %d\n", j->pgid); + debug(4, L"caller session id: %d, pgid %d has session id: %d\n", getsid(0), j->pgid, getsid(j->pgid)); + int result = -1; + errno = EINTR; + while (result == -1 && errno == EINTR) { + signal_block(true); + result = tcsetpgrp(STDIN_FILENO, j->pgid); + signal_unblock(true); + } + if (result == -1) { + if (errno == ENOTTY) redirect_tty_output(); + if (print_errors) { + char job_id_buff[64]; + char command_buff[64]; + char job_pgid_buff[128]; + format_long_safe(job_id_buff, j->job_id); + narrow_string_safe(command_buff, j->command_wcstr()); + format_long_safe(job_pgid_buff, j->pgid); + debug_safe(1, "Could not send job %s ('%s') with pgid %s to foreground", job_id_buff, + command_buff, job_pgid_buff); + safe_perror("tcsetpgrp"); + retval = false; + } + } } } @@ -250,10 +242,10 @@ static int handle_child_io(const io_chain_t &io_chain) { } int setup_child_process(job_t *j, process_t *p, const io_chain_t &io_chain) { - bool ok = false; + bool ok = true; if (p) { - ok = child_set_group(j, p); + ok = set_child_group(j, p, 1); } if (ok) { diff --git a/src/postfork.h b/src/postfork.h index a90fd377b..0a82a4553 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -18,8 +18,7 @@ class io_chain_t; class job_t; class process_t; -bool set_child_group(job_t *j, pid_t child_pid); //called by parent -bool child_set_group(job_t *j, process_t *p); //called by child +bool set_child_group(job_t *j, process_t *p, int print_errors); /// Initialize a new child process. This should be called right away after forking in the child /// process. If job control is enabled for this job, the process is put in the process group of the diff --git a/src/proc.cpp b/src/proc.cpp index f6d338977..061aa2c96 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -782,7 +782,7 @@ static void read_try(job_t *j) { /// \param j The job to give the terminal to. /// \param cont If this variable is set, we are giving back control to a job that has previously /// been stopped. In that case, we need to set the terminal attributes to those saved in the job. -bool terminal_give_to_job(job_t *j, int cont) { +static bool terminal_give_to_job(job_t *j, int cont) { errno = 0; if (j->pgid == 0) { debug(2, "terminal_give_to_job() returning early due to no process group"); diff --git a/src/proc.h b/src/proc.h index b57b89c25..874fb8230 100644 --- a/src/proc.h +++ b/src/proc.h @@ -365,5 +365,3 @@ void proc_pop_interactive(); int proc_format_status(int status); #endif - -bool terminal_give_to_job(job_t *j, int cont);