diff --git a/src/builtin_disown.cpp b/src/builtin_disown.cpp index 3c9822df7..510044803 100644 --- a/src/builtin_disown.cpp +++ b/src/builtin_disown.cpp @@ -36,7 +36,7 @@ static int disown_job(const wchar_t *cmd, parser_t &parser, io_streams_t &stream // within the context of a subjob which will cause the parent job to crash in exec_job(). // Instead, we set a flag and the parser removes the job from the jobs list later. j->mut_flags().disown_requested = true; - if (pgid) add_disowned_pgid(*pgid); + add_disowned_job(j); return STATUS_CMD_OK; } diff --git a/src/proc.cpp b/src/proc.cpp index f65651c38..9ab2ceb43 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -326,13 +326,24 @@ bool job_t::has_external_proc() const { /// we exit. Poll these from time-to-time to prevent zombie processes from happening (#5342). static owning_lock> s_disowned_pids; -void add_disowned_pgid(pid_t pgid) { - // NEVER add our own (or an invalid) pgid as they are not unique to only +void add_disowned_job(job_t *j) { + if (j == nullptr) return; + + // Never add our own (or an invalid) pgid as it is not unique to only // one job, and may result in a deadlock if we attempt the wait. - if (pgid != getpgrp() && pgid > 0) { + auto pgid = j->get_pgid(); + auto disowned_pids = s_disowned_pids.acquire(); + if (pgid && *pgid != getpgrp() && *pgid > 0) { // waitpid(2) is signalled to wait on a process group rather than a // process id by using the negative of its value. - s_disowned_pids.acquire()->push_back(pgid * -1); + disowned_pids->push_back(*pgid * -1); + } else { + // Instead, add the PIDs of any external processes + for (auto &process : j->processes) { + if (process->pid) { + disowned_pids->push_back(process->pid); + } + } } } @@ -341,8 +352,14 @@ static void reap_disowned_pids() { auto disowned_pids = s_disowned_pids.acquire(); auto try_reap1 = [](pid_t pid) { int status; - return waitpid(pid, &status, WNOHANG) > 0; + int ret = waitpid(pid, &status, WNOHANG); + if (ret > 0) { + FLOGF(proc_reap_external, "Reaped disowned PID or PGID %d", pid); + } + return ret; }; + // waitpid returns 0 iff the PID/PGID in question has not changed state; remove the pid/pgid + // if it has changed or an error occurs (presumably ECHILD because the child does not exist) disowned_pids->erase(std::remove_if(disowned_pids->begin(), disowned_pids->end(), try_reap1), disowned_pids->end()); } diff --git a/src/proc.h b/src/proc.h index 934eab2b5..f9ce8d50b 100644 --- a/src/proc.h +++ b/src/proc.h @@ -551,9 +551,9 @@ void hup_jobs(const job_list_t &jobs); /// \return 1 if transferred, 0 if no transfer was necessary, -1 on error. int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from_stopped); -/// Add a pid to the list of pids we wait on even though they are not associated with any jobs. -/// Used to avoid zombie processes after disown. -void add_disowned_pgid(pid_t pgid); +/// Add a job to the list of PIDs/PGIDs we wait on even though they are not associated with any +/// jobs. Used to avoid zombie processes after disown. +void add_disowned_job(job_t *j); bool have_proc_stat(); diff --git a/tests/checks/jobs.fish b/tests/checks/jobs.fish index ed72e827d..1ad32670d 100644 --- a/tests/checks/jobs.fish +++ b/tests/checks/jobs.fish @@ -1,4 +1,19 @@ #RUN: %fish %s + +# Verify zombies are not left by disown (#7183, #5342) +# Do this first to avoid colliding with the other disowned processes below, which may +# still be running at the end of the script +sleep 0.2 & +disown +sleep 0.2 +echo Trigger process reaping +#CHECK: Trigger process reaping +# The initial approach here was to kill the PID of the sleep process, which should +# be gone by the time we get here. Unfortunately, kill from procps on pre-2016 distributions +# does not print an error for non-existent PIDs, so instead look for zombies in this session +# (there should be none). +ps -o state | string match 'Z*' + jobs -q echo $status #CHECK: 1