Prevent hanging when restoring the foreground process group at exit

When fish starts, it notices which pgroup owns the tty, and then it
restores that pgroup's tty ownership when it exits. However if fish does
not own the tty, then (on Mac at least) the tcsetpgrp call triggers a
SIGSTOP and fish will hang while trying to exit.

The first change is to ignore SIGTTOU instead of defaulting it. This
prevents the hang; however it risks re-introducing #7060.

The second change somewhat mitigates the risk of the first: only do the
restore if the initial pgroup is different than fish's pgroup. This
prevents some useless calls which might potentially steal the tty from
another process (e.g. in #7060).
This commit is contained in:
ridiculousfish 2021-04-05 13:04:05 -07:00
parent 797fbbb5f5
commit fa890dc233
3 changed files with 112 additions and 6 deletions

View file

@ -1789,12 +1789,15 @@ void save_term_foreground_process_group() {
}
void restore_term_foreground_process_group_for_exit() {
if (initial_fg_process_group != -1) {
// This is called during shutdown and from a signal handler. We don't bother to complain on
// failure because doing so is unlikely to be noticed.
// However we want this to fail if we are not the tty owner (#7060), so clear our SIGTTOU
// handler to allow it to fail properly. Note that we are about to exit.
(void)signal(SIGTTOU, SIG_DFL);
// We wish to restore the tty to the initial owner. There's two ways this can go wrong:
// 1. We may steal the tty from someone else (#7060).
// 2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit.
// Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP.
// Note initial_fg_process_group == 0 is possible with Linux pid namespaces.
// This is called during shutdown and from a signal handler. We don't bother to complain on
// failure because doing so is unlikely to be noticed.
if (initial_fg_process_group > 0 && initial_fg_process_group != getpgrp()) {
(void)signal(SIGTTOU, SIG_IGN);
(void)tcsetpgrp(STDIN_FILENO, initial_fg_process_group);
}
}

View file

@ -20,6 +20,26 @@ static void become_foreground_then_print_stderr() {
fprintf(stderr, "become_foreground_then_print_stderr done\n");
}
static void nohup_wait() {
pid_t init_parent = getppid();
if (signal(SIGHUP, SIG_IGN)) {
perror("tcsetgrp");
exit(EXIT_FAILURE);
}
// Note: these silly close() calls are necessary to prevent our parent process (presumably fish)
// from getting stuck in the "E" state ("Trying to exit"). This appears to be a (kernel?) bug on
// macOS: the process is no longer running but is not a zombie either, and so cannot be reaped.
// It is unclear why closing these fds successfully works around this issue.
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
// To avoid leaving fish_test_helpers around, we exit once our parent changes, meaning the fish
// instance exited.
while (getppid() == init_parent) {
usleep(1000000 / 4);
}
}
static void report_foreground_loop() {
int was_fg = -1;
const auto grp = getpgrp();
@ -146,6 +166,7 @@ struct fth_command_t {
static fth_command_t s_commands[] = {
{"become_foreground_then_print_stderr", become_foreground_then_print_stderr,
"Claim the terminal (tcsetpgrp) and then print to stderr"},
{"nohup_wait", nohup_wait, "Ignore SIGHUP and just wait"},
{"report_foreground", report_foreground, "Report to stderr whether we own the terminal"},
{"report_foreground_loop", report_foreground_loop,
"Continually report to stderr whether we own the terminal"},

View file

@ -0,0 +1,82 @@
#!/usr/bin/env python3
from pexpect_helper import SpawnedProc
import subprocess
import sys
import signal
import time
import os
sp = SpawnedProc()
send, sendline, sleep, expect_prompt, expect_re = (
sp.send,
sp.sendline,
sp.sleep,
sp.expect_prompt,
sp.expect_re,
)
# Helper to print an error and exit.
def error_and_exit(text):
keys = sp.colors()
print("{RED}Test failed: {NORMAL}{TEXT}".format(TEXT=text, **keys))
sys.exit(1)
fish_pid = sp.spawn.pid
# Launch fish_test_helper.
expect_prompt()
exe_path = os.environ.get("fish_test_helper")
sp.sendline(exe_path + " nohup_wait")
# We expect it to transfer tty ownership to fish_test_helper.
sleep(0.1)
tty_owner = os.tcgetpgrp(sp.spawn.child_fd)
if fish_pid == tty_owner:
os.kill(fish_pid, signal.SIGKILL)
error_and_exit(
"Test failed: outer fish %d did not transfer tty owner to fish_test_helper"
% (fish_pid)
)
# Now we are going to tell fish to exit.
# It must not hang. But it might hang when trying to restore the tty.
os.kill(fish_pid, signal.SIGTERM)
# Loop a bit until the process exits (correct) or stops (incorrrect).
# When it exits it should be due to the SIGTERM that we sent it.
for i in range(10):
pid, status = os.waitpid(fish_pid, os.WUNTRACED | os.WNOHANG)
if pid == 0:
# No process ready yet, loop again.
sleep(0.1)
elif pid != fish_pid:
# Some other process exited (??)
os.kill(fish_pid, signal.SIGKILL)
error_and_exit(
"unexpected pid returned from waitpid. Got %d, expected %d"
% (pid, fish_pid)
)
elif os.WIFSTOPPED(status):
# Our pid stopped instead of exiting.
# Probably it stopped because of its tcsetpgrp call during exit.
# Kill it and report a failure.
os.kill(fish_pid, signal.SIGKILL)
error_and_exit("pid %d stopped instead of exiting on SIGTERM" % pid)
elif not os.WIFSIGNALED(status):
error_and_exit("pid %d did not signal-exit" % pid)
elif os.WTERMSIG(status) != signal.SIGTERM:
error_and_exit(
"pid %d signal-exited with %d instead of %d (SIGTERM)"
% (os.WTERMSIG(status), signal.SIGTERM)
)
else:
# Success!
sys.exit(0)
else:
# Our loop completed without the process being returned.
error_and_exit("fish with pid %d hung after SIGTERM" % fish_pid)
# Should never get here.
error_and_exit("unknown, should be unreachable")