Allow more scripts without #!

This change modifies the fish safety check surrounding execve / spawn so
it can run shell scripts having concatenated binary content. We're using
the same safety check as FreeBSD /bin/sh [1] and the Z-shell [5].  POSIX
was recently revised to require this behavior:

    "The input file may be of any type, but the initial portion of the
     file intended to be parsed according to the shell grammar (XREF to
     XSH 2.10.2 Shell Grammar Rules) shall consist of characters and
     shall not contain the NUL character. The shell shall not enforce
     any line length limits."

    "Earlier versions of this standard required that input files to the
     shell be text files except that line lengths were unlimited.
     However, that was overly restrictive in relation to the fact that
     shells can parse a script without a trailing newline, and in
     relation to a common practice of concatenating a shell script
     ending with an 'exit' or 'exec $command' with a binary data payload
     to form a single-file self-extracting archive." [2] [3]

One example use case of such scripts is the Cosmopolitan C Library [4],
which configures the GNU Linker to output a polyglot shell+binary format
that runs on Linux / Mac / Windows / FreeBSD / OpenBSD / NetBSD / BIOS.

Fixes jart/cosmopolitan#88

[1] 9a1cd36331
[2] http://austingroupbugs.net/view.php?id=1250
[3] http://austingroupbugs.net/view.php?id=1226#c4394
[4] https://justine.lol/cosmopolitan/index.html
[5] 326d9c203b
This commit is contained in:
Justine Tunney 2021-03-08 16:29:45 -08:00 committed by ridiculousfish
parent df53d1415d
commit 0048730a67
2 changed files with 74 additions and 27 deletions

View file

@ -13,6 +13,7 @@
#ifdef HAVE_SPAWN_H #ifdef HAVE_SPAWN_H
#include <spawn.h> #include <spawn.h>
#endif #endif
#include <paths.h>
#include <stdio.h> #include <stdio.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <unistd.h> #include <unistd.h>
@ -62,6 +63,46 @@ enum class launch_result_t {
failed, failed,
} __warn_unused_type; } __warn_unused_type;
/// Decide whether the first \p n bytes at \p p look like the start of a shell script rather
/// than a binary file.
///
/// \param p buffer holding the leading bytes of the file.
/// \param n number of valid bytes in \p p.
/// \return true if the buffer contains no NUL byte at all. Otherwise true only if, before the
/// first NUL, a newline appears after at least one lowercase letter, '$' or '`' has been seen.
static bool is_thompson_shell_payload(const char *p, size_t n) {
    // No NUL anywhere in the sample: treat it as text.
    const char *const nul = static_cast<const char *>(memchr(p, '\0', n));
    if (!nul) return true;

    bool seen_script_char = false;
    for (; p != nul; ++p) {
        // The <ctype.h> classifiers are only defined for unsigned char values (and EOF), so
        // convert first; a plain char may be negative for bytes >= 0x80.
        const unsigned char c = static_cast<unsigned char>(*p);
        if (islower(c) || c == '$' || c == '`') {
            seen_script_char = true;
        }
        if (seen_script_char && c == '\n') {
            return true;
        }
    }
    return false;
}
/// Check the beginning of the file at \p path to see whether it is safe to hand to the system
/// shell after the kernel refused to execute it with ENOEXEC.
///
/// The motivation is to be able to run classic shell scripts which have no shebang line, while
/// protecting the user from accidentally running a binary file which may corrupt terminal
/// driver state. Lowercase letters are required because the ASCII magic of binary files is
/// usually uppercase, e.g. PNG, JFIF, MZ, etc. The rules are also flexible enough to permit
/// scripts with concatenated binary content, such as Actually Portable Executable.
///
/// Only the first 256 bytes of the file are examined. errno is left exactly as it was on entry.
///
/// \param path path of the file to inspect.
/// \return true if the file could be opened and read and its leading bytes pass
/// is_thompson_shell_payload(); false otherwise.
bool is_thompson_shell_script(const char *path) {
    // Save errno so the open/read/close below cannot disturb the value the caller sees.
    const int saved_errno = errno;
    bool res = false;

    int flags = O_RDONLY | O_NOCTTY;
#ifdef O_CLOEXEC
    // This helper is also called from posix_spawner_t::spawn, i.e. outside a freshly forked
    // child; mark the descriptor close-on-exec so it cannot be inherited by a process that is
    // launched while it is briefly open.
    flags |= O_CLOEXEC;
#endif

    const int fd = open(path, flags);
    if (fd != -1) {
        char buf[256];
        ssize_t got;
        // Retry if a signal interrupts the read before any data was transferred.
        do {
            got = read(fd, buf, sizeof(buf));
        } while (got == -1 && errno == EINTR);
        close(fd);
        if (got != -1 && is_thompson_shell_payload(buf, static_cast<size_t>(got))) {
            res = true;
        }
    }

    errno = saved_errno;
    return res;
}
/// This function is executed by the child process created by a call to fork(). It should be called /// This function is executed by the child process created by a call to fork(). It should be called
/// after \c child_setup_process. It calls execve to replace the fish process image with the command /// after \c child_setup_process. It calls execve to replace the fish process image with the command
/// specified in \c p. It never returns. Called in a forked child! Do not allocate memory, etc. /// specified in \c p. It never returns. Called in a forked child! Do not allocate memory, etc.
@ -71,36 +112,19 @@ enum class launch_result_t {
int err; int err;
// This function never returns, so we take certain liberties with constness. // This function never returns, so we take certain liberties with constness.
const auto envv = const_cast<char *const *>(cenvv); auto envv = const_cast<char **>(cenvv);
const auto argv = const_cast<char *const *>(cargv); auto argv = const_cast<char **>(cargv);
execve(actual_cmd, argv, envv); execve(actual_cmd, argv, envv);
err = errno; err = errno;
// Something went wrong with execve, check for a ":", and run /bin/sh if encountered. This is a // The shebang wasn't introduced until UNIX Seventh Edition, so if
// weird predecessor to the shebang that is still sometimes used since it is supported on // the kernel won't run the binary we hand it off to the interpreter
// Windows. OK to not use CLO_EXEC here because this is called after fork and the file is // after performing a binary safety check, recommended by POSIX: a
// immediately closed. // line needs to exist before the first \0 with a lowercase letter
int fd = open(actual_cmd, O_RDONLY); if (err == ENOEXEC && is_thompson_shell_script(actual_cmd)) {
if (fd >= 0) { *--argv = const_cast<char *>(_PATH_BSHELL);
char begin[1] = {0}; execve(_PATH_BSHELL, argv, envv);
ssize_t amt_read = read(fd, begin, 1);
close(fd);
if ((amt_read == 1) && (begin[0] == ':')) {
// Relaunch it with /bin/sh. Don't allocate memory, so if you have more args than this,
// update your silly script! Maybe this should be changed to be based on ARG_MAX
// somehow.
char sh_command[] = "/bin/sh";
char *argv2[128];
argv2[0] = sh_command;
for (size_t i = 1; i < sizeof argv2 / sizeof *argv2; i++) {
argv2[i] = argv[i - 1];
if (argv2[i] == nullptr) break;
}
execve(sh_command, argv2, envv);
}
} }
errno = err; errno = err;

View file

@ -3,6 +3,7 @@
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <paths.h>
#include <signal.h> #include <signal.h>
#include <stdio.h> #include <stdio.h>
#include <time.h> #include <time.h>
@ -39,6 +40,7 @@
/// Fork error message. /// Fork error message.
#define FORK_ERROR "Could not create child process - exiting" #define FORK_ERROR "Could not create child process - exiting"
extern bool is_thompson_shell_script(const char *path);
static char *get_interpreter(const char *command, char *buffer, size_t buff_size); static char *get_interpreter(const char *command, char *buffer, size_t buff_size);
/// Report the error code \p err for a failed setpgid call. /// Report the error code \p err for a failed setpgid call.
@ -300,7 +302,28 @@ posix_spawner_t::posix_spawner_t(const job_t *j, const dup2_list_t &dup2s) {
/// Launch \p cmd with arguments \p argv and environment \p envp via posix_spawn().
///
/// If posix_spawn() reports ENOEXEC and the file passes is_thompson_shell_script(), the command
/// is retried through the system shell (_PATH_BSHELL), so that classic scripts without a
/// shebang line can still be run.
///
/// \return the pid of the spawned child, or none() if an error was already recorded or the
/// spawn (including the shell fallback) failed.
maybe_t<pid_t> posix_spawner_t::spawn(const char *cmd, char *const argv[], char *const envp[]) {
    if (get_error()) return none();
    pid_t pid = -1;
    if (!check_fail(posix_spawn(&pid, cmd, &*actions_, &*attr_, argv, envp))) {
        return pid;
    }

    // The kernel refused to run the file. Only fall back to the shell if the failure was
    // ENOEXEC and the file passes the binary safety check: a line containing a lowercase
    // letter (or '$' / '`') must exist before the first NUL byte.
    if (error_ != ENOEXEC || !is_thompson_shell_script(cmd)) {
        return none();
    }
    error_ = 0;

    size_t argc = 0;
    while (argv[argc]) ++argc;

    // New vector: interpreter, script path, the original arguments after argv[0], terminator.
    const size_t trailing = argc > 0 ? argc - 1 : 0;
    std::unique_ptr<char *[]> argv2(new char *[2 + trailing + 1]);
    char interp[] = _PATH_BSHELL;
    size_t out = 0;
    argv2[out++] = interp;
    // Hand the shell the path that was actually checked and spawned (cmd), not argv[0]:
    // argv[0] need not be a usable path to the script, in which case the shell could fail to
    // find it or pick up a different file. posix_spawn takes argv as char *const[] for
    // historical reasons, so the cast only satisfies the signature.
    argv2[out++] = const_cast<char *>(cmd);
    for (size_t i = 1; i < argc; ++i) {
        argv2[out++] = argv[i];
    }
    argv2[out] = nullptr;

    if (check_fail(posix_spawn(&pid, interp, &*actions_, &*attr_, argv2.get(), envp))) {
        return none();
    }
    return pid;
}