diff --git a/CHANGELOG.md b/CHANGELOG.md index 986cffa19..2ff1b1ac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ -# fish 2.?.? (released ???) +# fish 2.6.0 (released ???) + +- The `read` command now has a default limit of 10 MiB. If a line is longer than that it will fail with $status set to 122 and the var will be empty. You can set a different limit by setting the FISH_READ_BYTE_LIMIT variable. --- diff --git a/doc_src/read.txt b/doc_src/read.txt index 5514bc848..9810bf661 100644 --- a/doc_src/read.txt +++ b/doc_src/read.txt @@ -7,7 +7,7 @@ read [OPTIONS] [VARIABLES...] \subsection read-description Description -`read` reads one line from standard input and stores the result in one or more shell variables. +`read` reads from standard input and stores the result in one or more shell variables. By default it reads one line terminated by a newline but options are available to read up to a null character and to limit each "line" to a maximum number of characters. The following options are available: @@ -19,7 +19,7 @@ The following options are available: - `-m NAME` or `--mode-name=NAME` specifies that the name NAME should be used to save/load the history file. If NAME is fish, the regular fish history will be available. -- `-n NCHARS` or `--nchars=NCHARS` causes `read` to return after reading NCHARS characters rather than waiting for a complete line of input. +- `-n NCHARS` or `--nchars=NCHARS` causes `read` to return after reading NCHARS characters rather than waiting for a complete line of input (either newline or null terminated). - `-p PROMPT_CMD` or `--prompt=PROMPT_CMD` uses the output of the shell command `PROMPT_CMD` as the prompt for the interactive mode. The default prompt command is set_color green; echo read; set_color normal; echo "> ". @@ -33,7 +33,7 @@ The following options are available: - `-x` or `--export` exports the variables to child processes. -- `-a` or `--array` stores the result as an array. +- `-a` or `--array` stores the result as an array in a single variable. - `-z` or `--null` reads up to NUL instead of newline. Disables interactive mode. @@ -43,7 +43,9 @@ If `-a` or `--array` is provided, only one variable name is allowed and the toke See the documentation for `set` for more details on the scoping rules for variables. -When read reaches the end-of-file (EOF) instead of the separator, it returns 1. If not, it returns 0. +When read reaches the end-of-file (EOF) instead of the separator, it sets `$status` to 1. If not, it sets it to 0. + +Fish has a default limit of 10 MiB on the number of characters each `read` will consume. If you attempt to read more than that `$status` is set to 122 and the variable will be empty. You can modify that limit by setting the `FISH_READ_BYTE_LIMIT` variable at any time including in the environment before fish starts running. This is a safety mechanism to keep the shell from consuming an unreasonable amount of memory if the input is malformed. \subsection read-example Example diff --git a/src/builtin.cpp b/src/builtin.cpp index e03665c60..47d5d2edb 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -1997,10 +1997,11 @@ static int read_interactive(wcstring &buff, int nchars, bool shell, const wchar_ /// they've done more extensive testing. #define READ_CHUNK_SIZE 128 -/// Read from the fd in chunks until we see newline or null, as appropriate, is seen. This is only +/// Read from the fd in chunks until we see newline or null, as requested, is seen. This is only /// used when the fd is seekable (so not from a tty or pipe) and we're not reading a specific number /// of chars. -/// Returns an exit status +/// +/// Returns an exit status. static int read_in_chunks(int fd, wcstring &buff, bool split_null) { int exit_res = STATUS_BUILTIN_OK; std::string str; @@ -2017,14 +2018,17 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) { } const char *end = std::find(inbuf, inbuf + bytes_read, split_null ? L'\0' : L'\n'); - long bytes_consumed = end - inbuf; // note: must be signed for use in lseek + long bytes_consumed = end - inbuf; // must be signed for use in lseek assert(bytes_consumed <= bytes_read); str.append(inbuf, bytes_consumed); if (bytes_consumed < bytes_read) { - // We found a splitter - // +1 because we need to treat the splitter as consumed, but not append it to the string + // We found a splitter. The +1 because we need to treat the splitter as consumed, but + // not append it to the string. CHECK(lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) != -1, STATUS_BUILTIN_ERROR) finished = true; + } else if (str.size() > read_byte_limit) { + exit_res = STATUS_READ_TOO_MUCH; + finished = true; } } @@ -2042,6 +2046,7 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) { static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool split_null) { int exit_res = STATUS_BUILTIN_OK; bool eof = false; + size_t nbytes = 0; while (true) { bool finished = false; @@ -2055,6 +2060,7 @@ static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool spli break; } + nbytes++; if (MB_CUR_MAX == 1) { res = (unsigned char)b; finished = true; @@ -2068,12 +2074,16 @@ static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool spli } } + if (nbytes > read_byte_limit) { + exit_res = STATUS_READ_TOO_MUCH; + break; + } if (eof) break; if (!split_null && res == L'\n') break; if (split_null && res == L'\0') break; buff.push_back(res); - if (0 < nchars && (size_t)nchars <= buff.size()) { + if (nchars > 0 && (size_t)nchars <= buff.size()) { break; } } diff --git a/src/env.cpp b/src/env.cpp index 3de55955d..c4a2cd517 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -56,6 +56,11 @@ /// At init, we read all the environment variables from this array. extern char **environ; +// Limit `read` to 10 MiB (bytes not wide chars) by default. This can be overridden by the +// FISH_READ_BYTE_LIMIT variable. +#define READ_BYTE_LIMIT 10 * 1024 * 1024 +size_t read_byte_limit = READ_BYTE_LIMIT; + bool g_use_posix_spawn = false; // will usually be set to true /// Does the terminal have the "eat_newline_glitch". @@ -420,6 +425,8 @@ static void react_to_variable_change(const wcstring &key) { update_wait_on_escape_ms(); } else if (key == L"LINES" || key == L"COLUMNS") { invalidate_termsize(true); // force fish to update its idea of the terminal size plus vars + } else if (key == L"FISH_READ_BYTE_LIMIT") { + env_set_read_limit(); } } @@ -483,6 +490,20 @@ bool env_set_pwd() { return true; } +/// Allow the user to override the limit on how much data the `read` command will process. +/// This is primarily for testing but could be used by users in special situations. +void env_set_read_limit() { + env_var_t read_byte_limit_var = env_get_string(L"FISH_READ_BYTE_LIMIT"); + if (!read_byte_limit_var.missing_or_empty()) { + size_t limit = fish_wcstoull(read_byte_limit_var.c_str()); + if (errno) { + debug(1, "Ignoring FISH_READ_BYTE_LIMIT since it is not valid"); + } else { + read_byte_limit = limit; + } + } +} + wcstring env_get_pwd_slash(void) { env_var_t pwd = env_get_string(L"PWD"); if (pwd.missing_or_empty()) { @@ -605,8 +626,9 @@ void env_init(const struct config_paths_t *paths /* or NULL */) { free(unam_narrow); } - env_set_pwd(); // initialize the PWD variable - env_set_termsize(); // initialize the terminal size variables + env_set_pwd(); // initialize the PWD variable + env_set_termsize(); // initialize the terminal size variables + env_set_read_limit(); // initialize the read_byte_limit // Set up universal variables. The empty string means to use the deafult path. assert(s_universal_variables == NULL); diff --git a/src/env.h b/src/env.h index 74590f1c2..29877fec0 100644 --- a/src/env.h +++ b/src/env.h @@ -10,6 +10,8 @@ #include "common.h" +extern size_t read_byte_limit; + // Flags that may be passed as the 'mode' in env_set / env_get_string. enum { /// Default mode. @@ -149,6 +151,9 @@ bool env_set_pwd(); /// Returns the PWD with a terminating slash. wcstring env_get_pwd_slash(); +/// Update the read_byte_limit variable. +void env_set_read_limit(); + class env_vars_snapshot_t { std::map vars; bool is_current() const; diff --git a/src/fish.cpp b/src/fish.cpp index 448b4c7ca..39ebab886 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -48,6 +48,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "path.h" #include "proc.h" #include "reader.h" +#include "signal.h" #include "wutil.h" // IWYU pragma: keep // PATH_MAX may not exist. @@ -318,6 +319,8 @@ static int fish_parse_opt(int argc, char **argv, std::vector *cmds) /// Various things we need to initialize at run-time that don't really fit any of the other init /// routines. static void misc_init() { + env_set_read_limit(); + // If stdout is open on a tty ensure stdio is unbuffered. That's because those functions might // be intermixed with `write()` calls and we need to ensure the writes are not reordered. See // issue #3748. diff --git a/src/proc.h b/src/proc.h index b75d20742..8aa42f77d 100644 --- a/src/proc.h +++ b/src/proc.h @@ -20,18 +20,21 @@ /// The status code use when a command was not found. #define STATUS_UNKNOWN_COMMAND 127 -/// The status code use when an unknown error occured during execution of a command. +/// The status code used when an unknown error occured during execution of a command. #define STATUS_NOT_EXECUTABLE 126 -/// The status code use when an unknown error occured during execution of a command. +/// The status code used when an unknown error occured during execution of a command. #define STATUS_EXEC_FAIL 125 -/// The status code use when a wildcard had no matches. +/// The status code used when a wildcard had no matches. #define STATUS_UNMATCHED_WILDCARD 124 -/// The status code use when illegal command name is encountered. +/// The status code used when illegal command name is encountered. #define STATUS_ILLEGAL_CMD 123 +/// The status code used when `read` is asked to consume too much data. +#define STATUS_READ_TOO_MUCH 122 + /// The status code used for normal exit in a builtin. #define STATUS_BUILTIN_OK 0 diff --git a/tests/read.in b/tests/read.in index 98b08f947..39645f536 100644 --- a/tests/read.in +++ b/tests/read.in @@ -1,8 +1,7 @@ # vim: set filetype=fish: # -# Test read builtin and IFS +# Test read builtin and IFS. # - count (echo one\ntwo) set -l IFS \t count (echo one\ntwo) @@ -128,4 +127,63 @@ and echo "Chunked reads test pass" or echo "Chunked reads test failure: long strings don't match!" rm $path -true +# ========== +# The following tests verify that `read` correctly handles the limit on the +# number of bytes consumed. +# +set FISH_READ_BYTE_LIMIT 8192 +set line abcdefghijklmnopqrstuvwxyz + +# Ensure the `read` command terminates if asked to read too much data. The var +# should be empty. We throw away any data we read if it exceeds the limit on +# what we consider reasonable. +yes $line | dd bs=1024 count=(math "1 + $FISH_READ_BYTE_LIMIT / 1024") ^/dev/null | read --null x +if test $status -ne 122 + echo reading too much data did not terminate with failure status +end +if test (string length "$x") -ne 0 + echo reading too much data resulted in a var with unexpected data +end + +# Ensure the `read` command terminates if asked to read too much data even if +# given an explicit limit. The var should be empty. We throw away any data we +# read if it exceeds the limit on what we consider reasonable. +yes $line | read --null --nchars=(math "$FISH_READ_BYTE_LIMIT + 1") x +if test $status -ne 122 + echo reading too much data did not terminate with failure status +end +if test (string length "$x") -ne 0 + echo reading too much data resulted in a var with unexpected data +end + +# Now do the opposite of the previous test and confirm we can read reasonable +# amounts of data. +echo $line | read x +if test $status -ne 0 + echo the read of a reasonable amount of data failed unexpectedly +end +set exp_length (string length $x) +set act_length (string length $line) +if test $exp_length -ne $act_length + echo reading a reasonable amount of data failed the length test + echo expected length $exp_length, actual length $act_length +end + +# Confirm we can read exactly up to the limit. +yes $line | read --null --nchars $FISH_READ_BYTE_LIMIT x +if test $status -ne 0 + echo the read of the max amount of data with --nchars failed unexpectedly +end +if test (string length "$x") -ne $FISH_READ_BYTE_LIMIT + echo reading the max amount of data with --nchars failed the length test +end + +# Same as previous test but limit the amount of data fed to `read` rather than +# using the `--nchars` flag. +yes $line | dd bs=1024 count=(math "$FISH_READ_BYTE_LIMIT / 1024") ^/dev/null | read --null x +if test $status -ne 0 + echo the read of the max amount of data failed unexpectedly +end +if test (string length "$x") -ne $FISH_READ_BYTE_LIMIT + echo reading the max amount of data with --nchars failed the length test +end