diff --git a/CHANGELOG.md b/CHANGELOG.md
index 986cffa19..2ff1b1ac4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,6 @@
-# fish 2.?.? (released ???)
+# fish 2.6.0 (released ???)
+
+- The `read` command now has a default limit of 10 MiB. If a line is longer than that it will fail with $status set to 122 and the var will be empty. You can set a different limit by setting the FISH_READ_BYTE_LIMIT variable.
---
diff --git a/doc_src/read.txt b/doc_src/read.txt
index 5514bc848..9810bf661 100644
--- a/doc_src/read.txt
+++ b/doc_src/read.txt
@@ -7,7 +7,7 @@ read [OPTIONS] [VARIABLES...]
\subsection read-description Description
-`read` reads one line from standard input and stores the result in one or more shell variables.
+`read` reads from standard input and stores the result in one or more shell variables. By default it reads one line terminated by a newline but options are available to read up to a null character and to limit each "line" to a maximum number of characters.
The following options are available:
@@ -19,7 +19,7 @@ The following options are available:
- `-m NAME` or `--mode-name=NAME` specifies that the name NAME should be used to save/load the history file. If NAME is fish, the regular fish history will be available.
-- `-n NCHARS` or `--nchars=NCHARS` causes `read` to return after reading NCHARS characters rather than waiting for a complete line of input.
+- `-n NCHARS` or `--nchars=NCHARS` causes `read` to return after reading NCHARS characters rather than waiting for a complete line of input (either newline or null terminated).
- `-p PROMPT_CMD` or `--prompt=PROMPT_CMD` uses the output of the shell command `PROMPT_CMD` as the prompt for the interactive mode. The default prompt command is set_color green; echo read; set_color normal; echo "> "
.
@@ -33,7 +33,7 @@ The following options are available:
- `-x` or `--export` exports the variables to child processes.
-- `-a` or `--array` stores the result as an array.
+- `-a` or `--array` stores the result as an array in a single variable.
- `-z` or `--null` reads up to NUL instead of newline. Disables interactive mode.
@@ -43,7 +43,9 @@ If `-a` or `--array` is provided, only one variable name is allowed and the toke
See the documentation for `set` for more details on the scoping rules for variables.
-When read reaches the end-of-file (EOF) instead of the separator, it returns 1. If not, it returns 0.
+When read reaches the end-of-file (EOF) instead of the separator, it sets `$status` to 1. If not, it sets it to 0.
+
+Fish has a default limit of 10 MiB on the number of characters each `read` will consume. If you attempt to read more than that `$status` is set to 122 and the variable will be empty. You can modify that limit by setting the `FISH_READ_BYTE_LIMIT` variable at any time including in the environment before fish starts running. This is a safety mechanism to keep the shell from consuming an unreasonable amount of memory if the input is malformed.
\subsection read-example Example
diff --git a/src/builtin.cpp b/src/builtin.cpp
index e03665c60..47d5d2edb 100644
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -1997,10 +1997,11 @@ static int read_interactive(wcstring &buff, int nchars, bool shell, const wchar_
/// they've done more extensive testing.
#define READ_CHUNK_SIZE 128
-/// Read from the fd in chunks until we see newline or null, as appropriate, is seen. This is only
+/// Read from the fd in chunks until we see newline or null, as requested, is seen. This is only
/// used when the fd is seekable (so not from a tty or pipe) and we're not reading a specific number
/// of chars.
-/// Returns an exit status
+///
+/// Returns an exit status.
static int read_in_chunks(int fd, wcstring &buff, bool split_null) {
int exit_res = STATUS_BUILTIN_OK;
std::string str;
@@ -2017,14 +2018,17 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) {
}
const char *end = std::find(inbuf, inbuf + bytes_read, split_null ? L'\0' : L'\n');
- long bytes_consumed = end - inbuf; // note: must be signed for use in lseek
+ long bytes_consumed = end - inbuf; // must be signed for use in lseek
assert(bytes_consumed <= bytes_read);
str.append(inbuf, bytes_consumed);
if (bytes_consumed < bytes_read) {
- // We found a splitter
- // +1 because we need to treat the splitter as consumed, but not append it to the string
+ // We found a splitter. The +1 because we need to treat the splitter as consumed, but
+ // not append it to the string.
CHECK(lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) != -1, STATUS_BUILTIN_ERROR)
finished = true;
+ } else if (str.size() > read_byte_limit) {
+ exit_res = STATUS_READ_TOO_MUCH;
+ finished = true;
}
}
@@ -2042,6 +2046,7 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) {
static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool split_null) {
int exit_res = STATUS_BUILTIN_OK;
bool eof = false;
+ size_t nbytes = 0;
while (true) {
bool finished = false;
@@ -2055,6 +2060,7 @@ static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool spli
break;
}
+ nbytes++;
if (MB_CUR_MAX == 1) {
res = (unsigned char)b;
finished = true;
@@ -2068,12 +2074,16 @@ static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool spli
}
}
+ if (nbytes > read_byte_limit) {
+ exit_res = STATUS_READ_TOO_MUCH;
+ break;
+ }
if (eof) break;
if (!split_null && res == L'\n') break;
if (split_null && res == L'\0') break;
buff.push_back(res);
- if (0 < nchars && (size_t)nchars <= buff.size()) {
+ if (nchars > 0 && (size_t)nchars <= buff.size()) {
break;
}
}
diff --git a/src/env.cpp b/src/env.cpp
index 3de55955d..c4a2cd517 100644
--- a/src/env.cpp
+++ b/src/env.cpp
@@ -56,6 +56,11 @@
/// At init, we read all the environment variables from this array.
extern char **environ;
+// Limit `read` to 10 MiB (bytes not wide chars) by default. This can be overridden by the
+// FISH_READ_BYTE_LIMIT variable.
+#define READ_BYTE_LIMIT 10 * 1024 * 1024
+size_t read_byte_limit = READ_BYTE_LIMIT;
+
bool g_use_posix_spawn = false; // will usually be set to true
/// Does the terminal have the "eat_newline_glitch".
@@ -420,6 +425,8 @@ static void react_to_variable_change(const wcstring &key) {
update_wait_on_escape_ms();
} else if (key == L"LINES" || key == L"COLUMNS") {
invalidate_termsize(true); // force fish to update its idea of the terminal size plus vars
+ } else if (key == L"FISH_READ_BYTE_LIMIT") {
+ env_set_read_limit();
}
}
@@ -483,6 +490,20 @@ bool env_set_pwd() {
return true;
}
+/// Allow the user to override the limit on how much data the `read` command will process.
+/// This is primarily for testing but could be used by users in special situations.
+void env_set_read_limit() {
+ env_var_t read_byte_limit_var = env_get_string(L"FISH_READ_BYTE_LIMIT");
+ if (!read_byte_limit_var.missing_or_empty()) {
+ size_t limit = fish_wcstoull(read_byte_limit_var.c_str());
+ if (errno) {
+ debug(1, "Ignoring FISH_READ_BYTE_LIMIT since it is not valid");
+ } else {
+ read_byte_limit = limit;
+ }
+ }
+}
+
wcstring env_get_pwd_slash(void) {
env_var_t pwd = env_get_string(L"PWD");
if (pwd.missing_or_empty()) {
@@ -605,8 +626,9 @@ void env_init(const struct config_paths_t *paths /* or NULL */) {
free(unam_narrow);
}
- env_set_pwd(); // initialize the PWD variable
- env_set_termsize(); // initialize the terminal size variables
+ env_set_pwd(); // initialize the PWD variable
+ env_set_termsize(); // initialize the terminal size variables
+ env_set_read_limit(); // initialize the read_byte_limit
// Set up universal variables. The empty string means to use the deafult path.
assert(s_universal_variables == NULL);
diff --git a/src/env.h b/src/env.h
index 74590f1c2..29877fec0 100644
--- a/src/env.h
+++ b/src/env.h
@@ -10,6 +10,8 @@
#include "common.h"
+extern size_t read_byte_limit;
+
// Flags that may be passed as the 'mode' in env_set / env_get_string.
enum {
/// Default mode.
@@ -149,6 +151,9 @@ bool env_set_pwd();
/// Returns the PWD with a terminating slash.
wcstring env_get_pwd_slash();
+/// Update the read_byte_limit variable.
+void env_set_read_limit();
+
class env_vars_snapshot_t {
std::map vars;
bool is_current() const;
diff --git a/src/fish.cpp b/src/fish.cpp
index 448b4c7ca..39ebab886 100644
--- a/src/fish.cpp
+++ b/src/fish.cpp
@@ -48,6 +48,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "path.h"
#include "proc.h"
#include "reader.h"
+#include "signal.h"
#include "wutil.h" // IWYU pragma: keep
// PATH_MAX may not exist.
@@ -318,6 +319,8 @@ static int fish_parse_opt(int argc, char **argv, std::vector *cmds)
/// Various things we need to initialize at run-time that don't really fit any of the other init
/// routines.
static void misc_init() {
+ env_set_read_limit();
+
// If stdout is open on a tty ensure stdio is unbuffered. That's because those functions might
// be intermixed with `write()` calls and we need to ensure the writes are not reordered. See
// issue #3748.
diff --git a/src/proc.h b/src/proc.h
index b75d20742..8aa42f77d 100644
--- a/src/proc.h
+++ b/src/proc.h
@@ -20,18 +20,21 @@
/// The status code use when a command was not found.
#define STATUS_UNKNOWN_COMMAND 127
-/// The status code use when an unknown error occured during execution of a command.
+/// The status code used when an unknown error occured during execution of a command.
#define STATUS_NOT_EXECUTABLE 126
-/// The status code use when an unknown error occured during execution of a command.
+/// The status code used when an unknown error occured during execution of a command.
#define STATUS_EXEC_FAIL 125
-/// The status code use when a wildcard had no matches.
+/// The status code used when a wildcard had no matches.
#define STATUS_UNMATCHED_WILDCARD 124
-/// The status code use when illegal command name is encountered.
+/// The status code used when illegal command name is encountered.
#define STATUS_ILLEGAL_CMD 123
+/// The status code used when `read` is asked to consume too much data.
+#define STATUS_READ_TOO_MUCH 122
+
/// The status code used for normal exit in a builtin.
#define STATUS_BUILTIN_OK 0
diff --git a/tests/read.in b/tests/read.in
index 98b08f947..39645f536 100644
--- a/tests/read.in
+++ b/tests/read.in
@@ -1,8 +1,7 @@
# vim: set filetype=fish:
#
-# Test read builtin and IFS
+# Test read builtin and IFS.
#
-
count (echo one\ntwo)
set -l IFS \t
count (echo one\ntwo)
@@ -128,4 +127,63 @@ and echo "Chunked reads test pass"
or echo "Chunked reads test failure: long strings don't match!"
rm $path
-true
+# ==========
+# The following tests verify that `read` correctly handles the limit on the
+# number of bytes consumed.
+#
+set FISH_READ_BYTE_LIMIT 8192
+set line abcdefghijklmnopqrstuvwxyz
+
+# Ensure the `read` command terminates if asked to read too much data. The var
+# should be empty. We throw away any data we read if it exceeds the limit on
+# what we consider reasonable.
+yes $line | dd bs=1024 count=(math "1 + $FISH_READ_BYTE_LIMIT / 1024") ^/dev/null | read --null x
+if test $status -ne 122
+ echo reading too much data did not terminate with failure status
+end
+if test (string length "$x") -ne 0
+ echo reading too much data resulted in a var with unexpected data
+end
+
+# Ensure the `read` command terminates if asked to read too much data even if
+# given an explicit limit. The var should be empty. We throw away any data we
+# read if it exceeds the limit on what we consider reasonable.
+yes $line | read --null --nchars=(math "$FISH_READ_BYTE_LIMIT + 1") x
+if test $status -ne 122
+ echo reading too much data did not terminate with failure status
+end
+if test (string length "$x") -ne 0
+ echo reading too much data resulted in a var with unexpected data
+end
+
+# Now do the opposite of the previous test and confirm we can read reasonable
+# amounts of data.
+echo $line | read x
+if test $status -ne 0
+ echo the read of a reasonable amount of data failed unexpectedly
+end
+set exp_length (string length $x)
+set act_length (string length $line)
+if test $exp_length -ne $act_length
+ echo reading a reasonable amount of data failed the length test
+ echo expected length $exp_length, actual length $act_length
+end
+
+# Confirm we can read exactly up to the limit.
+yes $line | read --null --nchars $FISH_READ_BYTE_LIMIT x
+if test $status -ne 0
+ echo the read of the max amount of data with --nchars failed unexpectedly
+end
+if test (string length "$x") -ne $FISH_READ_BYTE_LIMIT
+ echo reading the max amount of data with --nchars failed the length test
+end
+
+# Same as previous test but limit the amount of data fed to `read` rather than
+# using the `--nchars` flag.
+yes $line | dd bs=1024 count=(math "$FISH_READ_BYTE_LIMIT / 1024") ^/dev/null | read --null x
+if test $status -ne 0
+ echo the read of the max amount of data failed unexpectedly
+end
+if test (string length "$x") -ne $FISH_READ_BYTE_LIMIT
+ echo reading the max amount of data with --nchars failed the length test
+end