From bd8c8ceb5994ba0945fbe331574f68c6d214408b Mon Sep 17 00:00:00 2001 From: Mahmoud Al-Qudsi Date: Tue, 17 Apr 2018 06:57:33 -0500 Subject: [PATCH] Add line-delimited read presets with --line and --all-lines Refer to changes in doc_src/read.txt for more info. Closes #4861. --- doc_src/read.txt | 6 +- src/builtin_read.cpp | 171 +++++++++++++++++++++++++++---------------- src/wcstringutil.h | 3 +- 3 files changed, 116 insertions(+), 64 deletions(-) diff --git a/doc_src/read.txt b/doc_src/read.txt index b661b368a..f17aa2c89 100644 --- a/doc_src/read.txt +++ b/doc_src/read.txt @@ -7,7 +7,7 @@ read [OPTIONS] VARIABLES... \subsection read-description Description -`read` reads from standard input and either writes the result back to the terminal for use in command substitution or stores the result in one or more shell variables. By default, `read` reads up to the next newline and splits it into the given variables on space, tab and newline. Alternatively, a null character or a maximum number of characters can be used to terminate the input, and other delimiters can be given. Unlike other shells, there is no default variable (such as `REPLY`) for storing the result. Instead, it is printed on stdout. +`read` reads from standard input and either writes the result back to the terminal for use in command substitution or stores the result in one or more shell variables. By default, `read` reads up to the next newline and splits it into given variables on spaces or tabs. Alternatively, a null character or a maximum number of characters can be used to terminate the input, and other delimiters can be given. Unlike other shells, there is no default variable (such as `REPLY`) for storing the result. Instead, it is printed on stdout. The following options are available: @@ -43,6 +43,10 @@ The following options are available: - `-z` or `--null` marks the end of the line with the NUL character, instead of newline. This also disables interactive mode. 
+- `-L` or `--line` reads a single line at a time from the input stream and stores each line in one of the `N` given variables. No more than `N` lines are consumed (one line per variable) from the input stream. + +- `-A` or `--all-lines` splits input into the given variables, separated by line breaks. The entire input stream is consumed and interactive mode is disabled. Probably only useful with `-a` to read all lines into a single array variable. Where possible, `COMMAND | while read --line` should be preferred over `COMMAND | read --all-lines` as the latter will block until the input stream has been consumed, leading to latency and decreased responsiveness. + `read` reads a single line of input from stdin, breaks it into tokens based on the delimiter set via `-d`/`--delimiter` as a complete string (like `string split` or, if that has not been given the (deprecated) `IFS` shell variable as a set of characters, and then assigns one token to each variable specified in `VARIABLES`. If there are more tokens than variables, the complete remainder is assigned to the last variable. As a special case, if `IFS` is set to the empty string, each character of the input is considered a separate token. If no parameters are provided, `read` enters a special case that simply provides redirection from `stdin` to `stdout`, useful for command substitution. 
For instance, the fish shell command below can be used to read data that should be provided via a command line argument from the console instead of hardcoding it in the command itself, allowing the command to both be reused as-is in various contexts with different input values and preventing possibly sensitive text from being included in the shell history: diff --git a/src/builtin_read.cpp b/src/builtin_read.cpp index 4a3afa050..37edcb548 100644 --- a/src/builtin_read.cpp +++ b/src/builtin_read.cpp @@ -49,27 +49,33 @@ struct read_cmd_opts_t { bool split_null = false; bool to_stdout = false; int nchars = 0; + bool all_lines = false; + bool one_line = false; }; -static const wchar_t *short_options = L":ac:ghilm:n:p:d:suxzP:UR:"; -static const struct woption long_options[] = {{L"export", no_argument, NULL, 'x'}, - {L"global", no_argument, NULL, 'g'}, - {L"local", no_argument, NULL, 'l'}, - {L"universal", no_argument, NULL, 'U'}, - {L"unexport", no_argument, NULL, 'u'}, - {L"prompt", required_argument, NULL, 'p'}, - {L"prompt-str", required_argument, NULL, 'P'}, - {L"right-prompt", required_argument, NULL, 'R'}, - {L"command", required_argument, NULL, 'c'}, - {L"mode-name", required_argument, NULL, 'm'}, - {L"silent", no_argument, NULL, 's'}, - {L"nchars", required_argument, NULL, 'n'}, - {L"delimiter", required_argument, NULL, 'd'}, - {L"shell", no_argument, NULL, 'S'}, - {L"array", no_argument, NULL, 'a'}, - {L"null", no_argument, NULL, 'z'}, - {L"help", no_argument, NULL, 'h'}, - {NULL, 0, NULL, 0}}; +static const wchar_t *short_options = L":Aac:d:ghiLlm:n:p:suxzP:UR:LB"; +static const struct woption long_options[] = { + {L"array", no_argument, NULL, 'a'}, + {L"all-lines", no_argument, NULL, 'A'}, + {L"command", required_argument, NULL, 'c'}, + {L"delimiter", required_argument, NULL, 'd'}, + {L"export", no_argument, NULL, 'x'}, + {L"global", no_argument, NULL, 'g'}, + {L"help", no_argument, NULL, 'h'}, + {L"line", no_argument, NULL, 'L'}, + {L"local", no_argument, 
NULL, 'l'}, + {L"mode-name", required_argument, NULL, 'm'}, + {L"nchars", required_argument, NULL, 'n'}, + {L"null", no_argument, NULL, 'z'}, + {L"prompt", required_argument, NULL, 'p'}, + {L"prompt-str", required_argument, NULL, 'P'}, + {L"right-prompt", required_argument, NULL, 'R'}, + {L"shell", no_argument, NULL, 'S'}, + {L"silent", no_argument, NULL, 's'}, + {L"unexport", no_argument, NULL, 'u'}, + {L"universal", no_argument, NULL, 'U'}, + {NULL, 0, NULL, 0} +}; static int parse_cmd_opts(read_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { @@ -78,42 +84,44 @@ static int parse_cmd_opts(read_cmd_opts_t &opts, int *optind, //!OCLINT(high nc wgetopter_t w; while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, NULL)) != -1) { switch (opt) { - case L'x': { - opts.place |= ENV_EXPORT; + case 'a': { + opts.array = true; break; } + case L'A': { + opts.all_lines = true; + break; + } + case L'c': { + opts.commandline = w.woptarg; + break; + } + case 'd': { + opts.have_delimiter = true; + opts.delimiter = w.woptarg; + break; + } + case 'i': { + streams.err.append_format(_(L"%ls: usage of -i for --silent is deprecated. Please use -s or --silent instead.\n"), + cmd); + return STATUS_INVALID_ARGS; + } case L'g': { opts.place |= ENV_GLOBAL; break; } + case 'h': { + opts.print_help = true; + break; + } + case L'L': { + opts.one_line = true; + break; + } case L'l': { opts.place |= ENV_LOCAL; break; } - case L'U': { - opts.place |= ENV_UNIVERSAL; - break; - } - case L'u': { - opts.place |= ENV_UNEXPORT; - break; - } - case L'p': { - opts.prompt = w.woptarg; - break; - } - case L'P': { - opts.prompt_str = w.woptarg; - break; - } - case L'R': { - opts.right_prompt = w.woptarg; - break; - } - case L'c': { - opts.commandline = w.woptarg; - break; - } case L'm': { streams.err.append_format(_(L"%ls: flags '--mode-name' / '-m' are now ignored. 
" L"Set fish_history instead.\n"), @@ -137,34 +145,40 @@ static int parse_cmd_opts(read_cmd_opts_t &opts, int *optind, //!OCLINT(high nc } break; } - case 'd': { - opts.have_delimiter = true; - opts.delimiter = w.woptarg; + case L'P': { + opts.prompt_str = w.woptarg; break; } - case 'i': { - streams.err.append_format(_(L"%ls: usage of -i for --silent is deprecated. Please use -s or --silent instead.\n"), - cmd); - return STATUS_INVALID_ARGS; + case L'p': { + opts.prompt = w.woptarg; + break; + } + case L'R': { + opts.right_prompt = w.woptarg; + break; } case 's': { opts.silent = true; break; } - case 'a': { - opts.array = true; - break; - } case L'S': { opts.shell = true; break; } - case L'z': { - opts.split_null = true; + case L'U': { + opts.place |= ENV_UNIVERSAL; break; } - case 'h': { - opts.print_help = true; + case L'u': { + opts.place |= ENV_UNEXPORT; + break; + } + case L'x': { + opts.place |= ENV_EXPORT; + break; + } + case L'z': { + opts.split_null = true; break; } case ':': { @@ -341,11 +355,28 @@ static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool spli static int validate_read_args(const wchar_t *cmd, read_cmd_opts_t &opts, int argc, const wchar_t *const *argv, parser_t &parser, io_streams_t &streams) { if (opts.prompt && opts.prompt_str) { - streams.err.append_format(_(L"%ls: You can't specify both -p and -P\n"), cmd); + streams.err.append_format(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"-p", L"-P"); builtin_print_help(parser, streams, cmd, streams.err); return STATUS_INVALID_ARGS; } + if (opts.have_delimiter && opts.all_lines) { + streams.err.append_format(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"--delimiter", L"--all-lines"); + return STATUS_INVALID_ARGS; + } + if (opts.have_delimiter && opts.one_line) { + streams.err.append_format(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"--delimiter", L"--line"); + return STATUS_INVALID_ARGS; + } + if (opts.one_line && 
opts.all_lines) { + streams.err.append_format(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"--all-lines", L"--line"); + return STATUS_INVALID_ARGS; + } + if (opts.one_line && opts.split_null) { + streams.err.append_format(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"-z", L"--line"); + return STATUS_INVALID_ARGS; + } + if (opts.prompt_str) { opts.prompt_cmd = L"echo " + escape_string(opts.prompt_str, ESCAPE_ALL); opts.prompt = opts.prompt_cmd.c_str(); @@ -422,6 +453,21 @@ int builtin_read(parser_t &parser, io_streams_t &streams, wchar_t **argv) { retval = validate_read_args(cmd, opts, argc, argv, parser, streams); if (retval != STATUS_CMD_OK) return retval; + if (opts.all_lines) { + // --all-lines is the same as read -d \n -z + opts.have_delimiter = true; + opts.delimiter = L"\n"; + opts.split_null = true; + opts.shell = false; + } + else if (opts.one_line) { + // --line is the same as read -d \n + opts.have_delimiter = true; + opts.delimiter = L"\n"; + opts.split_null = false; + opts.shell = false; + } + // TODO: Determine if the original set of conditions for interactive reads should be reinstated: // if (isatty(0) && streams.stdin_fd == STDIN_FILENO && !split_null) { int stream_stdin_is_a_tty = isatty(streams.stdin_fd); @@ -517,7 +563,8 @@ int builtin_read(parser_t &parser, io_streams_t &streams, wchar_t **argv) { // We're using a delimiter provided by the user so use the `string split` behavior. wcstring_list_t splits; split_about(buff.begin(), buff.end(), opts.delimiter.begin(), opts.delimiter.end(), - &splits, LONG_MAX); + &splits); + env_set(argv[0], opts.place, splits); } } else { diff --git a/src/wcstringutil.h b/src/wcstringutil.h index ad9404ede..483817bff 100644 --- a/src/wcstringutil.h +++ b/src/wcstringutil.h @@ -26,9 +26,10 @@ wcstring_range wcstring_tok(wcstring& str, const wcstring& needle, /// If the iterators are forward, this does the normal thing. 
/// If the iterators are backward, this returns reversed strings, in reversed order! /// If the needle is empty, split on individual elements (characters). +/// Max output entries will be max + 1 (after max splits) template void split_about(ITER haystack_start, ITER haystack_end, ITER needle_start, ITER needle_end, - wcstring_list_t* output, long max, bool no_empty = false) { + wcstring_list_t* output, long max = LONG_MAX, bool no_empty = false) { long remaining = max; ITER haystack_cursor = haystack_start; while (remaining > 0 && haystack_cursor != haystack_end) {