From 35671dd9f094cf8c643c9cf224c27a2b55578937 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 14 Oct 2019 13:20:31 -0700 Subject: [PATCH] Clean up and unify pipes and redirections This cleans up how pipes and redirections are recognized by the parser, and unifies pipes and redirections into a single type. --- src/fish_tests.cpp | 57 +++++--- src/highlight.cpp | 23 ++-- src/parse_execution.cpp | 37 +++--- src/tnode.cpp | 13 +- src/tnode.h | 8 +- src/tokenizer.cpp | 281 ++++++++++++++++++++++------------------ src/tokenizer.h | 44 ++++++- 7 files changed, 280 insertions(+), 183 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 2a566fd23..8f8b8a3c9 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -669,35 +669,60 @@ static void test_tokenizer() { do_test(token->error_offset == 4); } - // Test redirection_type_for_string. - if (redirection_type_for_string(L"<") != redirection_type_t::input) + // Test some redirection parsing. + auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_t::from_string(s); }; + do_test(pipe_or_redir(L"|")->is_pipe); + do_test(pipe_or_redir(L"0>|")->is_pipe); + do_test(pipe_or_redir(L"0>|")->fd == 0); + do_test(pipe_or_redir(L"2>|")->is_pipe); + do_test(pipe_or_redir(L"2>|")->fd == 2); + do_test(pipe_or_redir(L">|")->is_pipe); + do_test(pipe_or_redir(L">|")->fd == STDOUT_FILENO); + do_test(!pipe_or_redir(L">")->is_pipe); + do_test(pipe_or_redir(L">")->fd == STDOUT_FILENO); + do_test(pipe_or_redir(L"2>")->fd == STDERR_FILENO); + do_test(pipe_or_redir(L"9999999999999>")->fd == -1); + do_test(pipe_or_redir(L"9999999999999>&2")->fd == -1); + do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false); + do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false); + + auto get_redir_mode = [](const wchar_t *s) -> maybe_t { + if (auto redir = pipe_or_redir_t::from_string(s)) { + return redir->mode; + } + return none(); + }; + + if (get_redir_mode(L"<") != redirection_mode_t::input) 
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"^") != redirection_type_t::overwrite) + if (get_redir_mode(L"^") != redirection_mode_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L">") != redirection_type_t::overwrite) + if (get_redir_mode(L">") != redirection_mode_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>") != redirection_type_t::overwrite) + if (get_redir_mode(L"2>") != redirection_mode_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L">>") != redirection_type_t::append) + if (get_redir_mode(L">>") != redirection_mode_t::append) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>>") != redirection_type_t::append) + if (get_redir_mode(L"2>>") != redirection_mode_t::append) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>?") != redirection_type_t::noclob) + if (get_redir_mode(L"2>?") != redirection_mode_t::noclob) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"9999999999999999>?")) + if (get_redir_mode(L"9999999999999999>?") != redirection_mode_t::noclob) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>&3") != redirection_type_t::fd) + if (get_redir_mode(L"2>&3") != redirection_mode_t::fd) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>|")) + if (get_redir_mode(L"3<&0") != redirection_mode_t::fd) + err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (get_redir_mode(L"3buffer().size() != 0) { - err(L"Expected 0 bytes in out_buff, but instead found %lu 
bytes\n", - buffer->buffer().size()); + err(L"Expected 0 bytes in out_buff, but instead found %lu bytes, for command %ls\n", + buffer->buffer().size(), src); } iothread_drain_all(); } @@ -5435,7 +5460,7 @@ int main(int argc, char **argv) { if (should_test_function("format")) test_format(); if (should_test_function("convert")) test_convert(); if (should_test_function("convert_nulls")) test_convert_nulls(); - if (should_test_function("tok")) test_tokenizer(); + if (should_test_function("tokenizer")) test_tokenizer(); if (should_test_function("iothread")) test_iothread(); if (should_test_function("pthread")) test_pthread(); if (should_test_function("parser")) test_parser(); diff --git a/src/highlight.cpp b/src/highlight.cpp index 73e7b424c..6e0398dc4 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -958,11 +958,12 @@ void highlighter_t::color_redirection(tnode_t redirection_node) if (redir_prim) { wcstring target; - const maybe_t redirect_type = - redirection_type(redirection_node, this->buff, nullptr, &target); + const maybe_t redirect = + redirection_for_node(redirection_node, this->buff, &target); // We may get a missing redirection type if the redirection is invalid. - auto hl = redirect_type ? highlight_role_t::redirection : highlight_role_t::error; + auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection + : highlight_role_t::error; this->color_node(redir_prim, hl); // Check if the argument contains a command substitution. If so, highlight it as a param @@ -974,7 +975,7 @@ void highlighter_t::color_redirection(tnode_t redirection_node) // disallow redirections into a non-existent directory. bool target_is_valid = true; - if (!redirect_type) { + if (!redirect || !redirect->is_valid()) { // not a valid redirection target_is_valid = false; } else if (!this->io_ok) { @@ -990,8 +991,8 @@ void highlighter_t::color_redirection(tnode_t redirection_node) // redirections). Note that the target is now unescaped. 
const wcstring target_path = path_apply_working_directory(target, this->working_directory); - switch (*redirect_type) { - case redirection_type_t::fd: { + switch (redirect->mode) { + case redirection_mode_t::fd: { if (target == L"-") { target_is_valid = true; } else { @@ -1000,16 +1001,16 @@ void highlighter_t::color_redirection(tnode_t redirection_node) } break; } - case redirection_type_t::input: { + case redirection_mode_t::input: { // Input redirections must have a readable non-directory. struct stat buf = {}; target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) && !S_ISDIR(buf.st_mode); break; } - case redirection_type_t::overwrite: - case redirection_type_t::append: - case redirection_type_t::noclob: { + case redirection_mode_t::overwrite: + case redirection_mode_t::append: + case redirection_mode_t::noclob: { // Test whether the file exists, and whether it's writable (possibly after // creating it). access() returns failure if the file does not exist. bool file_exists = false, file_is_writable = false; @@ -1053,7 +1054,7 @@ void highlighter_t::color_redirection(tnode_t redirection_node) // NOCLOB means that we must not overwrite files that exist. target_is_valid = file_is_writable && - !(file_exists && redirect_type == redirection_type_t::noclob); + !(file_exists && redirect->mode == redirection_mode_t::noclob); break; } } diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index 491df34b6..badec1d8e 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -967,9 +967,15 @@ bool parse_execution_context_t::determine_io_chain(tnode_t()) { - int source_fd = -1; // source fd - wcstring target; // file path or target fd - auto redirect_type = redirection_type(redirect_node, pstree->src, &source_fd, &target); + wcstring target; // file path or target fd + auto redirect = redirection_for_node(redirect_node, pstree->src, &target); + + if (!redirect || !redirect->is_valid()) { + // TODO: improve this error message. 
+ report_error(redirect_node, _(L"Invalid redirection: %ls"), + redirect_node.get_source(pstree->src).c_str()); + return false; + } // PCA: I can't justify this skip_variables flag. It was like this when I got here. bool target_expanded = @@ -977,17 +983,17 @@ bool parse_execution_context_t::determine_io_chain(tnode_tvars(), parser->shared()); if (!target_expanded || target.empty()) { // TODO: Improve this error message. - errored = - report_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str()); + report_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str()); + return false; } // Generate the actual IO redirection. shared_ptr new_io; - assert(redirect_type && "expected to have a valid redirection"); - switch (*redirect_type) { - case redirection_type_t::fd: { + assert(redirect && redirect->is_valid() && "expected to have a valid redirection"); + switch (redirect->mode) { + case redirection_mode_t::fd: { if (target == L"-") { - new_io.reset(new io_close_t(source_fd)); + new_io.reset(new io_close_t(redirect->fd)); } else { int old_fd = fish_wcstoi(target.c_str()); if (errno || old_fd < 0) { @@ -996,14 +1002,14 @@ bool parse_execution_context_t::determine_io_chain(tnode_tfd, old_fd, true)); } } break; } default: { - int oflags = oflags_for_redirection_type(*redirect_type); - io_file_t *new_io_file = new io_file_t(source_fd, target, oflags); + int oflags = redirect->oflags(); + io_file_t *new_io_file = new io_file_t(redirect->fd, target, oflags); new_io.reset(new_io_file); break; } @@ -1127,12 +1133,13 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node( tnode_t statement = job_cont.require_get_child(); // Handle the pipe, whose fd may not be the obvious stdout. 
- int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe)); - if (pipe_write_fd == -1) { + auto parsed_pipe = pipe_or_redir_t::from_string(get_source(pipe)); + assert(parsed_pipe.has_value() && parsed_pipe->is_pipe && "Failed to parse valid pipe"); + if (!parsed_pipe->is_valid()) { result = report_error(pipe, ILLEGAL_FD_ERR_MSG, get_source(pipe).c_str()); break; } - processes.back()->pipe_write_fd = pipe_write_fd; + processes.back()->pipe_write_fd = parsed_pipe->fd; // Store the new process (and maybe with an error). processes.emplace_back(new process_t()); diff --git a/src/tnode.cpp b/src/tnode.cpp index 5c2e62232..cc9f84743 100644 --- a/src/tnode.cpp +++ b/src/tnode.cpp @@ -55,19 +55,20 @@ enum parse_bool_statement_type_t bool_statement_type( return static_cast(cont.tag()); } -maybe_t redirection_type(tnode_t redirection, - const wcstring &src, int *out_fd, - wcstring *out_target) { +maybe_t redirection_for_node(tnode_t redirection, + const wcstring &src, wcstring *out_target) { assert(redirection && "redirection is missing"); - maybe_t result{}; tnode_t prim = redirection.child<0>(); // like 2> assert(prim && "expected to have primitive"); + maybe_t result{}; if (prim.has_source()) { - result = redirection_type_for_string(prim.get_source(src), out_fd); + result = pipe_or_redir_t::from_string(prim.get_source(src)); + assert(result.has_value() && "Failed to parse valid redirection"); + assert(!result->is_pipe && "Should not be a pipe"); } if (out_target != NULL) { - tnode_t target = redirection.child<1>(); // like &1 or file path + tnode_t target = redirection.child<1>(); // like 1 or file path *out_target = target.has_source() ? 
target.get_source(src) : wcstring(); } return result; diff --git a/src/tnode.h b/src/tnode.h index fc2d14cc8..e65c8b783 100644 --- a/src/tnode.h +++ b/src/tnode.h @@ -242,10 +242,10 @@ enum parse_bool_statement_type_t bool_statement_type(tnode_t stmt); -/// Given a redirection, get the redirection type (or none) and target (file path, or fd). -maybe_t redirection_type(tnode_t redirection, - const wcstring &src, int *out_fd, - wcstring *out_target); +/// Given a redirection node, get the parsed redirection and target of the redirection (file path, +/// or fd). +maybe_t redirection_for_node(tnode_t redirection, + const wcstring &src, wcstring *out_target); /// Return the arguments under an arguments_list or arguments_or_redirection_list /// Do not return more than max. diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index e823ef17e..7fe4708a5 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -273,145 +273,172 @@ tok_t tokenizer_t::read_string() { return result; } -// Reads a redirection or an "fd pipe" (like 2>|) from a string. -// Returns the parsed pipe or redirection, or none() on error. -struct parsed_redir_or_pipe_t { - // Number of characters consumed. - size_t consumed{0}; - - // The token type, always either TOK_PIPE or TOK_REDIRECT. - token_type_t type{token_type_t::redirect}; - - // The redirection mode if the type is TOK_REDIRECT. - redirection_type_t redirection_mode{redirection_type_t::overwrite}; - - // The redirected fd, or -1 on overflow. - int fd{0}; -}; - -static maybe_t read_redirection_or_fd_pipe(const wchar_t *buff) { - bool errored = false; - parsed_redir_or_pipe_t result; - size_t idx = 0; - - // Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like - // '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the - // first character. Watch out for overflow. +// Parse an fd from the non-empty string [start, end), all of which are digits. 
+// Return the fd, or -1 on overflow. +static int parse_fd(const wchar_t *start, const wchar_t *end) { + assert(start < end && "String cannot be empty"); long long big_fd = 0; - for (; iswdigit(buff[idx]); idx++) { - // Note that it's important we consume all the digits here, even if it overflows. - if (big_fd <= INT_MAX) big_fd = big_fd * 10 + (buff[idx] - L'0'); + for (const wchar_t *cursor = start; cursor < end; ++cursor) { + assert(L'0' <= *cursor && *cursor <= L'9' && "Not a digit"); + big_fd = big_fd * 10 + (*cursor - L'0'); + if (big_fd > INT_MAX) return -1; } + assert(big_fd <= INT_MAX && "big_fd should be in range"); + return static_cast(big_fd); +} - result.fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); +pipe_or_redir_t::pipe_or_redir_t() = default; - if (idx == 0) { - // We did not find a leading digit, so there's no explicit fd. Infer it from the type. - switch (buff[idx]) { - case L'>': { - result.fd = STDOUT_FILENO; - break; +maybe_t pipe_or_redir_t::from_string(const wchar_t *buff) { + pipe_or_redir_t result{}; + + /* Examples of supported syntaxes. + Note we are only responsible for parsing the redirection part, not 'cmd' or 'file'. + + cmd | cmd normal pipe + cmd >| cmd pipe with explicit fd + cmd 2>| cmd pipe with explicit fd + cmd < file stdin redirection + cmd > file redirection + cmd >> file appending redirection + cmd >? file noclobber redirection + cmd >>? file appending noclobber redirection + cmd 2> file file redirection with explicit fd + cmd >&2 file fd redirection with no explicit src fd (stdout is used) + cmd 1>&2 file fd redirection with an explicit src fd + cmd <&2 file fd redirection with no explicit src fd (stdin is used) + cmd 3<&0 file fd redirection with an explicit src fd + cmd ^ file caret (stderr) redirection, perhaps disabled via feature flags + cmd ^^ file caret (stderr) redirection, perhaps disabled via feature flags + */ + + const wchar_t *cursor = buff; + + // Extract a range of leading fd. 
+ const wchar_t *fd_start = cursor; + while (iswdigit(*cursor)) cursor++; + const wchar_t *fd_end = cursor; + bool has_fd = (fd_end > fd_start); + + // Try consuming a given character. + // Return true if consumed. On success, advances cursor. + auto try_consume = [&cursor](wchar_t c) -> bool { + if (*cursor != c) return false; + cursor++; + return true; + }; + + // Like try_consume, but asserts on failure. + auto consume = [&](wchar_t c) { + assert(*cursor == c && "Failed to consume char"); + cursor++; + }; + + switch (*cursor) { + case L'|': { + if (has_fd) { + // Like 123| + return none(); } - case L'<': { - result.fd = STDIN_FILENO; - break; + consume(L'|'); + assert(*cursor != L'|' && + "|| passed as redirection, this should have been handled as 'or' by the caller"); + result.fd = STDOUT_FILENO; + result.is_pipe = true; + break; + } + case L'>': { + consume(L'>'); + if (try_consume(L'|')) { + // Note we differ from bash here. + // Consider `echo foo 2>| bar` + // In fish, this is a *pipe*. Run bar as a command and attach foo's stderr to bar's + // stdin, while leaving stdout as tty. + // In bash, this is a *redirection* to bar as a file. It is like > but ignores + // noclobber. + result.is_pipe = true; + result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 2>| + : STDOUT_FILENO; // like >| + } else if (try_consume(L'&')) { + // This is a redirection to an fd. + result.mode = redirection_mode_t::fd; + result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1>&2 + : STDOUT_FILENO; // like >&2 + } else { + // This is a redirection to a file. + result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1> file.txt + : STDOUT_FILENO; // like > file.txt + // Note 'echo abc >>? file' is valid: it means append and noclobber. + // But here "noclobber" means the file must not exist, so appending + // can be ignored. 
+ result.mode = redirection_mode_t::overwrite; + if (try_consume(L'>')) result.mode = redirection_mode_t::append; + if (try_consume(L'?')) result.mode = redirection_mode_t::noclob; } - case L'^': { - if (caret_redirs()) { - result.fd = STDERR_FILENO; - } else { - errored = true; + break; + } + case L'<': { + consume(L'<'); + if (try_consume('&')) { + result.mode = redirection_mode_t::fd; + } else { + result.mode = redirection_mode_t::input; + } + result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1<&3 or 1< /tmp/file.txt + : STDIN_FILENO; // like <&3 or < /tmp/file.txt + break; + } + case L'^': { + if (!caret_redirs()) { + // ^ is not special if caret_redirs is disabled. + return none(); + } else { + if (has_fd) { + return none(); } - break; - } - default: { - errored = true; + consume(L'^'); + result.fd = STDERR_FILENO; + result.mode = redirection_mode_t::overwrite; + if (try_consume(L'^')) result.mode = redirection_mode_t::append; + if (try_consume(L'?')) result.mode = redirection_mode_t::noclob; break; } } - } - // Either way we should have ended on the redirection character itself like '>'. - // Don't allow an fd with a caret redirection - see #1873 - wchar_t redirect_char = buff[idx++]; // note increment of idx - if (redirect_char == L'>' || (redirect_char == L'^' && idx == 1 && caret_redirs())) { - result.redirection_mode = redirection_type_t::overwrite; - if (buff[idx] == redirect_char) { - // Doubled up like ^^ or >>. That means append. - result.redirection_mode = redirection_type_t::append; - idx++; + default: { + // Not a redirection. + return none(); } - } else if (redirect_char == L'<') { - result.redirection_mode = redirection_type_t::input; - } else { - // Something else. - errored = true; } - // Bail on error. - if (errored) { - return none(); - } - - // Optional characters like & or ?, or the pipe char |. 
- wchar_t opt_char = buff[idx]; - if (opt_char == L'&') { - result.redirection_mode = redirection_type_t::fd; - idx++; - } else if (opt_char == L'?') { - result.redirection_mode = redirection_type_t::noclob; - idx++; - } else if (opt_char == L'|') { - // So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets - // handled elsewhere. - result.type = token_type_t::pipe; - idx++; - } - - result.consumed = idx; + result.consumed = (cursor - buff); + assert(result.consumed > 0 && "Should have consumed at least one character on success"); return result; } -maybe_t redirection_type_for_string(const wcstring &str, int *out_fd) { - auto v = read_redirection_or_fd_pipe(str.c_str()); - // Redirections only, no pipes. - if (!v || v->type != token_type_t::redirect || v->fd < 0) return none(); - if (out_fd) *out_fd = v->fd; - return v->redirection_mode; -} - -int fd_redirected_by_pipe(const wcstring &str) { - // Hack for the common case. - if (str == L"|") { - return STDOUT_FILENO; - } - auto v = read_redirection_or_fd_pipe(str.c_str()); - return (v && v->type == token_type_t::pipe) ? v->fd : -1; -} - -int oflags_for_redirection_type(redirection_type_t type) { - switch (type) { - case redirection_type_t::append: { +int pipe_or_redir_t::oflags() const { + switch (mode) { + case redirection_mode_t::append: { return O_CREAT | O_APPEND | O_WRONLY; } - case redirection_type_t::overwrite: { + case redirection_mode_t::overwrite: { return O_CREAT | O_WRONLY | O_TRUNC; } - case redirection_type_t::noclob: { + case redirection_mode_t::noclob: { return O_CREAT | O_EXCL | O_WRONLY; } - case redirection_type_t::input: { + case redirection_mode_t::input: { return O_RDONLY; } - case redirection_type_t::fd: + case redirection_mode_t::fd: default: { return -1; } } } -/// Test if a character is whitespace. Differs from iswspace in that it does not consider a newline -/// to be whitespace. +/// Test if a character is whitespace. 
Differs from iswspace in that it does not consider a +/// newline to be whitespace. static bool iswspace_not_nl(wchar_t c) { switch (c) { case L' ': @@ -430,7 +457,8 @@ maybe_t tokenizer_t::next() { return none(); } - // Consume non-newline whitespace. If we get an escaped newline, mark it and continue past it. + // Consume non-newline whitespace. If we get an escaped newline, mark it and continue past + // it. bool preceding_escaped_nl = false; for (;;) { if (this->buff[0] == L'\\' && this->buff[1] == L'\n') { @@ -512,23 +540,27 @@ maybe_t tokenizer_t::next() { result->length = 2; this->buff += 2; } else { - result.emplace(token_type_t::pipe); + auto pipe = pipe_or_redir_t::from_string(buff); + assert(pipe.has_value() && pipe->is_pipe && + "Should always succeed to parse a | pipe"); + result.emplace(pipe->token_type()); result->offset = start_pos; - result->length = 1; - this->buff++; + result->length = pipe->consumed; + this->buff += pipe->consumed; } break; } case L'>': case L'<': { - // There's some duplication with the code in the default case below. The key difference - // here is that we must never parse these as a string; a failed redirection is an error! - auto redir_or_pipe = read_redirection_or_fd_pipe(this->buff); + // There's some duplication with the code in the default case below. The key + // difference here is that we must never parse these as a string; a failed + // redirection is an error! + auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff); if (!redir_or_pipe || redir_or_pipe->fd < 0) { return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, this->buff); } - result.emplace(redir_or_pipe->type); + result.emplace(redir_or_pipe->token_type()); result->offset = start_pos; result->length = redir_or_pipe->consumed; this->buff += redir_or_pipe->consumed; @@ -537,20 +569,20 @@ maybe_t tokenizer_t::next() { default: { // Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string. 
const wchar_t *error_location = this->buff; - maybe_t redir_or_pipe; + maybe_t redir_or_pipe{}; if (iswdigit(*this->buff) || (*this->buff == L'^' && caret_redirs())) { - redir_or_pipe = read_redirection_or_fd_pipe(this->buff); + redir_or_pipe = pipe_or_redir_t::from_string(this->buff); } - if (redir_or_pipe && redir_or_pipe->consumed > 0) { + if (redir_or_pipe) { // It looks like a redirection or a pipe. But we don't support piping fd 0. Note - // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer - // error. - if (redir_or_pipe->type == token_type_t::pipe && redir_or_pipe->fd == 0) { + // that the fd may be -1, indicating overflow; but we don't treat that as a + // tokenizer error. + if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) { return this->call_error(tokenizer_error_t::invalid_pipe, error_location, error_location); } - result.emplace(redir_or_pipe->type); + result.emplace(redir_or_pipe->token_type()); result->offset = start_pos; result->length = redir_or_pipe->consumed; this->buff += redir_or_pipe->consumed; @@ -637,7 +669,8 @@ bool move_word_state_machine_t::consume_char_punctuation(wchar_t c) { bool move_word_state_machine_t::is_path_component_character(wchar_t c) { // Always treat separators as first. All this does is ensure that we treat ^ as a string - // character instead of as stderr redirection, which I hypothesize is usually what is desired. + // character instead of as stderr redirection, which I hypothesize is usually what is + // desired. 
return tok_is_string_character(c, true) && !std::wcschr(L"/={,}'\":@", c); } diff --git a/src/tokenizer.h b/src/tokenizer.h index 29606db6d..91fd55993 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -22,7 +22,7 @@ enum class token_type_t { comment, /// comment token }; -enum class redirection_type_t { +enum class redirection_mode_t { overwrite, // normal redirection: > file.txt append, // appending redirection: >> file.txt input, // input redirection: < file.txt @@ -138,14 +138,44 @@ class tokenizer_t { /// returns the empty string. wcstring tok_first(const wcstring &str); -/// Helper function to determine redirection type from a string. Also returns the fd by reference. -maybe_t redirection_type_for_string(const wcstring &str, int *out_fd = NULL); +/// Struct wrapping up a parsed pipe or redirection. +struct pipe_or_redir_t { + // The redirected fd, or -1 on overflow. + // In the common case of a pipe, this is 1 (STDOUT_FILENO). + // For example, in the case of "3>&1" this will be 3. + int fd{0}; -/// Helper function to determine which fd is redirected by a pipe. -int fd_redirected_by_pipe(const wcstring &str); + // Whether we are a pipe (true) or redirection (false). + bool is_pipe{false}; -/// Helper function to return oflags (as in open(2)) for a redirection type. -int oflags_for_redirection_type(redirection_type_t type); + // The redirection mode if the type is redirect. + // Ignored for pipes. + redirection_mode_t mode{redirection_mode_t::overwrite}; + + // Number of characters consumed when parsing the string. + size_t consumed{0}; + + // Construct from a string. + static maybe_t from_string(const wchar_t *buff); + static maybe_t from_string(const wcstring &buff) { + return from_string(buff.c_str()); + } + + // \return the oflags (as in open(2)) for this redirection. + int oflags() const; + + // \return if we are "valid". Here "valid" means only that the source fd did not overflow. + // For example 99999999999> is invalid. 
+ bool is_valid() const { return fd >= 0; } + + // \return the token type for this redirection. + token_type_t token_type() const { + return is_pipe ? token_type_t::pipe : token_type_t::redirect; + } + + private: + pipe_or_redir_t(); +}; enum move_word_style_t { move_word_style_punctuation, // stop at punctuation