From 99fb7bb6aae2e8914174006ad19768afc935e2f2 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 23 Feb 2018 15:19:58 -0800 Subject: [PATCH] Refactor how redirections are represented by the tokenizer Prior to this fix, each redirection type was a separate token_type. Unify these under a single type TOK_REDIRECT and break the redirection type out into a new sub-type redirection_type_t. --- src/fish_tests.cpp | 27 ++++---- src/highlight.cpp | 34 +++++----- src/parse_execution.cpp | 22 ++----- src/parse_tree.cpp | 6 +- src/tnode.cpp | 7 +- src/tnode.h | 7 +- src/tokenizer.cpp | 143 +++++++++++++++++++--------------------- src/tokenizer.h | 32 +++++---- 8 files changed, 131 insertions(+), 147 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 880bf29ef..e5d21e585 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -536,10 +536,9 @@ static void test_tokenizer() { L"string &1 'nested \"quoted\" '(string containing subshells " L"){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect " L"Compress_Newlines\n \n\t\n \nInto_Just_One"; - const int types[] = {TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, - TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, - TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, - TOK_STRING}; + const int types[] = {TOK_STRING, TOK_REDIRECT, TOK_STRING, TOK_REDIRECT, TOK_STRING, + TOK_STRING, TOK_STRING, TOK_REDIRECT, TOK_REDIRECT, TOK_STRING, + TOK_STRING, TOK_END, TOK_STRING}; say(L"Test correct tokenization"); @@ -594,25 +593,25 @@ static void test_tokenizer() { } // Test redirection_type_for_string. - if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) + if (redirection_type_for_string(L"<") != redirection_type_t::input) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT) + if (redirection_type_for_string(L"^") != redirection_type_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT) + if (redirection_type_for_string(L">") != redirection_type_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT) + if (redirection_type_for_string(L"2>") != redirection_type_t::overwrite) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND) + if (redirection_type_for_string(L">>") != redirection_type_t::append) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND) + if (redirection_type_for_string(L"2>>") != redirection_type_t::append) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB) + if (redirection_type_for_string(L"2>?") != redirection_type_t::noclob) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE) + if (redirection_type_for_string(L"9999999999999999>?")) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>&3") != TOK_REDIRECT_FD) + if (redirection_type_for_string(L"2>&3") != redirection_type_t::fd) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (redirection_type_for_string(L"2>|") != TOK_NONE) + if (redirection_type_for_string(L"2>|")) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); } diff --git a/src/highlight.cpp b/src/highlight.cpp index 58bca6d13..cc69010ec 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -836,11 +836,11 @@ void highlighter_t::color_redirection(tnode_t redirection_node) if (redir_prim) { wcstring target; - const enum token_type redirect_type = + const maybe_t redirect_type = redirection_type(redirection_node, this->buff, nullptr, &target); - // We may get a TOK_NONE redirection type, e.g. if the redirection is invalid. - auto hl = redirect_type == TOK_NONE ? highlight_spec_error : highlight_spec_redirection; + // We may get a missing redirection type if the redirection is invalid. + auto hl = redirect_type ? highlight_spec_redirection : highlight_spec_error; this->color_node(redir_prim, hl); // Check if the argument contains a command substitution. If so, highlight it as a param @@ -852,7 +852,10 @@ void highlighter_t::color_redirection(tnode_t redirection_node) // disallow redirections into a non-existent directory. bool target_is_valid = true; - if (!this->io_ok) { + if (!redirect_type) { + // not a valid redirection + target_is_valid = false; + } else if (!this->io_ok) { // I/O is disallowed, so we don't have much hope of catching anything but gross // errors. Assume it's valid. target_is_valid = true; @@ -865,22 +868,22 @@ void highlighter_t::color_redirection(tnode_t redirection_node) // redirections). Note that the target is now unescaped. const wcstring target_path = path_apply_working_directory(target, this->working_directory); - switch (redirect_type) { - case TOK_REDIRECT_FD: { + switch (*redirect_type) { + case redirection_type_t::fd: { int fd = fish_wcstoi(target.c_str()); target_is_valid = !errno && fd >= 0; break; } - case TOK_REDIRECT_IN: { + case redirection_type_t::input: { // Input redirections must have a readable non-directory. struct stat buf = {}; target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) && !S_ISDIR(buf.st_mode); break; } - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_NOCLOB: { + case redirection_type_t::overwrite: + case redirection_type_t::append: + case redirection_type_t::noclob: { // Test whether the file exists, and whether it's writable (possibly after // creating it). access() returns failure if the file does not exist. bool file_exists = false, file_is_writable = false; @@ -922,14 +925,9 @@ void highlighter_t::color_redirection(tnode_t redirection_node) } // NOCLOB means that we must not overwrite files that exist. - target_is_valid = file_is_writable && - !(file_exists && redirect_type == TOK_REDIRECT_NOCLOB); - break; - } - default: { - // We should not get here, since the node was marked as a redirection, but - // treat it as an error for paranoia. - target_is_valid = false; + target_is_valid = + file_is_writable && + !(file_exists && redirect_type == redirection_type_t::noclob); break; } } diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index 428e088b9..6116424ab 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -871,8 +871,7 @@ bool parse_execution_context_t::determine_io_chain(tnode_t()) { int source_fd = -1; // source fd wcstring target; // file path or target fd - enum token_type redirect_type = - redirection_type(redirect_node, pstree->src, &source_fd, &target); + auto redirect_type = redirection_type(redirect_node, pstree->src, &source_fd, &target); // PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0, NULL); @@ -884,9 +883,9 @@ bool parse_execution_context_t::determine_io_chain(tnode_t new_io; - assert(redirect_type != TOK_NONE); - switch (redirect_type) { - case TOK_REDIRECT_FD: { + assert(redirect_type && "expected to have a valid redirection"); + switch (*redirect_type) { + case redirection_type_t::fd: { if (target == L"-") { new_io.reset(new io_close_t(source_fd)); } else { @@ -902,21 +901,12 @@ bool parse_execution_context_t::determine_io_chain(tnode_t(stmt.tag()); } -enum token_type redirection_type(tnode_t redirection, const wcstring &src, - int *out_fd, wcstring *out_target) { +maybe_t redirection_type(tnode_t redirection, + const wcstring &src, int *out_fd, + wcstring *out_target) { assert(redirection && "redirection is missing"); - enum token_type result = TOK_NONE; + maybe_t result{}; tnode_t prim = redirection.child<0>(); // like 2> assert(prim && "expected to have primitive"); diff --git a/src/tnode.h b/src/tnode.h index 95c8f129f..c7edc5da6 100644 --- a/src/tnode.h +++ b/src/tnode.h @@ -234,9 +234,10 @@ parse_statement_decoration_t get_decoration(tnode_t st /// Return the type for a boolean statement. enum parse_bool_statement_type_t bool_statement_type(tnode_t stmt); -/// Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd). -enum token_type redirection_type(tnode_t redirection, const wcstring &src, - int *out_fd, wcstring *out_target); +/// Given a redirection, get the redirection type (or none) and target (file path, or fd). +maybe_t redirection_type(tnode_t redirection, + const wcstring &src, int *out_fd, + wcstring *out_target); /// Return the arguments under an arguments_list or arguments_or_redirection_list /// Do not return more than max. diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 810d68ffa..fdcf052c7 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -323,15 +323,25 @@ tok_t tokenizer_t::read_string() { return result; } -/// Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were -/// consumed. If zero, then this string was not a redirection. Also returns by reference the -/// redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1. -static size_t read_redirection_or_fd_pipe(const wchar_t *buff, - enum token_type *out_redirection_mode, int *out_fd) { - bool errored = false; - int fd = 0; - enum token_type redirection_mode = TOK_NONE; +// Reads a redirection or an "fd pipe" (like 2>|) from a string. +// Returns the parsed pipe or redirection, or none() on error. +struct parsed_redir_or_pipe_t { + // Number of characters consumed. + size_t consumed{0}; + // The token type, always either TOK_PIPE or TOK_REDIRECT. + token_type type{TOK_REDIRECT}; + + // The redirection mode if the type is TOK_REDIRECT. + redirection_type_t redirection_mode{redirection_type_t::overwrite}; + + // The redirected fd, or -1 on overflow. + int fd{0}; +}; + +static maybe_t read_redirection_or_fd_pipe(const wchar_t *buff) { + bool errored = false; + parsed_redir_or_pipe_t result; size_t idx = 0; // Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like @@ -343,21 +353,21 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, if (big_fd <= INT_MAX) big_fd = big_fd * 10 + (buff[idx] - L'0'); } - fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); + result.fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); if (idx == 0) { // We did not find a leading digit, so there's no explicit fd. Infer it from the type. switch (buff[idx]) { case L'>': { - fd = STDOUT_FILENO; + result.fd = STDOUT_FILENO; break; } case L'<': { - fd = STDIN_FILENO; + result.fd = STDIN_FILENO; break; } case L'^': { - fd = STDERR_FILENO; + result.fd = STDERR_FILENO; break; } default: { @@ -371,55 +381,49 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, // Don't allow an fd with a caret redirection - see #1873 wchar_t redirect_char = buff[idx++]; // note increment of idx if (redirect_char == L'>' || (redirect_char == L'^' && idx == 1)) { - redirection_mode = TOK_REDIRECT_OUT; + result.redirection_mode = redirection_type_t::overwrite; if (buff[idx] == redirect_char) { // Doubled up like ^^ or >>. That means append. - redirection_mode = TOK_REDIRECT_APPEND; + result.redirection_mode = redirection_type_t::append; idx++; } } else if (redirect_char == L'<') { - redirection_mode = TOK_REDIRECT_IN; + result.redirection_mode = redirection_type_t::input; } else { // Something else. errored = true; } - // Don't return valid-looking stuff on error. + // Bail on error. if (errored) { - idx = 0; - redirection_mode = TOK_NONE; - } else { - // Optional characters like & or ?, or the pipe char |. - wchar_t opt_char = buff[idx]; - if (opt_char == L'&') { - redirection_mode = TOK_REDIRECT_FD; - idx++; - } else if (opt_char == L'?') { - redirection_mode = TOK_REDIRECT_NOCLOB; - idx++; - } else if (opt_char == L'|') { - // So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets - // handled elsewhere. - redirection_mode = TOK_PIPE; - idx++; - } + return none(); } - // Return stuff. - if (out_redirection_mode != NULL) *out_redirection_mode = redirection_mode; - if (out_fd != NULL) *out_fd = fd; + // Optional characters like & or ?, or the pipe char |. + wchar_t opt_char = buff[idx]; + if (opt_char == L'&') { + result.redirection_mode = redirection_type_t::fd; + idx++; + } else if (opt_char == L'?') { + result.redirection_mode = redirection_type_t::noclob; + idx++; + } else if (opt_char == L'|') { + // So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets + // handled elsewhere. + result.type = TOK_PIPE; + idx++; + } - return idx; + result.consumed = idx; + return result; } -enum token_type redirection_type_for_string(const wcstring &str, int *out_fd) { - enum token_type mode = TOK_NONE; - int fd = 0; - read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); +maybe_t redirection_type_for_string(const wcstring &str, int *out_fd) { + auto v = read_redirection_or_fd_pipe(str.c_str()); // Redirections only, no pipes. - if (mode == TOK_PIPE || fd < 0) mode = TOK_NONE; - if (out_fd != NULL) *out_fd = fd; - return mode; + if (!v || v->type != TOK_REDIRECT || v->fd < 0) return none(); + if (out_fd) *out_fd = v->fd; + return v->redirection_mode; } int fd_redirected_by_pipe(const wcstring &str) { @@ -427,27 +431,22 @@ int fd_redirected_by_pipe(const wcstring &str) { if (str == L"|") { return STDOUT_FILENO; } - - enum token_type mode = TOK_NONE; - int fd = 0; - read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); - // Pipes only. - if (mode != TOK_PIPE || fd < 0) fd = -1; - return fd; + auto v = read_redirection_or_fd_pipe(str.c_str()); + return (v && v->type == TOK_PIPE) ? v->fd : -1; } -int oflags_for_redirection_type(enum token_type type) { +int oflags_for_redirection_type(redirection_type_t type) { switch (type) { - case TOK_REDIRECT_APPEND: { + case redirection_type_t::append: { return O_CREAT | O_APPEND | O_WRONLY; } - case TOK_REDIRECT_OUT: { + case redirection_type_t::overwrite: { return O_CREAT | O_WRONLY | O_TRUNC; } - case TOK_REDIRECT_NOCLOB: { + case redirection_type_t::noclob: { return O_CREAT | O_EXCL | O_WRONLY; } - case TOK_REDIRECT_IN: { + case redirection_type_t::input: { return O_RDONLY; } default: { return -1; } @@ -551,39 +550,35 @@ maybe_t tokenizer_t::tok_next() { case L'^': { // There's some duplication with the code in the default case below. The key difference // here is that we must never parse these as a string; a failed redirection is an error! - enum token_type mode = TOK_NONE; - int fd = -1; - size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd); - if (consumed == 0 || fd < 0) { + auto redir_or_pipe = read_redirection_or_fd_pipe(this->buff); + if (!redir_or_pipe || redir_or_pipe->fd < 0) { return this->call_error(TOK_INVALID_REDIRECT, this->buff, this->buff); } - result.type = mode; - result.redirected_fd = fd; - result.length = consumed; - this->buff += consumed; + result.type = redir_or_pipe->type; + result.redirected_fd = redir_or_pipe->fd; + result.length = redir_or_pipe->consumed; + this->buff += redir_or_pipe->consumed; break; } default: { // Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string. const wchar_t *error_location = this->buff; - size_t consumed = 0; - enum token_type mode = TOK_NONE; - int fd = -1; + maybe_t redir_or_pipe; if (iswdigit(*this->buff)) { - consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd); + redir_or_pipe = read_redirection_or_fd_pipe(this->buff); } - if (consumed > 0) { + if (redir_or_pipe && redir_or_pipe->consumed > 0) { // It looks like a redirection or a pipe. But we don't support piping fd 0. Note // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer // error. - if (mode == TOK_PIPE && fd == 0) { + if (redir_or_pipe->type == TOK_PIPE && redir_or_pipe->fd == 0) { return this->call_error(TOK_INVALID_PIPE, error_location, error_location); } - result.type = mode; - result.redirected_fd = fd; - result.length = consumed; - this->buff += consumed; + result.type = redir_or_pipe->type; + result.redirected_fd = redir_or_pipe->fd; + result.length = redir_or_pipe->consumed; + this->buff += redir_or_pipe->consumed; } else { // Not a redirection or pipe, so just a string. result = this->read_string(); diff --git a/src/tokenizer.h b/src/tokenizer.h index 21c8d038f..e95bd73c6 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -10,18 +10,14 @@ /// Token types. enum token_type { - TOK_NONE, /// Tokenizer not yet constructed - TOK_ERROR, /// Error reading token - TOK_STRING, /// String token - TOK_PIPE, /// Pipe token - TOK_END, /// End token (semicolon or newline, not literal end) - TOK_REDIRECT_OUT, /// redirection token - TOK_REDIRECT_APPEND, /// redirection append token - TOK_REDIRECT_IN, /// input redirection token - TOK_REDIRECT_FD, /// redirection to new fd token - TOK_REDIRECT_NOCLOB, /// redirection token - TOK_BACKGROUND, /// send job to bg token - TOK_COMMENT /// comment token + TOK_NONE, /// Tokenizer not yet constructed + TOK_ERROR, /// Error reading token + TOK_STRING, /// String token + TOK_PIPE, /// Pipe token + TOK_END, /// End token (semicolon or newline, not literal end) + TOK_REDIRECT, /// redirection token + TOK_BACKGROUND, /// send job to bg token + TOK_COMMENT /// comment token }; /// Tokenizer error types. @@ -35,6 +31,14 @@ enum tokenizer_error { TOK_INVALID_PIPE }; +enum class redirection_type_t { + overwrite, // normal redirection: > file.txt + append, // appending redirection: >> file.txt + input, // input redirection: < file.txt + fd, // fd redirection: 2>&1 + noclob // noclobber redirection: >? file.txt +}; + /// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching /// paranthesis, etc. This is useful for tab-completion. #define TOK_ACCEPT_UNFINISHED 1 @@ -127,13 +131,13 @@ wcstring tok_first(const wcstring &str); /// Helper function to determine redirection type from a string, or TOK_NONE if the redirection is /// invalid. Also returns the fd by reference. -enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL); +maybe_t redirection_type_for_string(const wcstring &str, int *out_fd = NULL); /// Helper function to determine which fd is redirected by a pipe. int fd_redirected_by_pipe(const wcstring &str); /// Helper function to return oflags (as in open(2)) for a redirection type. -int oflags_for_redirection_type(enum token_type type); +int oflags_for_redirection_type(redirection_type_t type); enum move_word_style_t { move_word_style_punctuation, // stop at punctuation