diff --git a/fish_tests.cpp b/fish_tests.cpp index b52b612d1..e013743ec 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -396,6 +396,18 @@ static void test_tok() } } } + + /* Test redirection_type_for_string */ + if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>&3") != TOK_REDIRECT_FD) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>|") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); } static int test_fork_helper(void *unused) @@ -2182,9 +2194,59 @@ static void test_highlighting(void) {L")", HIGHLIGHT_OPERATOR}, {NULL, -1} }; + + // Redirections substitutions + const highlight_component_t components8[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + + /* Input redirection */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/bin/echo", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to a valid fd */ + {L"1>&2", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to an invalid fd */ + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"LOL", HIGHLIGHT_ERROR}, + + /* Just a param, not a redirection */ + {L"/tmp/blah", HIGHLIGHT_PARAM}, + + /* Input redirection from directory */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/tmp/", HIGHLIGHT_ERROR}, + + /* Output redirection to an invalid path */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/not/a/valid/path/nope", HIGHLIGHT_ERROR}, + + /* Output redirection to directory */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/tmp/nope/", HIGHLIGHT_ERROR}, + + + /* Redirections to overflow fd */ + {L"99999999999999999999>&2", HIGHLIGHT_ERROR}, + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"99999999999999999999", HIGHLIGHT_ERROR}, + + /* Output redirection containing a command substitution */ + {L"4>", HIGHLIGHT_REDIRECTION}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/somewhere", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + + /* Just another param */ + {L"param2", HIGHLIGHT_PARAM}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; @@ -2206,14 +2268,7 @@ static void test_highlighting(void) expected_colors.push_back(0); } text.append(components[i].txt); - - // hackish space handling - const size_t text_len = wcslen(components[i].txt); - for (size_t j=0; j < text_len; j++) - { - bool is_space = (components[i].txt[j] == L' '); - expected_colors.push_back(is_space ? 0 : components[i].color); - } + expected_colors.resize(text.size(), components[i].color); } assert(expected_colors.size() == text.size()); @@ -2227,6 +2282,10 @@ static void test_highlighting(void) assert(expected_colors.size() == colors.size()); for (size_t i=0; i < text.size(); i++) { + // Hackish space handling. We don't care about the colors in spaces. + if (text.at(i) == L' ') + continue; + if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); @@ -2248,7 +2307,7 @@ int main(int argc, char **argv) configure_thread_assertions_for_testing(); program_name=L"(ignore)"; - s_arguments = argv; + s_arguments = argv + 1; say(L"Testing low-level functionality"); set_main_thread(); @@ -2262,7 +2321,7 @@ int main(int argc, char **argv) if (should_test_function("highlighting")) test_highlighting(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); - if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); + //if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser")) test_new_parser(); diff --git a/highlight.cpp b/highlight.cpp index c4ad7d92e..3acaf4968 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "fallback.h" #include "util.h" @@ -1692,9 +1693,15 @@ class highlighter_t /* Color an argument */ void color_argument(const parse_node_t &node); + /* Color a redirection */ + void color_redirection(const parse_node_t &node); + /* Color the arguments of the given node */ void color_arguments(const parse_node_t &list_node); + /* Color the redirections of the given node */ + void color_redirections(const parse_node_t &list_node); + /* Color all the children of the command with the given type */ void color_children(const parse_node_t &parent, parse_token_type_t type, int color); @@ -1729,6 +1736,7 @@ void highlighter_t::color_node(const parse_node_t &node, int color) std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); } +/* node does not necessarily have type symbol_argument here */ void highlighter_t::color_argument(const parse_node_t &node) { if (! node.has_source()) @@ -1819,7 +1827,7 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) /* Find all the arguments of this list */ const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); - for (node_offset_t i=0; i < nodes.size(); i++) + for (size_t i=0; i < nodes.size(); i++) { const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); @@ -1841,6 +1849,141 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) } } +void highlighter_t::color_redirection(const parse_node_t &redirection_node) +{ + assert(redirection_node.type == symbol_redirection); + if (! redirection_node.has_source()) + return; + + const parse_node_t *redirection_primitive = this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->parse_tree.get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL) + { + wcstring target; + const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, &target); + + /* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */ + this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? HIGHLIGHT_ERROR : HIGHLIGHT_REDIRECTION); + + /* Check if the argument contains a command substitution. If so, highlight it as a param even though it's a command redirection, and don't try to do any other validation. */ + if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0) + { + if (redirection_target != NULL) + this->color_argument(*redirection_target); + } + else + { + /* No command substitution, so we can highlight the target file or fd. For example, disallow redirections into a non-existent directory */ + bool target_is_valid = true; + + if (! expand_one(target, EXPAND_SKIP_CMDSUBST)) + { + /* Could not be expanded */ + target_is_valid = false; + } + else + { + /* Ok, we successfully expanded our target. Now verify that it works with this redirection. We will probably need it as a path (but not in the case of fd redirections */ + const wcstring target_path = apply_working_directory(target, this->working_directory); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + /* target should be an fd. It must be all digits, and must not overflow. fish_wcstoi returns INT_MAX on overflow; we could instead check errno to disambiguiate this from a real INT_MAX fd, but instead we just disallow that. */ + const wchar_t *target_cstr = target.c_str(); + wchar_t *end = NULL; + int fd = fish_wcstoi(target_cstr, &end, 10); + + /* The iswdigit check ensures there's no leading whitespace, the *end check ensures the entire string was consumed, and the numeric checks ensure the fd is at least zero and there was no overflow */ + target_is_valid = (iswdigit(target_cstr[0]) && *end == L'\0' && fd >= 0 && fd < INT_MAX); + } + break; + + case TOK_REDIRECT_IN: + { + /* Input redirections must have a readable non-directory */ + struct stat buf = {}; + target_is_valid = ! waccess(target_path, R_OK) && ! wstat(target_path, &buf) && ! S_ISDIR(buf.st_mode); + } + break; + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_NOCLOB: + { + /* Test whether the file exists, and whether it's writable (possibly after creating it). access() returns failure if the file does not exist. */ + bool file_exists = false, file_is_writable = false; + int err = 0; + + struct stat buf = {}; + if (wstat(target_path, &buf) < 0) + { + err = errno; + } + + if (string_suffixes_string(L"/", target)) + { + /* Redirections to things that are directories is definitely not allowed */ + file_exists = false; + file_is_writable = false; + } + else if (err == 0) + { + /* No err. We can write to it if it's not a directory and we have permission */ + file_exists = true; + file_is_writable = ! S_ISDIR(buf.st_mode) && ! waccess(target_path, W_OK); + } + else if (err == ENOENT) + { + /* File does not exist. Check if its parent directory is writable. */ + wcstring parent = wdirname(target_path); + + /* Ensure that the parent ends with the path separator. This will ensure that we get an error if the parent directory is not really a directory. */ + if (! string_suffixes_string(L"/", parent)) + parent.push_back(L'/'); + + /* Now the file is considered writable if the parent directory is writable */ + file_exists = false; + file_is_writable = (0 == waccess(parent, W_OK)); + } + else + { + /* Other errors we treat as not writable. This includes things like ENOTDIR. */ + file_exists = false; + file_is_writable = false; + } + + /* NOCLOB means that we must not overwrite files that exist */ + target_is_valid = file_is_writable && ! (file_exists && redirect_type == TOK_REDIRECT_NOCLOB); + } + break; + + default: + /* We should not get here, since the node was marked as a redirection, but treat it as an error for paranoia */ + target_is_valid = false; + break; + } + } + + if (redirection_target != NULL) + { + this->color_node(*redirection_target, target_is_valid ? HIGHLIGHT_REDIRECTION : HIGHLIGHT_ERROR); + } + } + } +} + +// Color all of the redirections of the given command +void highlighter_t::color_redirections(const parse_node_t &list_node) +{ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_redirection); + for (size_t i=0; i < nodes.size(); i++) + { + this->color_redirection(*nodes.at(i)); + } +} + /* Color all the children of the command with the given type */ void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) { @@ -1950,12 +2093,6 @@ const highlighter_t::color_array_t & highlighter_t::highlight() } break; - case symbol_redirection: - { - this->color_children(node, parse_token_type_string, HIGHLIGHT_REDIRECTION); - } - break; - case parse_token_type_background: case parse_token_type_end: { @@ -1994,6 +2131,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() if (parse_tree.argument_list_is_root(node)) { this->color_arguments(node); + this->color_redirections(node); } } break; diff --git a/parse_productions.cpp b/parse_productions.cpp index 528ca3cea..227955453 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -398,7 +398,7 @@ RESOLVE(arguments_or_redirections_list) PRODUCTIONS(argument_or_redirection) = { {symbol_argument}, - {parse_token_type_redirection} + {symbol_redirection} }; RESOLVE(argument_or_redirection) { @@ -421,7 +421,7 @@ RESOLVE_ONLY(argument) PRODUCTIONS(redirection) = { - {parse_token_type_redirection} + {parse_token_type_redirection, parse_token_type_string} }; RESOLVE_ONLY(redirection) diff --git a/parse_tree.cpp b/parse_tree.cpp index 87e2b3dc0..97421dab1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -519,8 +519,11 @@ void parse_ll_t::determine_node_ranges(void) for (node_offset_t i=0; i < parent->child_count; i++) { const parse_node_t &child = nodes.at(parent->child_offset(i)); - min_start = std::min(min_start, child.source_start); - max_end = std::max(max_end, child.source_start + child.source_length); + if (child.has_source()) + { + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } } if (min_start != source_start_invalid) @@ -691,6 +694,10 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) err_node.source_length = token1.source_length; nodes.push_back(err_node); consumed = true; + + /* tokenizer errors are fatal */ + if (token1.type == parse_special_type_tokenizer_error) + this->fatal_errored = true; } while (! consumed && ! this->fatal_errored) @@ -811,7 +818,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) parse_token_t result; - /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard ant it even starts to look like a feature. */ + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */ result.type = parse_token_type_from_tokenizer_token(tok_type); result.keyword = keyword_for_token(tok_type, tok_txt); result.has_dash_prefix = (tok_txt[0] == L'-'); @@ -906,6 +913,7 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo bool wants_errors = (errors != NULL); this->parser->set_should_generate_error_messages(wants_errors); + /* Passing invalid_token here is totally wrong. This code is only used in testing however. */ this->parser->accept_tokens(token, invalid_token); return ! this->parser->has_fatal_error(); @@ -1083,3 +1091,21 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co } return result; } + +enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const +{ + assert(redirection_node.type == symbol_redirection); + enum token_type result = TOK_NONE; + const parse_node_t *redirection_primitive = this->get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL && redirection_primitive->has_source()) + { + result = redirection_type_for_string(redirection_primitive->get_source(src)); + } + if (out_target != NULL) + { + *out_target = redirection_target ? redirection_target->get_source(src) : L""; + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 62ffb622a..79cae8ccb 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -273,7 +273,9 @@ public: /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */ bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; - + + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; }; /* Fish grammar: @@ -332,7 +334,8 @@ public: argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection argument = - redirection = + + redirection = terminator = | diff --git a/tokenizer.cpp b/tokenizer.cpp index 8a6fe58a8..2416ce9d6 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -435,9 +435,11 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } + + /* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. - Also returns by reference the redirection mode, and the fd to redirection. + Also returns by reference the redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1. */ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { @@ -447,13 +449,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * size_t idx = 0; - /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character. Watch out for overflow. */ + long long big_fd = 0; for (; iswdigit(buff[idx]); idx++) { - int digit = buff[idx] - L'0'; - fd = fd * 10 + digit; + /* Note that it's important we consume all the digits here, even if it overflows. */ + if (big_fd <= INT_MAX) + big_fd = big_fd * 10 + (buff[idx] - L'0'); } + fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); + if (idx == 0) { /* We did not find a leading digit, so there's no explicit fd. Infer it from the type */ @@ -523,6 +529,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * return idx; } +enum token_type redirection_type_for_string(const wcstring &str) +{ + enum token_type mode = TOK_NONE; + int fd = 0; + read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Redirections only, no pipes */ + if (mode == TOK_PIPE || fd < 0) + mode = TOK_NONE; + return mode; +} + wchar_t tok_last_quote(tokenizer_t *tok) { CHECK(tok, 0); @@ -639,7 +656,7 @@ void tok_next(tokenizer_t *tok) enum token_type mode = TOK_NONE; int fd = -1; size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); - if (consumed == 0) + if (consumed == 0 || fd < 0) { TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); } @@ -663,7 +680,7 @@ void tok_next(tokenizer_t *tok) if (consumed > 0) { - /* It looks like a redirection or a pipe. But we don't support piping fd 0. */ + /* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer error. */ if (mode == TOK_PIPE && fd == 0) { TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); diff --git a/tokenizer.h b/tokenizer.h index dec206a58..8e130f0e7 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -187,6 +187,9 @@ const wchar_t *tok_get_desc(int type); */ int tok_get_error(tokenizer_t *tok); +/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid */ +enum token_type redirection_type_for_string(const wcstring &str); + enum move_word_style_t { move_word_style_punctuation, //stop at punctuation