From fb882f0b695d88a3211cdae97f18d955427b0b18 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 1 Jan 2014 15:29:56 -0800 Subject: [PATCH] Support for stack overflow and infinite recursion detection in new parser --- builtin.cpp | 2 +- complete.cpp | 2 +- fish_tests.cpp | 8 ++++ parse_constants.h | 16 ++++--- parse_execution.cpp | 110 ++++++++++++++++++++++++++++++++++++++++---- parse_execution.h | 1 + parse_tree.cpp | 62 ++++++++++++++++++++++++- parse_tree.h | 10 +++- parser.cpp | 36 +++++---------- proc.cpp | 3 +- 10 files changed, 204 insertions(+), 46 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 215edc596..0bff83d38 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -690,7 +690,7 @@ static int builtin_bind(parser_t &parser, wchar_t **argv) default: { res = STATUS_BUILTIN_ERROR; - append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d"), argv[0], argc-woptind); + append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d\n"), argv[0], argc-woptind); break; } } diff --git a/complete.cpp b/complete.cpp index 6541ed502..c9db0f701 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1230,7 +1230,7 @@ void completer_t::complete_from_args(const wcstring &str, std::vector possible_comp; bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST); - parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false); + parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */); /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */ if (! is_autosuggest) diff --git a/fish_tests.cpp b/fish_tests.cpp index 83f8976d0..29f4e830b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -659,6 +659,14 @@ static void test_parser() { err(L"Invalid block mode when evaluating undetected"); } + + /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */ +#if 1 + /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */ + say(L"Testing recursion detection"); + parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP); + parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP); +#endif } static void test_indents() diff --git a/parse_constants.h b/parse_constants.h index e923bc10e..12fcb114c 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -128,6 +128,8 @@ enum { typedef unsigned int parser_test_error_bits_t; +/** Maximum number of function calls. */ +#define FISH_MAX_STACK_DEPTH 128 /** Error message for tokenizer error. The tokenizer message is @@ -140,15 +142,15 @@ typedef unsigned int parser_test_error_bits_t; */ #define COND_ERR_MSG _( L"An additional command is required" ) -/** - Error message on a function that calls itself immediately -*/ +/** Error message on a function that calls itself immediately */ #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") -/** - Error message on reaching maximum recursion depth -*/ -#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") +/** Error message on a function that calls itself immediately */ +#define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.") + + +/** Error message on reaching maximum call stack depth */ +#define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?") /** Error message used when the end of a block can't be located diff --git a/parse_execution.cpp b/parse_execution.cpp index 338a513da..bc478c05d 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -48,6 +48,81 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co return offset; } +const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const +{ + assert(job_list.type == symbol_job_list); + /* + This is a bit fragile. It is a test to see if we are + inside of function call, but not inside a block in that + function call. If, in the future, the rules for what + block scopes are pushed on function invocation changes, + then this check will break. + */ + const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1); + bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL); + if (! is_within_function_call) + { + return NULL; + } + + /* Check to see which function call is forbidden */ + if (parser->forbidden_function.empty()) + { + return NULL; + } + const wcstring &forbidden_function_name = parser->forbidden_function.back(); + + /* Get the first job in the job list. */ + const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL); + if (first_job == NULL) + { + return NULL; + } + + /* Here's the statement node we find that's infinite recursive */ + const parse_node_t *infinite_recursive_statement = NULL; + + /* Get the list of statements */ + const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job); + + /* Find all the decorated statements. We are interested in statements with no decoration (i.e. not command, not builtin) whose command expands to the forbidden function */ + for (size_t i=0; i < statements.size(); i++) + { + /* We only care about decorated statements, not while statements, etc. */ + const parse_node_t &statement = *statements.at(i); + if (statement.type != symbol_decorated_statement) + { + continue; + } + + const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement); + if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none) + { + /* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. In particular this is what enables 'wrapper functions' */ + continue; + } + + /* Ok, this is an undecorated plain statement. Get and expand its command */ + wcstring cmd; + tree.command_for_plain_statement(plain_statement, src, &cmd); + expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + + if (cmd == forbidden_function_name) + { + /* This is it */ + infinite_recursive_statement = &statement; + if (out_func_name != NULL) + { + *out_func_name = forbidden_function_name; + } + break; + } + } + + assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement); + return infinite_recursive_statement; +} + enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const { assert(plain_statement.type == symbol_plain_statement); @@ -528,12 +603,10 @@ parse_execution_result_t parse_execution_context_t::report_error(const parse_nod error.text = vformat_string(fmt, va); va_end(va); - /* Output the error */ - const wcstring desc = error.describe(this->src); - if (! desc.empty()) - { - fprintf(stderr, "%ls\n", desc.c_str()); - } + /* Get a backtrace */ + wcstring backtrace; + const parse_error_list_t error_list = parse_error_list_t(1, error); + parser->get_backtrace(src, error_list, &backtrace); return parse_execution_errored; } @@ -669,7 +742,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); assert(got_cmd); - /* Expand it as a command. Return NULL on failure. */ + /* Expand it as a command. Return an error on failure. */ bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! expanded) { @@ -680,6 +753,13 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t /* Determine the process type */ enum process_type_t process_type = process_type_for_command(statement, cmd); + /* Check for stack overflow */ + if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH) + { + this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); + return parse_execution_errored; + } + wcstring actual_cmd; if (process_type == EXTERNAL) { @@ -1296,10 +1376,24 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off switch (node.type) { case symbol_job_list: + { /* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ assert(offset == 0); - status = this->run_job_list(node, associated_block); + wcstring func_name; + const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name); + if (infinite_recursive_node != NULL) + { + /* We have an infinite recursion */ + this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); + status = parse_execution_errored; + } + else + { + /* No infinite recursion */ + status = this->run_job_list(node, associated_block); + } break; + } case symbol_block_statement: status = this->run_block_statement(node); diff --git a/parse_execution.h b/parse_execution.h index feeb7404a..0d924b8ec 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -70,6 +70,7 @@ class parse_execution_context_t wcstring get_source(const parse_node_t &node) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; + const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const; enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; diff --git a/parse_tree.cpp b/parse_tree.cpp index 222af8af9..81973c61e 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1405,7 +1405,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi return result; } -const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) +const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const { const parse_node_t *result = NULL; if (node.type == symbol_block_statement) @@ -1418,3 +1418,63 @@ const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const par } return result; } + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const +{ + assert(job.type == symbol_job); + parse_node_list_t result; + + /* Initial statement (non-specific) */ + result.push_back(get_child(job, 0, symbol_statement)); + + /* Our cursor variable. Walk over the list of continuations. */ + const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation); + while (continuation != NULL && continuation->child_count > 0) + { + result.push_back(get_child(*continuation, 1, symbol_statement)); + continuation = get_child(*continuation, 2, symbol_job_continuation); + } + + /* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */ + for (size_t i=0; i < result.size(); i++) + { + const parse_node_t *statement = result.at(i); + assert(statement->type == symbol_statement); + result.at(i) = this->get_child(*statement, 0); + } + + return result; +} + +const parse_node_t *parse_node_tree_t::next_job_in_job_list(const parse_node_t &top_job_list, const parse_node_t **out_list_tail) const +{ + assert(top_job_list.type == symbol_job_list); + + /* Our cursor variable */ + const parse_node_t *job_list = &top_job_list; + + /* Skip over a run of empty jobs */ + assert(job_list->type == symbol_job_list); + while (job_list->production_idx == 2) + { + job_list = this->get_child(*job_list, 1, symbol_job_list); + } + + /* Should now be at production 0 or 1 */ + assert(job_list->type == symbol_job_list); + assert(job_list->production_idx == 0 || job_list->production_idx == 1); + + /* Pull out the job */ + const parse_node_t *job = NULL; + const parse_node_t *list_tail = NULL; + if (job_list->production_idx == 1) + { + job = this->get_child(*job_list, 0, symbol_job); + list_tail = this->get_child(*job_list, 1, symbol_job_list); + } + + /* Return them */ + if (out_list_tail != NULL) + *out_list_tail = list_tail; + return job; +} diff --git a/parse_tree.h b/parse_tree.h index d8874b119..acfaa7396 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -209,7 +209,13 @@ public: enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ - const parse_node_t *header_node_for_block_statement(const parse_node_t &node); + const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; + + /* Given a job list, returns the next job (or NULL), and the tail of the job list. */ + const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const; + + /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */ + parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; }; /* Fish grammar: @@ -252,7 +258,7 @@ public: while_header = WHILE job begin_header = BEGIN -# Functions take arguments, and require at least one (the name) +# Functions take arguments, and require at least one (the name). No redirections allowed. function_header = FUNCTION argument argument_list # A boolean statement is AND or OR or NOT diff --git a/parser.cpp b/parser.cpp index 3cd1b98c3..6f5008174 100644 --- a/parser.cpp +++ b/parser.cpp @@ -47,11 +47,6 @@ The fish parser. Contains functions for parsing and evaluating code. #include "parse_tree.h" #include "parse_execution.h" -/** - Maximum number of function calls, i.e. recursion depth. -*/ -#define MAX_RECURSION_DEPTH 128 - /** Error message for unknown builtin */ @@ -78,11 +73,6 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") -/** - Error message on reaching maximum recursion depth -*/ -#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") - /** Error message used when the end of a block can't be located */ @@ -584,12 +574,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) va_start(va, str); err_buff = vformat_string(str, va); va_end(va); - - if (parser_use_ast()) - { - fprintf(stderr, "parser error: %ls\n", err_buff.c_str()); - err_buff.clear(); - } } /** @@ -746,7 +730,6 @@ void parser_t::print_errors_stderr() void parser_t::eval_args(const wchar_t *line, std::vector &args) { - expand_flags_t eflags = 0; if (! show_errors) eflags |= EXPAND_NO_DESCRIPTIONS; @@ -757,7 +740,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector &args) if (! line) return; - // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. + // PCA we need to suppress calling proc_push_interactive off of the main thread. if (this->parser_type == PARSER_TYPE_GENERAL) proc_push_interactive(0); @@ -988,10 +971,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const const wchar_t *parser_t::current_filename() const { - /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. */ ASSERT_IS_MAIN_THREAD(); - assert(this == &principal_parser()); - for (size_t i=0; i < this->block_count(); i++) { @@ -1002,7 +982,13 @@ const wchar_t *parser_t::current_filename() const return function_get_definition_file(fb->name); } } - return reader_current_filename(); + + /* We query a global array for the current file name, but only do that if we are the principal parser */ + if (this == &principal_parser()) + { + return reader_current_filename(); + } + return NULL; } /** @@ -1221,7 +1207,7 @@ void parser_t::job_promote(job_t *job) { signal_block(); - job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); + job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); assert(loc != my_job_list.end()); /* Move the job to the beginning */ @@ -1940,9 +1926,9 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) /* Check if we have reached the maximum recursion depth */ - if (forbidden_function.size() > MAX_RECURSION_DEPTH) + if (forbidden_function.size() > FISH_MAX_STACK_DEPTH) { - error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG); + error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); } else { diff --git a/proc.cpp b/proc.cpp index 65e906ac8..4a25b4517 100644 --- a/proc.cpp +++ b/proc.cpp @@ -136,7 +136,8 @@ static bool proc_had_barrier = false; int get_is_interactive(void) { ASSERT_IS_MAIN_THREAD(); - return is_interactive; + // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false. + return is_interactive > 0; } bool get_proc_had_barrier()