Support for stack overflow and infinite recursion detection in new

parser
This commit is contained in:
ridiculousfish 2014-01-01 15:29:56 -08:00
parent 27cba56761
commit fb882f0b69
10 changed files with 204 additions and 46 deletions

View file

@ -690,7 +690,7 @@ static int builtin_bind(parser_t &parser, wchar_t **argv)
default: default:
{ {
res = STATUS_BUILTIN_ERROR; res = STATUS_BUILTIN_ERROR;
append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d"), argv[0], argc-woptind); append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d\n"), argv[0], argc-woptind);
break; break;
} }
} }

View file

@ -1230,7 +1230,7 @@ void completer_t::complete_from_args(const wcstring &str,
std::vector<completion_t> possible_comp; std::vector<completion_t> possible_comp;
bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST); bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST);
parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false); parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */);
/* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */ /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */
if (! is_autosuggest) if (! is_autosuggest)

View file

@ -659,6 +659,14 @@ static void test_parser()
{ {
err(L"Invalid block mode when evaluating undetected"); err(L"Invalid block mode when evaluating undetected");
} }
/* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
#if 1
/* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
say(L"Testing recursion detection");
parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
#endif
} }
static void test_indents() static void test_indents()

View file

@ -128,6 +128,8 @@ enum {
typedef unsigned int parser_test_error_bits_t; typedef unsigned int parser_test_error_bits_t;
/** Maximum number of function calls. */
#define FISH_MAX_STACK_DEPTH 128
/** /**
Error message for tokenizer error. The tokenizer message is Error message for tokenizer error. The tokenizer message is
@ -140,15 +142,15 @@ typedef unsigned int parser_test_error_bits_t;
*/ */
#define COND_ERR_MSG _( L"An additional command is required" ) #define COND_ERR_MSG _( L"An additional command is required" )
/** /** Error message on a function that calls itself immediately */
Error message on a function that calls itself immediately
*/
#define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")
/** /** Error message on a function that calls itself immediately */
Error message on reaching maximum recursion depth #define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.")
*/
#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
/** Error message on reaching maximum call stack depth */
#define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?")
/** /**
Error message used when the end of a block can't be located Error message used when the end of a block can't be located

View file

@ -48,6 +48,81 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co
return offset; return offset;
} }
const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const
{
assert(job_list.type == symbol_job_list);
/*
This is a bit fragile. It is a test to see if we are
inside of function call, but not inside a block in that
function call. If, in the future, the rules for what
block scopes are pushed on function invocation changes,
then this check will break.
*/
const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1);
bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL);
if (! is_within_function_call)
{
return NULL;
}
/* Check to see which function call is forbidden */
if (parser->forbidden_function.empty())
{
return NULL;
}
const wcstring &forbidden_function_name = parser->forbidden_function.back();
/* Get the first job in the job list. */
const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL);
if (first_job == NULL)
{
return NULL;
}
/* Here's the statement node we find that's infinite recursive */
const parse_node_t *infinite_recursive_statement = NULL;
/* Get the list of statements */
const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job);
/* Find all the decorated statements. We are interested in statements with no decoration (i.e. not command, not builtin) whose command expands to the forbidden function */
for (size_t i=0; i < statements.size(); i++)
{
/* We only care about decorated statements, not while statements, etc. */
const parse_node_t &statement = *statements.at(i);
if (statement.type != symbol_decorated_statement)
{
continue;
}
const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement);
if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none)
{
/* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. In particular this is what enables 'wrapper functions' */
continue;
}
/* Ok, this is an undecorated plain statement. Get and expand its command */
wcstring cmd;
tree.command_for_plain_statement(plain_statement, src, &cmd);
expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
if (cmd == forbidden_function_name)
{
/* This is it */
infinite_recursive_statement = &statement;
if (out_func_name != NULL)
{
*out_func_name = forbidden_function_name;
}
break;
}
}
assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement);
return infinite_recursive_statement;
}
enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const
{ {
assert(plain_statement.type == symbol_plain_statement); assert(plain_statement.type == symbol_plain_statement);
@ -528,12 +603,10 @@ parse_execution_result_t parse_execution_context_t::report_error(const parse_nod
error.text = vformat_string(fmt, va); error.text = vformat_string(fmt, va);
va_end(va); va_end(va);
/* Output the error */ /* Get a backtrace */
const wcstring desc = error.describe(this->src); wcstring backtrace;
if (! desc.empty()) const parse_error_list_t error_list = parse_error_list_t(1, error);
{ parser->get_backtrace(src, error_list, &backtrace);
fprintf(stderr, "%ls\n", desc.c_str());
}
return parse_execution_errored; return parse_execution_errored;
} }
@ -669,7 +742,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd);
assert(got_cmd); assert(got_cmd);
/* Expand it as a command. Return NULL on failure. */ /* Expand it as a command. Return an error on failure. */
bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
if (! expanded) if (! expanded)
{ {
@ -680,6 +753,13 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
/* Determine the process type */ /* Determine the process type */
enum process_type_t process_type = process_type_for_command(statement, cmd); enum process_type_t process_type = process_type_for_command(statement, cmd);
/* Check for stack overflow */
if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH)
{
this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
return parse_execution_errored;
}
wcstring actual_cmd; wcstring actual_cmd;
if (process_type == EXTERNAL) if (process_type == EXTERNAL)
{ {
@ -1296,10 +1376,24 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off
switch (node.type) switch (node.type)
{ {
case symbol_job_list: case symbol_job_list:
{
/* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ /* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */
assert(offset == 0); assert(offset == 0);
wcstring func_name;
const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name);
if (infinite_recursive_node != NULL)
{
/* We have an infinite recursion */
this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str());
status = parse_execution_errored;
}
else
{
/* No infinite recursion */
status = this->run_job_list(node, associated_block); status = this->run_job_list(node, associated_block);
}
break; break;
}
case symbol_block_statement: case symbol_block_statement:
status = this->run_block_statement(node); status = this->run_block_statement(node);

View file

@ -70,6 +70,7 @@ class parse_execution_context_t
wcstring get_source(const parse_node_t &node) const; wcstring get_source(const parse_node_t &node) const;
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
node_offset_t get_offset(const parse_node_t &node) const; node_offset_t get_offset(const parse_node_t &node) const;
const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const;
enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const;

View file

@ -1405,7 +1405,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
return result; return result;
} }
const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const
{ {
const parse_node_t *result = NULL; const parse_node_t *result = NULL;
if (node.type == symbol_block_statement) if (node.type == symbol_block_statement)
@ -1418,3 +1418,63 @@ const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const par
} }
return result; return result;
} }
parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const
{
assert(job.type == symbol_job);
parse_node_list_t result;
/* Initial statement (non-specific) */
result.push_back(get_child(job, 0, symbol_statement));
/* Our cursor variable. Walk over the list of continuations. */
const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation);
while (continuation != NULL && continuation->child_count > 0)
{
result.push_back(get_child(*continuation, 1, symbol_statement));
continuation = get_child(*continuation, 2, symbol_job_continuation);
}
/* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */
for (size_t i=0; i < result.size(); i++)
{
const parse_node_t *statement = result.at(i);
assert(statement->type == symbol_statement);
result.at(i) = this->get_child(*statement, 0);
}
return result;
}
const parse_node_t *parse_node_tree_t::next_job_in_job_list(const parse_node_t &top_job_list, const parse_node_t **out_list_tail) const
{
assert(top_job_list.type == symbol_job_list);
/* Our cursor variable */
const parse_node_t *job_list = &top_job_list;
/* Skip over a run of empty jobs */
assert(job_list->type == symbol_job_list);
while (job_list->production_idx == 2)
{
job_list = this->get_child(*job_list, 1, symbol_job_list);
}
/* Should now be at production 0 or 1 */
assert(job_list->type == symbol_job_list);
assert(job_list->production_idx == 0 || job_list->production_idx == 1);
/* Pull out the job */
const parse_node_t *job = NULL;
const parse_node_t *list_tail = NULL;
if (job_list->production_idx == 1)
{
job = this->get_child(*job_list, 0, symbol_job);
list_tail = this->get_child(*job_list, 1, symbol_job_list);
}
/* Return them */
if (out_list_tail != NULL)
*out_list_tail = list_tail;
return job;
}

View file

@ -209,7 +209,13 @@ public:
enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const;
/* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
const parse_node_t *header_node_for_block_statement(const parse_node_t &node); const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const;
/* Given a job list, returns the next job (or NULL), and the tail of the job list. */
const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const;
/* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
}; };
/* Fish grammar: /* Fish grammar:
@ -252,7 +258,7 @@ public:
while_header = WHILE job while_header = WHILE job
begin_header = BEGIN begin_header = BEGIN
# Functions take arguments, and require at least one (the name) # Functions take arguments, and require at least one (the name). No redirections allowed.
function_header = FUNCTION argument argument_list function_header = FUNCTION argument argument_list
# A boolean statement is AND or OR or NOT # A boolean statement is AND or OR or NOT

View file

@ -47,11 +47,6 @@ The fish parser. Contains functions for parsing and evaluating code.
#include "parse_tree.h" #include "parse_tree.h"
#include "parse_execution.h" #include "parse_execution.h"
/**
Maximum number of function calls, i.e. recursion depth.
*/
#define MAX_RECURSION_DEPTH 128
/** /**
Error message for unknown builtin Error message for unknown builtin
*/ */
@ -78,11 +73,6 @@ The fish parser. Contains functions for parsing and evaluating code.
*/ */
#define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")
/**
Error message on reaching maximum recursion depth
*/
#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
/** /**
Error message used when the end of a block can't be located Error message used when the end of a block can't be located
*/ */
@ -584,12 +574,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
va_start(va, str); va_start(va, str);
err_buff = vformat_string(str, va); err_buff = vformat_string(str, va);
va_end(va); va_end(va);
if (parser_use_ast())
{
fprintf(stderr, "parser error: %ls\n", err_buff.c_str());
err_buff.clear();
}
} }
/** /**
@ -746,7 +730,6 @@ void parser_t::print_errors_stderr()
void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args) void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
{ {
expand_flags_t eflags = 0; expand_flags_t eflags = 0;
if (! show_errors) if (! show_errors)
eflags |= EXPAND_NO_DESCRIPTIONS; eflags |= EXPAND_NO_DESCRIPTIONS;
@ -757,7 +740,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
if (! line) return; if (! line) return;
// PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. // PCA we need to suppress calling proc_push_interactive off of the main thread.
if (this->parser_type == PARSER_TYPE_GENERAL) if (this->parser_type == PARSER_TYPE_GENERAL)
proc_push_interactive(0); proc_push_interactive(0);
@ -988,10 +971,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const
const wchar_t *parser_t::current_filename() const const wchar_t *parser_t::current_filename() const
{ {
/* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. */
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
assert(this == &principal_parser());
for (size_t i=0; i < this->block_count(); i++) for (size_t i=0; i < this->block_count(); i++)
{ {
@ -1002,8 +982,14 @@ const wchar_t *parser_t::current_filename() const
return function_get_definition_file(fb->name); return function_get_definition_file(fb->name);
} }
} }
/* We query a global array for the current file name, but only do that if we are the principal parser */
if (this == &principal_parser())
{
return reader_current_filename(); return reader_current_filename();
} }
return NULL;
}
/** /**
Calculates the on-screen width of the specified substring of the Calculates the on-screen width of the specified substring of the
@ -1940,9 +1926,9 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok)
/* /*
Check if we have reached the maximum recursion depth Check if we have reached the maximum recursion depth
*/ */
if (forbidden_function.size() > MAX_RECURSION_DEPTH) if (forbidden_function.size() > FISH_MAX_STACK_DEPTH)
{ {
error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG); error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
} }
else else
{ {

View file

@ -136,7 +136,8 @@ static bool proc_had_barrier = false;
int get_is_interactive(void) int get_is_interactive(void)
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
return is_interactive; // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false.
return is_interactive > 0;
} }
bool get_proc_had_barrier() bool get_proc_had_barrier()