Support for stack overflow and infinite recursion detection in new

parser
2024-12-27 05:13:10 +00:00 · 2014-01-01 15:29:56 -08:00 · 2014-01-01 15:29:56 -08:00 · fb882f0b69
commit fb882f0b69
parent 27cba56761
10 changed files with 204 additions and 46 deletions
--- a/builtin.cpp
+++ b/builtin.cpp
@ -690,7 +690,7 @@ static int builtin_bind(parser_t &parser, wchar_t **argv)
                default:
                {
                    res = STATUS_BUILTIN_ERROR;
-                    append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d"), argv[0], argc-woptind);
+                    append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d\n"), argv[0], argc-woptind);
                    break;
                }
            }
--- a/complete.cpp
+++ b/complete.cpp
@ -1230,7 +1230,7 @@ void completer_t::complete_from_args(const wcstring &str,
    std::vector<completion_t> possible_comp;
    bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST);
-    parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false);
+    parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */);
    /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */
    if (! is_autosuggest)
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -659,6 +659,14 @@ static void test_parser()
    {
        err(L"Invalid block mode when evaluating undetected");
    }
    /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
 #if 1
    /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
    say(L"Testing recursion detection");
    parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
    parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
 #endif
 }
 static void test_indents()
--- a/parse_constants.h
+++ b/parse_constants.h
@ -128,6 +128,8 @@ enum {
 typedef unsigned int parser_test_error_bits_t;
 /** Maximum number of function calls. */
 #define FISH_MAX_STACK_DEPTH 128
 /**
   Error message for tokenizer error. The tokenizer message is
@ -140,15 +142,15 @@ typedef unsigned int parser_test_error_bits_t;
 */
 #define COND_ERR_MSG _( L"An additional command is required" )
-/**
+/** Error message on a function that calls itself immediately */
   Error message on a function that calls itself immediately
 */
 #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")
-/**
+/** Error message on a function that calls itself immediately */
-   Error message on reaching maximum recursion depth
+#define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.")
-*/
+
-#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
+
 /** Error message on reaching maximum call stack depth */
 #define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?")
 /**
   Error message used when the end of a block can't be located
--- a/parse_execution.cpp
+++ b/parse_execution.cpp
@ -48,6 +48,81 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co
    return offset;
 }
 const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const
 {
    assert(job_list.type == symbol_job_list);
    /*
      This is a bit fragile. It is a test to see if we are
      inside of function call, but not inside a block in that
      function call. If, in the future, the rules for what
      block scopes are pushed on function invocation changes,
      then this check will break.
    */
    const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1);
    bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL);
    if (! is_within_function_call)
    {
        return NULL;
    }
    /* Check to see which function call is forbidden */
    if (parser->forbidden_function.empty())
    {
        return NULL;
    }
    const wcstring &forbidden_function_name = parser->forbidden_function.back();
    /* Get the first job in the job list. */
    const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL);
    if (first_job == NULL)
    {
        return NULL;
    }
    /* Here's the statement node we find that's infinite recursive */
    const parse_node_t *infinite_recursive_statement = NULL;
    /* Get the list of statements */
    const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job);
    /* Find all the decorated statements. We are interested in statements with no decoration (i.e. not command, not builtin) whose command expands to the forbidden function */
    for (size_t i=0; i < statements.size(); i++)
    {
        /* We only care about decorated statements, not while statements, etc. */
        const parse_node_t &statement = *statements.at(i);
        if (statement.type != symbol_decorated_statement)
        {
            continue;
        }
        const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement);
        if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none)
        {
            /* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. In particular this is what enables 'wrapper functions' */
            continue;
        }
        /* Ok, this is an undecorated plain statement. Get and expand its command */
        wcstring cmd;
        tree.command_for_plain_statement(plain_statement, src, &cmd);
        expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
        if (cmd == forbidden_function_name)
        {
            /* This is it */
            infinite_recursive_statement = &statement;
            if (out_func_name != NULL)
            {
                *out_func_name = forbidden_function_name;
            }
            break;
        }
    }
    assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement);
    return infinite_recursive_statement;
 }
 enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const
 {
    assert(plain_statement.type == symbol_plain_statement);
@ -528,12 +603,10 @@ parse_execution_result_t parse_execution_context_t::report_error(const parse_nod
    error.text = vformat_string(fmt, va);
    va_end(va);
-    /* Output the error */
+    /* Get a backtrace */
-    const wcstring desc = error.describe(this->src);
+    wcstring backtrace;
-    if (! desc.empty())
+    const parse_error_list_t error_list = parse_error_list_t(1, error);
-    {
+    parser->get_backtrace(src, error_list, &backtrace);
        fprintf(stderr, "%ls\n", desc.c_str());
    }
    return parse_execution_errored;
 }
@ -669,7 +742,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
    bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd);
    assert(got_cmd);
-    /* Expand it as a command. Return NULL on failure. */
+    /* Expand it as a command. Return an error on failure. */
    bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
    if (! expanded)
    {
@ -680,6 +753,13 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
    /* Determine the process type */
    enum process_type_t process_type = process_type_for_command(statement, cmd);
    /* Check for stack overflow */
    if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH)
    {
        this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
        return parse_execution_errored;
    }
    wcstring actual_cmd;
    if (process_type == EXTERNAL)
    {
@ -1296,10 +1376,24 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off
    switch (node.type)
    {
        case symbol_job_list:
        {
            /* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */
            assert(offset == 0);
            wcstring func_name;
            const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name);
            if (infinite_recursive_node != NULL)
            {
                /* We have an infinite recursion */
                this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str());
                status = parse_execution_errored;
            }
            else
            {
                /* No infinite recursion */
                status = this->run_job_list(node, associated_block);
            }
            break;
        }
        case symbol_block_statement:
            status = this->run_block_statement(node);
--- a/parse_execution.h
+++ b/parse_execution.h
@ -70,6 +70,7 @@ class parse_execution_context_t
    wcstring get_source(const parse_node_t &node) const;
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
    node_offset_t get_offset(const parse_node_t &node) const;
    const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const;
    enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const;
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -1405,7 +1405,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
    return result;
 }
-const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node)
+const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const
 {
    const parse_node_t *result = NULL;
    if (node.type == symbol_block_statement)
@ -1418,3 +1418,63 @@ const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const par
    }
    return result;
 }
 parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const
 {
    assert(job.type == symbol_job);
    parse_node_list_t result;
    /* Initial statement (non-specific) */
    result.push_back(get_child(job, 0, symbol_statement));
    /* Our cursor variable. Walk over the list of continuations. */
    const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation);
    while (continuation != NULL && continuation->child_count > 0)
    {
        result.push_back(get_child(*continuation, 1, symbol_statement));
        continuation = get_child(*continuation, 2, symbol_job_continuation);
    }
    /* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */
    for (size_t i=0; i < result.size(); i++)
    {
        const parse_node_t *statement = result.at(i);
        assert(statement->type == symbol_statement);
        result.at(i) = this->get_child(*statement, 0);
    }
    return result;
 }
 const parse_node_t *parse_node_tree_t::next_job_in_job_list(const parse_node_t &top_job_list, const parse_node_t **out_list_tail) const
 {
    assert(top_job_list.type == symbol_job_list);
    /* Our cursor variable */
    const parse_node_t *job_list = &top_job_list;
    /* Skip over a run of empty jobs */
    assert(job_list->type == symbol_job_list);
    while (job_list->production_idx == 2)
    {
        job_list = this->get_child(*job_list, 1, symbol_job_list);
    }
    /* Should now be at production 0 or 1 */
    assert(job_list->type == symbol_job_list);
    assert(job_list->production_idx == 0 || job_list->production_idx == 1);
    /* Pull out the job */
    const parse_node_t *job = NULL;
    const parse_node_t *list_tail = NULL;
    if (job_list->production_idx == 1)
    {
        job = this->get_child(*job_list, 0, symbol_job);
        list_tail = this->get_child(*job_list, 1, symbol_job_list);
    }
    /* Return them */
    if (out_list_tail != NULL)
        *out_list_tail = list_tail;
    return job;
 }
--- a/parse_tree.h
+++ b/parse_tree.h
@ -209,7 +209,13 @@ public:
    enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const;
    /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
-    const parse_node_t *header_node_for_block_statement(const parse_node_t &node);
+    const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const;
    /* Given a job list, returns the next job (or NULL), and the tail of the job list. */
    const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const;
    /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
    parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
 };
 /* Fish grammar:
@ -252,7 +258,7 @@ public:
    while_header = WHILE job
    begin_header = BEGIN
-# Functions take arguments, and require at least one (the name)    
+# Functions take arguments, and require at least one (the name). No redirections allowed.
    function_header = FUNCTION argument argument_list
 # A boolean statement is AND or OR or NOT
--- a/parser.cpp
+++ b/parser.cpp
@ -47,11 +47,6 @@ The fish parser. Contains functions for parsing and evaluating code.
 #include "parse_tree.h"
 #include "parse_execution.h"
 /**
   Maximum number of function calls, i.e. recursion depth.
 */
 #define MAX_RECURSION_DEPTH 128
 /**
   Error message for unknown builtin
 */
@ -78,11 +73,6 @@ The fish parser. Contains functions for parsing and evaluating code.
 */
 #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")
 /**
   Error message on reaching maximum recursion depth
 */
 #define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
 /**
   Error message used when the end of a block can't be located
 */
@ -584,12 +574,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
    va_start(va, str);
    err_buff = vformat_string(str, va);
    va_end(va);
    if (parser_use_ast())
    {
        fprintf(stderr, "parser error: %ls\n", err_buff.c_str());
        err_buff.clear();
    }
 }
 /**
@ -746,7 +730,6 @@ void parser_t::print_errors_stderr()
 void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
 {
    expand_flags_t eflags = 0;
    if (! show_errors)
        eflags |= EXPAND_NO_DESCRIPTIONS;
@ -757,7 +740,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
    if (! line) return;
-    // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does.
+    // PCA we need to suppress calling proc_push_interactive off of the main thread.
    if (this->parser_type == PARSER_TYPE_GENERAL)
        proc_push_interactive(0);
@ -988,10 +971,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const
 const wchar_t *parser_t::current_filename() const
 {
    /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. */
    ASSERT_IS_MAIN_THREAD();
    assert(this == &principal_parser());
    for (size_t i=0; i < this->block_count(); i++)
    {
@ -1002,8 +982,14 @@ const wchar_t *parser_t::current_filename() const
            return function_get_definition_file(fb->name);
        }
    }
    /* We query a global array for the current file name, but only do that if we are the principal parser */
    if (this == &principal_parser())
    {
        return reader_current_filename();
    }
    return NULL;
 }
 /**
   Calculates the on-screen width of the specified substring of the
@ -1940,9 +1926,9 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok)
                /*
                  Check if we have reached the maximum recursion depth
                */
-                if (forbidden_function.size() > MAX_RECURSION_DEPTH)
+                if (forbidden_function.size() > FISH_MAX_STACK_DEPTH)
                {
-                    error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG);
+                    error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
                }
                else
                {
--- a/proc.cpp
+++ b/proc.cpp
@ -136,7 +136,8 @@ static bool proc_had_barrier = false;
 int get_is_interactive(void)
 {
    ASSERT_IS_MAIN_THREAD();
-    return is_interactive;
+    // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false.
    return is_interactive > 0;
 }
 bool get_proc_had_barrier()