Initial work towards rewriting detect_errors to use new parser.

Low-level tests currently pass; high level tests fail.
2024-12-26 12:53:13 +00:00 · 2013-12-11 18:34:28 -08:00 · 2013-12-11 18:34:28 -08:00 · d5d9b9284a
commit d5d9b9284a
parent 383b6aabf5
7 changed files with 316 additions and 28 deletions
--- a/highlight.cpp
+++ b/highlight.cpp
@ -2089,8 +2089,9 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
            case symbol_decorated_statement:
            case symbol_if_statement:
            {
                // Color the 'end'
                this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND);
                // Color the 'end'
                this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND);
            }
            break;
--- a/parse_constants.h
+++ b/parse_constants.h
@ -47,6 +47,8 @@ enum parse_token_type_t
    symbol_redirection,
    symbol_optional_background,
    symbol_end_command,
    // Terminal types
    parse_token_type_string,
--- a/parse_productions.cpp
+++ b/parse_productions.cpp
@ -187,7 +187,7 @@ RESOLVE(statement)
 PRODUCTIONS(if_statement) =
 {
-    {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list}
+    {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
 };
 RESOLVE_ONLY(if_statement)
@ -231,7 +231,7 @@ RESOLVE(else_continuation)
 PRODUCTIONS(switch_statement) =
 {
-    { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)}
+    { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command}
 };
 RESOLVE_ONLY(switch_statement)
@ -272,7 +272,7 @@ RESOLVE(argument_list)
 PRODUCTIONS(block_statement) =
 {
-    {symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list}
+    {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list}
 };
 RESOLVE_ONLY(block_statement)
@ -287,8 +287,6 @@ RESOLVE(block_header)
 {
    switch (token1.keyword)
    {
        case parse_keyword_else:
            return NO_PRODUCTION;
        case parse_keyword_for:
            return 0;
        case parse_keyword_while:
@ -443,6 +441,12 @@ RESOLVE(optional_background)
    }
 }
 PRODUCTIONS(end_command) =
 {
    {KEYWORD(parse_keyword_end)}
 };
 RESOLVE_ONLY(end_command)
 #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
 const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
 {
@ -483,6 +487,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
            TEST(argument)
            TEST(redirection)
            TEST(optional_background)
            TEST(end_command)
        case parse_token_type_string:
        case parse_token_type_pipe:
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -110,6 +110,10 @@ wcstring token_type_description(parse_token_type_t type)
            return L"symbol_argument";
        case symbol_redirection:
            return L"symbol_redirection";
        case symbol_optional_background:
            return L"optional_background";
        case symbol_end_command:
            return L"symbol_end_command";
        case parse_token_type_string:
@ -124,8 +128,7 @@ wcstring token_type_description(parse_token_type_t type)
            return L"token_end";
        case parse_token_type_terminate:
            return L"token_terminate";
-        case symbol_optional_background:
+
            return L"optional_background";
        case parse_special_type_parse_error:
            return L"parse_error";
@ -1057,21 +1060,37 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, pars
    return result;
 }
-static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result)
+const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const
 {
-    if (parent.type == type) result->push_back(&parent);
+    const parse_node_t *ancestor = &node;
-    for (size_t i=0; i < parent.child_count; i++)
+    while ((ancestor = this->get_parent(*ancestor)))
    {
-        const parse_node_t *child = tree.get_child(parent, i);
+        if (ancestor->type == desired_type)
-        assert(child != NULL);
+        {
-        find_nodes_recursive(tree, *child, type, result);
+            break;
        }
    }
    return ancestor;
 }
 static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count)
 {
    if (result->size() < max_count)
    {
        if (parent.type == type) result->push_back(&parent);
        for (size_t i=0; i < parent.child_count; i++)
        {
            const parse_node_t *child = tree.get_child(parent, i);
            assert(child != NULL);
            find_nodes_recursive(tree, *child, type, result, max_count);
        }
    }
 }
-parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const
+parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const
 {
    parse_node_list_t result;
-    find_nodes_recursive(*this, parent, type, &result);
+    find_nodes_recursive(*this, parent, type, &result, max_count);
    return result;
 }
@ -1188,6 +1207,37 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co
    return result;
 }
 bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const
 {
    // Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false
    bool result = false;
    const parse_node_t *ancestor = &node;
    if (ancestor)
        ancestor = this->get_parent(*ancestor, symbol_decorated_statement);
    if (ancestor)
        ancestor = this->get_parent(*ancestor, symbol_statement);
    if (ancestor)
        ancestor = this->get_parent(*ancestor);
    if (ancestor)
    {
        if (ancestor->type == symbol_job_continuation)
        {
            // Second or more in a pipeline
            result = true;
        }
        else if (ancestor->type == symbol_job && include_first)
        {
            // Check to see if we have a job continuation that's not empty
            const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation);
            result = (continuation != NULL && continuation->child_count > 0);
        }
    }
    return result;
 }
 enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const
 {
    assert(redirection_node.type == symbol_redirection);
@ -1205,3 +1255,17 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
    }
    return result;
 }
 const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node)
 {
    const parse_node_t *result = NULL;
    if (node.type == symbol_block_statement)
    {
        const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header);
        if (block_header != NULL)
        {
            result = this->get_child(*block_header, 0);
        }
    }
    return result;
 }
--- a/parse_tree.h
+++ b/parse_tree.h
@ -159,15 +159,19 @@ class parse_node_tree_t : public std::vector<parse_node_t>
 {
 public:
-    /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
+    /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type.
-    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;    
+     */
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
    /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
    const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
    /* Returns the first ancestor of the given type, or NULL. */
    const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const;
-    /* Find all the nodes of a given type underneath a given node */
+    /* Find all the nodes of a given type underneath a given node, up to max_count of them */
    typedef std::vector<const parse_node_t *> parse_node_list_t;
-    parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;
+    parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const;
    /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */
    const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const;
@ -186,8 +190,14 @@ public:
    /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */
    bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const;
    /* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */
    bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const;
    /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */
    enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const;
    /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
    const parse_node_t *header_node_for_block_statement(const parse_node_t &node);
 };
 /* Fish grammar:
@ -210,19 +220,19 @@ public:
 # A block is a conditional, loop, or begin/end
-    if_statement = if_clause else_clause <END> arguments_or_redirections_list
+    if_statement = if_clause else_clause end_command arguments_or_redirections_list
    if_clause = <IF> job STATEMENT_TERMINATOR job_list
    else_clause = <empty> |
                 <ELSE> else_continuation
    else_continuation = if_clause else_clause |
                        STATEMENT_TERMINATOR job_list
-    switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list <END>
+    switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list end_command
    case_item_list = <empty> |
                    case_item case_item_list
    case_item = CASE argument_list STATEMENT_TERMINATOR job_list
-    block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list
+    block_statement = block_header <TOK_END> job_list end_command arguments_or_redirections_list
    block_header = for_header | while_header | function_header | begin_header
    for_header = FOR var_name IN arguments_or_redirections_list
    while_header = WHILE statement
@ -252,6 +262,8 @@ public:
    terminator = <TOK_END> | <TOK_BACKGROUND>
    optional_background = <empty> | <TOK_BACKGROUND>
    end_command = END
 */
--- a/parser.cpp
+++ b/parser.cpp
@ -44,6 +44,7 @@ The fish parser. Contains functions for parsing and evaluating code.
 #include "path.h"
 #include "signal.h"
 #include "complete.h"
 #include "parse_tree.h"
 /**
   Maximum number of function calls, i.e. recursion depth.
@ -550,14 +551,16 @@ void parser_t::allow_function()
    forbidden_function.pop_back();
 }
-void parser_t::error(int ec, int p, const wchar_t *str, ...)
+void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
 {
    va_list va;
    CHECK(str,);
    error_code = ec;
-    err_pos = p;
+    
    assert(p <= INT_MAX);
    err_pos = static_cast<int>(p);
    va_start(va, str);
    err_buff = vformat_string(str, va);
@ -1148,7 +1151,7 @@ const wchar_t *parser_t::get_buffer() const
 }
-int parser_t::is_help(const wchar_t *s, int min_match) const
+int parser_t::is_help(const wchar_t *s, int min_match)
 {
    CHECK(s, 0);
@ -2889,6 +2892,21 @@ int parser_t::test_args(const  wchar_t * buff, wcstring *out, const wchar_t *pre
    return err;
 }
 // Check if the first argument under the given node is --help
 static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src)
 {
    bool is_help = false;
    const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1);
    if (! arg_nodes.empty())
    {
        // Check the first argument only
        const parse_node_t &arg = *arg_nodes.at(0);
        const wcstring first_arg_src = arg.get_source(src);
        is_help = parser_t::is_help(first_arg_src.c_str(), 3);
    }
    return is_help;
 }
 // helper type used in parser::test below
 struct block_info_t
 {
@ -2897,6 +2915,191 @@ struct block_info_t
 };
 parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
 {
    ASSERT_IS_MAIN_THREAD();
    if (! buff)
        return PARSER_TEST_ERROR;
    const wcstring buff_src = buff;
    parse_node_tree_t node_tree;
    parse_error_list_t parse_errors;
    // Whether we encountered a parse error
    bool errored = false;
    long error_line = -1;
    // Whether we encountered an unclosed block
    // We detect this via an 'end_command' block without source
    bool has_unclosed_block = false;
    bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors);
    if (! parsed)
    {
        // report errors
        if (out)
        {
            for (size_t i=0; i < parse_errors.size(); i++)
            {
                const parse_error_t &error = parse_errors.at(i);
                this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str());
            }
        }
        errored = true;
        error_line = __LINE__;
    }
    // Expand all commands
    // Verify 'or' and 'and' not used inside pipelines
    // Verify pipes via parser_is_pipe_forbidden
    // Verify return only within a function
    if (! errored)
    {
        const size_t node_tree_size = node_tree.size();
        for (size_t i=0; i < node_tree_size; i++)
        {
            const parse_node_t &node = node_tree.at(i);
            if (node.type == symbol_end_command && ! node.has_source())
            {
                // an 'end' without source is an unclosed block
                has_unclosed_block = true;
            }
            else if (node.type == symbol_plain_statement)
            {
                wcstring command;
                if (node_tree.command_for_plain_statement(node, buff_src, &command))
                {
                    // Check that we can expand the command
                    if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
                    {
                        error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str());
                        errored = true;
                                error_line = __LINE__;
                    }
                    // Check that pipes are sound
                    bool is_boolean_command = contains(command, L"or", L"and");
                    bool is_pipe_forbidden = parser_is_pipe_forbidden(command);
                    if (! errored && (is_boolean_command || is_pipe_forbidden))
                    {
                        // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all
                        if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden))
                        {
                            error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG);
                            errored = true;
                                    error_line = __LINE__;
                        }
                    }
                    // Check that we don't return from outside a function
                    // But we allow it if it's 'return --help'
                    if (! errored && command == L"return")
                    {
                        const parse_node_t *ancestor = &node;
                        bool found_function = false;
                        while (ancestor != NULL)
                        {
                            const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor);
                            if (possible_function_header != NULL && possible_function_header->type == symbol_function_header)
                            {
                                found_function = true;
                                break;
                            }
                            ancestor = node_tree.get_parent(*ancestor);
                        }
                        if (! found_function && ! first_argument_is_help(node_tree, node, buff_src))
                        {
                            error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
                            errored = true;
                                    error_line = __LINE__;
                        }
                    }
                    // Check that we don't return from outside a function
                    if (! errored && (command == L"break" || command == L"continue"))
                    {
                        // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function.
                        // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header.
                        bool found_loop = false, end_search = false;
                        const parse_node_t *ancestor = &node;
                        while (ancestor != NULL && ! end_search)
                        {
                            bool end_search = false;
                            const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor);
                            if (loop_or_function_header != NULL)
                            {
                                switch (loop_or_function_header->type)
                                {
                                    case symbol_while_header:
                                    case symbol_for_header:
                                        // this is a loop header, so we can break or continue
                                        found_loop = true;
                                        end_search = true;
                                    case symbol_function_header:
                                        // this is a function header, so we cannot break or continue. We stop our search here.
                                        found_loop = false;
                                        end_search = true;
                                        break;
                                    default:
                                        // most likely begin / end style block, which makes no difference
                                        break;
                                }
                            }
                            ancestor = node_tree.get_parent(*ancestor);
                        }
                        const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header);
                        if (function_node == NULL)
                        {
                            // Ok, this looks bad: return not in a function!
                            // But we allow it if it's 'return --help'
                            // Get the arguments
                            bool is_help = false;
                            const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument);
                            if (! arg_nodes.empty())
                            {
                                // Check the first argument only
                                const parse_node_t &arg = *arg_nodes.at(0);
                                const wcstring first_arg_src = arg.get_source(buff_src);
                                is_help = parser_t::is_help(first_arg_src.c_str(), 3);
                            }
                            // If it's not help, then it's an invalid return
                            if (! is_help)
                            {
                                error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
                                errored = true;
                                        error_line = __LINE__;
                            }
                        }
                    }
                }
            }
        }
    }
    parser_test_error_bits_t res = 0;
    if (errored)
        res |= PARSER_TEST_ERROR;
    if (has_unclosed_block)
        res |= PARSER_TEST_INCOMPLETE;
    error_code=0;
    return res;
 }
 parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
 {
    ASSERT_IS_MAIN_THREAD();
--- a/parser.h
+++ b/parser.h
@ -420,7 +420,7 @@ public:
       \param p The character offset at which the error occured
       \param str The printf-style error message filter
    */
-    void error(int ec, int p, const wchar_t *str, ...);
+    void error(int ec, size_t p, const wchar_t *str, ...);
    /**
       Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'.
@ -488,6 +488,7 @@ public:
       \param prefix the prefix string to prepend to each error message written to the \c out buffer
    */
    parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
    parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
    /**
       Test if the specified string can be parsed as an argument list,
@ -524,7 +525,7 @@ public:
       \param s the string to test
       \param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed.
    */
-    int is_help(const wchar_t *s, int min_match) const;
+    static int is_help(const wchar_t *s, int min_match);
    /**
       Returns the file currently evaluated by the parser. This can be