Initial work towards rewriting detect_errors to use new parser.

Low-level tests currently pass; high level tests fail.
This commit is contained in:
ridiculousfish 2013-12-11 18:34:28 -08:00
parent 383b6aabf5
commit d5d9b9284a
7 changed files with 316 additions and 28 deletions

View file

@ -2089,8 +2089,9 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
case symbol_decorated_statement: case symbol_decorated_statement:
case symbol_if_statement: case symbol_if_statement:
{ {
// Color the 'end'
this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND);
// Color the 'end'
this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND);
} }
break; break;

View file

@ -48,6 +48,8 @@ enum parse_token_type_t
symbol_optional_background, symbol_optional_background,
symbol_end_command,
// Terminal types // Terminal types
parse_token_type_string, parse_token_type_string,
parse_token_type_pipe, parse_token_type_pipe,

View file

@ -187,7 +187,7 @@ RESOLVE(statement)
PRODUCTIONS(if_statement) = PRODUCTIONS(if_statement) =
{ {
{symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
}; };
RESOLVE_ONLY(if_statement) RESOLVE_ONLY(if_statement)
@ -231,7 +231,7 @@ RESOLVE(else_continuation)
PRODUCTIONS(switch_statement) = PRODUCTIONS(switch_statement) =
{ {
{ KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command}
}; };
RESOLVE_ONLY(switch_statement) RESOLVE_ONLY(switch_statement)
@ -272,7 +272,7 @@ RESOLVE(argument_list)
PRODUCTIONS(block_statement) = PRODUCTIONS(block_statement) =
{ {
{symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list}
}; };
RESOLVE_ONLY(block_statement) RESOLVE_ONLY(block_statement)
@ -287,8 +287,6 @@ RESOLVE(block_header)
{ {
switch (token1.keyword) switch (token1.keyword)
{ {
case parse_keyword_else:
return NO_PRODUCTION;
case parse_keyword_for: case parse_keyword_for:
return 0; return 0;
case parse_keyword_while: case parse_keyword_while:
@ -443,6 +441,12 @@ RESOLVE(optional_background)
} }
} }
PRODUCTIONS(end_command) =
{
{KEYWORD(parse_keyword_end)}
};
RESOLVE_ONLY(end_command)
#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
{ {
@ -483,6 +487,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
TEST(argument) TEST(argument)
TEST(redirection) TEST(redirection)
TEST(optional_background) TEST(optional_background)
TEST(end_command)
case parse_token_type_string: case parse_token_type_string:
case parse_token_type_pipe: case parse_token_type_pipe:

View file

@ -110,6 +110,10 @@ wcstring token_type_description(parse_token_type_t type)
return L"symbol_argument"; return L"symbol_argument";
case symbol_redirection: case symbol_redirection:
return L"symbol_redirection"; return L"symbol_redirection";
case symbol_optional_background:
return L"optional_background";
case symbol_end_command:
return L"symbol_end_command";
case parse_token_type_string: case parse_token_type_string:
@ -124,8 +128,7 @@ wcstring token_type_description(parse_token_type_t type)
return L"token_end"; return L"token_end";
case parse_token_type_terminate: case parse_token_type_terminate:
return L"token_terminate"; return L"token_terminate";
case symbol_optional_background:
return L"optional_background";
case parse_special_type_parse_error: case parse_special_type_parse_error:
return L"parse_error"; return L"parse_error";
@ -1057,21 +1060,37 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, pars
return result; return result;
} }
static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const
{ {
if (parent.type == type) result->push_back(&parent); const parse_node_t *ancestor = &node;
for (size_t i=0; i < parent.child_count; i++) while ((ancestor = this->get_parent(*ancestor)))
{ {
const parse_node_t *child = tree.get_child(parent, i); if (ancestor->type == desired_type)
assert(child != NULL); {
find_nodes_recursive(tree, *child, type, result); break;
}
}
return ancestor;
}
static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count)
{
if (result->size() < max_count)
{
if (parent.type == type) result->push_back(&parent);
for (size_t i=0; i < parent.child_count; i++)
{
const parse_node_t *child = tree.get_child(parent, i);
assert(child != NULL);
find_nodes_recursive(tree, *child, type, result, max_count);
}
} }
} }
parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const
{ {
parse_node_list_t result; parse_node_list_t result;
find_nodes_recursive(*this, parent, type, &result); find_nodes_recursive(*this, parent, type, &result, max_count);
return result; return result;
} }
@ -1188,6 +1207,37 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co
return result; return result;
} }
bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const
{
// Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false
bool result = false;
const parse_node_t *ancestor = &node;
if (ancestor)
ancestor = this->get_parent(*ancestor, symbol_decorated_statement);
if (ancestor)
ancestor = this->get_parent(*ancestor, symbol_statement);
if (ancestor)
ancestor = this->get_parent(*ancestor);
if (ancestor)
{
if (ancestor->type == symbol_job_continuation)
{
// Second or more in a pipeline
result = true;
}
else if (ancestor->type == symbol_job && include_first)
{
// Check to see if we have a job continuation that's not empty
const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation);
result = (continuation != NULL && continuation->child_count > 0);
}
}
return result;
}
enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const
{ {
assert(redirection_node.type == symbol_redirection); assert(redirection_node.type == symbol_redirection);
@ -1205,3 +1255,17 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
} }
return result; return result;
} }
const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node)
{
const parse_node_t *result = NULL;
if (node.type == symbol_block_statement)
{
const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header);
if (block_header != NULL)
{
result = this->get_child(*block_header, 0);
}
}
return result;
}

View file

@ -159,15 +159,19 @@ class parse_node_tree_t : public std::vector<parse_node_t>
{ {
public: public:
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type.
*/
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
/* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
/* Find all the nodes of a given type underneath a given node */ /* Returns the first ancestor of the given type, or NULL. */
const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const;
/* Find all the nodes of a given type underneath a given node, up to max_count of them */
typedef std::vector<const parse_node_t *> parse_node_list_t; typedef std::vector<const parse_node_t *> parse_node_list_t;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const;
/* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */
const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const;
@ -186,8 +190,14 @@ public:
/* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */ /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */
bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const;
/* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */
bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const;
/* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */
enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const;
/* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
const parse_node_t *header_node_for_block_statement(const parse_node_t &node);
}; };
/* Fish grammar: /* Fish grammar:
@ -210,19 +220,19 @@ public:
# A block is a conditional, loop, or begin/end # A block is a conditional, loop, or begin/end
if_statement = if_clause else_clause <END> arguments_or_redirections_list if_statement = if_clause else_clause end_command arguments_or_redirections_list
if_clause = <IF> job STATEMENT_TERMINATOR job_list if_clause = <IF> job STATEMENT_TERMINATOR job_list
else_clause = <empty> | else_clause = <empty> |
<ELSE> else_continuation <ELSE> else_continuation
else_continuation = if_clause else_clause | else_continuation = if_clause else_clause |
STATEMENT_TERMINATOR job_list STATEMENT_TERMINATOR job_list
switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list <END> switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list end_command
case_item_list = <empty> | case_item_list = <empty> |
case_item case_item_list case_item case_item_list
case_item = CASE argument_list STATEMENT_TERMINATOR job_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list
block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list block_statement = block_header <TOK_END> job_list end_command arguments_or_redirections_list
block_header = for_header | while_header | function_header | begin_header block_header = for_header | while_header | function_header | begin_header
for_header = FOR var_name IN arguments_or_redirections_list for_header = FOR var_name IN arguments_or_redirections_list
while_header = WHILE statement while_header = WHILE statement
@ -253,6 +263,8 @@ public:
optional_background = <empty> | <TOK_BACKGROUND> optional_background = <empty> | <TOK_BACKGROUND>
end_command = END
*/ */
#endif #endif

View file

@ -44,6 +44,7 @@ The fish parser. Contains functions for parsing and evaluating code.
#include "path.h" #include "path.h"
#include "signal.h" #include "signal.h"
#include "complete.h" #include "complete.h"
#include "parse_tree.h"
/** /**
Maximum number of function calls, i.e. recursion depth. Maximum number of function calls, i.e. recursion depth.
@ -550,14 +551,16 @@ void parser_t::allow_function()
forbidden_function.pop_back(); forbidden_function.pop_back();
} }
void parser_t::error(int ec, int p, const wchar_t *str, ...) void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
{ {
va_list va; va_list va;
CHECK(str,); CHECK(str,);
error_code = ec; error_code = ec;
err_pos = p;
assert(p <= INT_MAX);
err_pos = static_cast<int>(p);
va_start(va, str); va_start(va, str);
err_buff = vformat_string(str, va); err_buff = vformat_string(str, va);
@ -1148,7 +1151,7 @@ const wchar_t *parser_t::get_buffer() const
} }
int parser_t::is_help(const wchar_t *s, int min_match) const int parser_t::is_help(const wchar_t *s, int min_match)
{ {
CHECK(s, 0); CHECK(s, 0);
@ -2889,6 +2892,21 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
return err; return err;
} }
// Check if the first argument under the given node is --help
static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src)
{
bool is_help = false;
const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1);
if (! arg_nodes.empty())
{
// Check the first argument only
const parse_node_t &arg = *arg_nodes.at(0);
const wcstring first_arg_src = arg.get_source(src);
is_help = parser_t::is_help(first_arg_src.c_str(), 3);
}
return is_help;
}
// helper type used in parser::test below // helper type used in parser::test below
struct block_info_t struct block_info_t
{ {
@ -2900,6 +2918,191 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
if (! buff)
return PARSER_TEST_ERROR;
const wcstring buff_src = buff;
parse_node_tree_t node_tree;
parse_error_list_t parse_errors;
// Whether we encountered a parse error
bool errored = false;
long error_line = -1;
// Whether we encountered an unclosed block
// We detect this via an 'end_command' block without source
bool has_unclosed_block = false;
bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors);
if (! parsed)
{
// report errors
if (out)
{
for (size_t i=0; i < parse_errors.size(); i++)
{
const parse_error_t &error = parse_errors.at(i);
this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str());
}
}
errored = true;
error_line = __LINE__;
}
// Expand all commands
// Verify 'or' and 'and' not used inside pipelines
// Verify pipes via parser_is_pipe_forbidden
// Verify return only within a function
if (! errored)
{
const size_t node_tree_size = node_tree.size();
for (size_t i=0; i < node_tree_size; i++)
{
const parse_node_t &node = node_tree.at(i);
if (node.type == symbol_end_command && ! node.has_source())
{
// an 'end' without source is an unclosed block
has_unclosed_block = true;
}
else if (node.type == symbol_plain_statement)
{
wcstring command;
if (node_tree.command_for_plain_statement(node, buff_src, &command))
{
// Check that we can expand the command
if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
{
error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str());
errored = true;
error_line = __LINE__;
}
// Check that pipes are sound
bool is_boolean_command = contains(command, L"or", L"and");
bool is_pipe_forbidden = parser_is_pipe_forbidden(command);
if (! errored && (is_boolean_command || is_pipe_forbidden))
{
// 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all
if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden))
{
error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
// Check that we don't return from outside a function
// But we allow it if it's 'return --help'
if (! errored && command == L"return")
{
const parse_node_t *ancestor = &node;
bool found_function = false;
while (ancestor != NULL)
{
const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor);
if (possible_function_header != NULL && possible_function_header->type == symbol_function_header)
{
found_function = true;
break;
}
ancestor = node_tree.get_parent(*ancestor);
}
if (! found_function && ! first_argument_is_help(node_tree, node, buff_src))
{
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
// Check that we don't return from outside a function
if (! errored && (command == L"break" || command == L"continue"))
{
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header.
bool found_loop = false, end_search = false;
const parse_node_t *ancestor = &node;
while (ancestor != NULL && ! end_search)
{
bool end_search = false;
const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor);
if (loop_or_function_header != NULL)
{
switch (loop_or_function_header->type)
{
case symbol_while_header:
case symbol_for_header:
// this is a loop header, so we can break or continue
found_loop = true;
end_search = true;
case symbol_function_header:
// this is a function header, so we cannot break or continue. We stop our search here.
found_loop = false;
end_search = true;
break;
default:
// most likely begin / end style block, which makes no difference
break;
}
}
ancestor = node_tree.get_parent(*ancestor);
}
const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header);
if (function_node == NULL)
{
// Ok, this looks bad: return not in a function!
// But we allow it if it's 'return --help'
// Get the arguments
bool is_help = false;
const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument);
if (! arg_nodes.empty())
{
// Check the first argument only
const parse_node_t &arg = *arg_nodes.at(0);
const wcstring first_arg_src = arg.get_source(buff_src);
is_help = parser_t::is_help(first_arg_src.c_str(), 3);
}
// If it's not help, then it's an invalid return
if (! is_help)
{
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
}
}
}
}
}
parser_test_error_bits_t res = 0;
if (errored)
res |= PARSER_TEST_ERROR;
if (has_unclosed_block)
res |= PARSER_TEST_INCOMPLETE;
error_code=0;
return res;
}
parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
{
ASSERT_IS_MAIN_THREAD();
/* /*
Set to one if a command name has been given for the currently Set to one if a command name has been given for the currently
parsed process specification parsed process specification

View file

@ -420,7 +420,7 @@ public:
\param p The character offset at which the error occured \param p The character offset at which the error occured
\param str The printf-style error message filter \param str The printf-style error message filter
*/ */
void error(int ec, int p, const wchar_t *str, ...); void error(int ec, size_t p, const wchar_t *str, ...);
/** /**
Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'. Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'.
@ -488,6 +488,7 @@ public:
\param prefix the prefix string to prepend to each error message written to the \c out buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer
*/ */
parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
/** /**
Test if the specified string can be parsed as an argument list, Test if the specified string can be parsed as an argument list,
@ -524,7 +525,7 @@ public:
\param s the string to test \param s the string to test
\param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed. \param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed.
*/ */
int is_help(const wchar_t *s, int min_match) const; static int is_help(const wchar_t *s, int min_match);
/** /**
Returns the file currently evaluated by the parser. This can be Returns the file currently evaluated by the parser. This can be