Initial work towards rewriting detect_errors to use new parser.

Low-level tests currently pass; high level tests fail.
This commit is contained in:
ridiculousfish 2013-12-11 18:34:28 -08:00
parent 383b6aabf5
commit d5d9b9284a
7 changed files with 316 additions and 28 deletions

View file

@ -2089,8 +2089,9 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
case symbol_decorated_statement:
case symbol_if_statement:
{
// Color the 'end'
this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND);
// Color the 'end'
this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND);
}
break;

View file

@ -47,6 +47,8 @@ enum parse_token_type_t
symbol_redirection,
symbol_optional_background,
symbol_end_command,
// Terminal types
parse_token_type_string,

View file

@ -187,7 +187,7 @@ RESOLVE(statement)
PRODUCTIONS(if_statement) =
{
{symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list}
{symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
};
RESOLVE_ONLY(if_statement)
@ -231,7 +231,7 @@ RESOLVE(else_continuation)
PRODUCTIONS(switch_statement) =
{
{ KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)}
{ KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command}
};
RESOLVE_ONLY(switch_statement)
@ -272,7 +272,7 @@ RESOLVE(argument_list)
PRODUCTIONS(block_statement) =
{
{symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list}
{symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list}
};
RESOLVE_ONLY(block_statement)
@ -287,8 +287,6 @@ RESOLVE(block_header)
{
switch (token1.keyword)
{
case parse_keyword_else:
return NO_PRODUCTION;
case parse_keyword_for:
return 0;
case parse_keyword_while:
@ -443,6 +441,12 @@ RESOLVE(optional_background)
}
}
PRODUCTIONS(end_command) =
{
{KEYWORD(parse_keyword_end)}
};
RESOLVE_ONLY(end_command)
#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
{
@ -483,6 +487,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
TEST(argument)
TEST(redirection)
TEST(optional_background)
TEST(end_command)
case parse_token_type_string:
case parse_token_type_pipe:

View file

@ -110,6 +110,10 @@ wcstring token_type_description(parse_token_type_t type)
return L"symbol_argument";
case symbol_redirection:
return L"symbol_redirection";
case symbol_optional_background:
return L"optional_background";
case symbol_end_command:
return L"symbol_end_command";
case parse_token_type_string:
@ -124,8 +128,7 @@ wcstring token_type_description(parse_token_type_t type)
return L"token_end";
case parse_token_type_terminate:
return L"token_terminate";
case symbol_optional_background:
return L"optional_background";
case parse_special_type_parse_error:
return L"parse_error";
@ -1057,21 +1060,37 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, pars
return result;
}
static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result)
const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const
{
if (parent.type == type) result->push_back(&parent);
for (size_t i=0; i < parent.child_count; i++)
const parse_node_t *ancestor = &node;
while ((ancestor = this->get_parent(*ancestor)))
{
const parse_node_t *child = tree.get_child(parent, i);
assert(child != NULL);
find_nodes_recursive(tree, *child, type, result);
if (ancestor->type == desired_type)
{
break;
}
}
return ancestor;
}
static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count)
{
if (result->size() < max_count)
{
if (parent.type == type) result->push_back(&parent);
for (size_t i=0; i < parent.child_count; i++)
{
const parse_node_t *child = tree.get_child(parent, i);
assert(child != NULL);
find_nodes_recursive(tree, *child, type, result, max_count);
}
}
}
parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const
parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const
{
parse_node_list_t result;
find_nodes_recursive(*this, parent, type, &result);
find_nodes_recursive(*this, parent, type, &result, max_count);
return result;
}
@ -1188,6 +1207,37 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co
return result;
}
bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const
{
// Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false
bool result = false;
const parse_node_t *ancestor = &node;
if (ancestor)
ancestor = this->get_parent(*ancestor, symbol_decorated_statement);
if (ancestor)
ancestor = this->get_parent(*ancestor, symbol_statement);
if (ancestor)
ancestor = this->get_parent(*ancestor);
if (ancestor)
{
if (ancestor->type == symbol_job_continuation)
{
// Second or more in a pipeline
result = true;
}
else if (ancestor->type == symbol_job && include_first)
{
// Check to see if we have a job continuation that's not empty
const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation);
result = (continuation != NULL && continuation->child_count > 0);
}
}
return result;
}
enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const
{
assert(redirection_node.type == symbol_redirection);
@ -1205,3 +1255,17 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
}
return result;
}
const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node)
{
const parse_node_t *result = NULL;
if (node.type == symbol_block_statement)
{
const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header);
if (block_header != NULL)
{
result = this->get_child(*block_header, 0);
}
}
return result;
}

View file

@ -159,15 +159,19 @@ class parse_node_tree_t : public std::vector<parse_node_t>
{
public:
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type.
*/
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
/* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
/* Returns the first ancestor of the given type, or NULL. */
const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const;
/* Find all the nodes of a given type underneath a given node */
/* Find all the nodes of a given type underneath a given node, up to max_count of them */
typedef std::vector<const parse_node_t *> parse_node_list_t;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const;
/* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */
const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const;
@ -186,8 +190,14 @@ public:
/* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */
bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const;
/* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */
bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const;
/* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */
enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const;
/* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
const parse_node_t *header_node_for_block_statement(const parse_node_t &node);
};
/* Fish grammar:
@ -210,19 +220,19 @@ public:
# A block is a conditional, loop, or begin/end
if_statement = if_clause else_clause <END> arguments_or_redirections_list
if_statement = if_clause else_clause end_command arguments_or_redirections_list
if_clause = <IF> job STATEMENT_TERMINATOR job_list
else_clause = <empty> |
<ELSE> else_continuation
else_continuation = if_clause else_clause |
STATEMENT_TERMINATOR job_list
switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list <END>
switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list end_command
case_item_list = <empty> |
case_item case_item_list
case_item = CASE argument_list STATEMENT_TERMINATOR job_list
block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list
block_statement = block_header <TOK_END> job_list end_command arguments_or_redirections_list
block_header = for_header | while_header | function_header | begin_header
for_header = FOR var_name IN arguments_or_redirections_list
while_header = WHILE statement
@ -252,6 +262,8 @@ public:
terminator = <TOK_END> | <TOK_BACKGROUND>
optional_background = <empty> | <TOK_BACKGROUND>
end_command = END
*/

View file

@ -44,6 +44,7 @@ The fish parser. Contains functions for parsing and evaluating code.
#include "path.h"
#include "signal.h"
#include "complete.h"
#include "parse_tree.h"
/**
Maximum number of function calls, i.e. recursion depth.
@ -550,14 +551,16 @@ void parser_t::allow_function()
forbidden_function.pop_back();
}
void parser_t::error(int ec, int p, const wchar_t *str, ...)
void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
{
va_list va;
CHECK(str,);
error_code = ec;
err_pos = p;
assert(p <= INT_MAX);
err_pos = static_cast<int>(p);
va_start(va, str);
err_buff = vformat_string(str, va);
@ -1148,7 +1151,7 @@ const wchar_t *parser_t::get_buffer() const
}
int parser_t::is_help(const wchar_t *s, int min_match) const
int parser_t::is_help(const wchar_t *s, int min_match)
{
CHECK(s, 0);
@ -2889,6 +2892,21 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
return err;
}
// Check if the first argument under the given node is --help
static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src)
{
bool is_help = false;
const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1);
if (! arg_nodes.empty())
{
// Check the first argument only
const parse_node_t &arg = *arg_nodes.at(0);
const wcstring first_arg_src = arg.get_source(src);
is_help = parser_t::is_help(first_arg_src.c_str(), 3);
}
return is_help;
}
// helper type used in parser::test below
struct block_info_t
{
@ -2897,6 +2915,191 @@ struct block_info_t
};
parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
{
ASSERT_IS_MAIN_THREAD();
if (! buff)
return PARSER_TEST_ERROR;
const wcstring buff_src = buff;
parse_node_tree_t node_tree;
parse_error_list_t parse_errors;
// Whether we encountered a parse error
bool errored = false;
long error_line = -1;
// Whether we encountered an unclosed block
// We detect this via an 'end_command' block without source
bool has_unclosed_block = false;
bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors);
if (! parsed)
{
// report errors
if (out)
{
for (size_t i=0; i < parse_errors.size(); i++)
{
const parse_error_t &error = parse_errors.at(i);
this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str());
}
}
errored = true;
error_line = __LINE__;
}
// Expand all commands
// Verify 'or' and 'and' not used inside pipelines
// Verify pipes via parser_is_pipe_forbidden
// Verify return only within a function
if (! errored)
{
const size_t node_tree_size = node_tree.size();
for (size_t i=0; i < node_tree_size; i++)
{
const parse_node_t &node = node_tree.at(i);
if (node.type == symbol_end_command && ! node.has_source())
{
// an 'end' without source is an unclosed block
has_unclosed_block = true;
}
else if (node.type == symbol_plain_statement)
{
wcstring command;
if (node_tree.command_for_plain_statement(node, buff_src, &command))
{
// Check that we can expand the command
if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
{
error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str());
errored = true;
error_line = __LINE__;
}
// Check that pipes are sound
bool is_boolean_command = contains(command, L"or", L"and");
bool is_pipe_forbidden = parser_is_pipe_forbidden(command);
if (! errored && (is_boolean_command || is_pipe_forbidden))
{
// 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all
if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden))
{
error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
// Check that we don't return from outside a function
// But we allow it if it's 'return --help'
if (! errored && command == L"return")
{
const parse_node_t *ancestor = &node;
bool found_function = false;
while (ancestor != NULL)
{
const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor);
if (possible_function_header != NULL && possible_function_header->type == symbol_function_header)
{
found_function = true;
break;
}
ancestor = node_tree.get_parent(*ancestor);
}
if (! found_function && ! first_argument_is_help(node_tree, node, buff_src))
{
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
// Check that we don't return from outside a function
if (! errored && (command == L"break" || command == L"continue"))
{
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header.
bool found_loop = false, end_search = false;
const parse_node_t *ancestor = &node;
while (ancestor != NULL && ! end_search)
{
bool end_search = false;
const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor);
if (loop_or_function_header != NULL)
{
switch (loop_or_function_header->type)
{
case symbol_while_header:
case symbol_for_header:
// this is a loop header, so we can break or continue
found_loop = true;
end_search = true;
case symbol_function_header:
// this is a function header, so we cannot break or continue. We stop our search here.
found_loop = false;
end_search = true;
break;
default:
// most likely begin / end style block, which makes no difference
break;
}
}
ancestor = node_tree.get_parent(*ancestor);
}
const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header);
if (function_node == NULL)
{
// Ok, this looks bad: return not in a function!
// But we allow it if it's 'return --help'
// Get the arguments
bool is_help = false;
const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument);
if (! arg_nodes.empty())
{
// Check the first argument only
const parse_node_t &arg = *arg_nodes.at(0);
const wcstring first_arg_src = arg.get_source(buff_src);
is_help = parser_t::is_help(first_arg_src.c_str(), 3);
}
// If it's not help, then it's an invalid return
if (! is_help)
{
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
}
}
}
}
}
}
}
parser_test_error_bits_t res = 0;
if (errored)
res |= PARSER_TEST_ERROR;
if (has_unclosed_block)
res |= PARSER_TEST_INCOMPLETE;
error_code=0;
return res;
}
parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
{
ASSERT_IS_MAIN_THREAD();

View file

@ -420,7 +420,7 @@ public:
\param p The character offset at which the error occured
\param str The printf-style error message filter
*/
void error(int ec, int p, const wchar_t *str, ...);
void error(int ec, size_t p, const wchar_t *str, ...);
/**
Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'.
@ -488,6 +488,7 @@ public:
\param prefix the prefix string to prepend to each error message written to the \c out buffer
*/
parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
/**
Test if the specified string can be parsed as an argument list,
@ -524,7 +525,7 @@ public:
\param s the string to test
\param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed.
*/
int is_help(const wchar_t *s, int min_match) const;
static int is_help(const wchar_t *s, int min_match);
/**
Returns the file currently evaluated by the parser. This can be