Finish rewriting detect_errors to use new parser. All tests now pass (!)

This commit is contained in:
ridiculousfish 2013-12-12 18:18:07 -08:00
parent e25d49b80b
commit 5cf59de676
10 changed files with 133 additions and 94 deletions

View file

@ -497,15 +497,19 @@ static int builtin_complete(parser_t &parser, wchar_t **argv)
{ {
if (condition && wcslen(condition)) if (condition && wcslen(condition))
{ {
if (parser.detect_errors(condition)) const wcstring condition_string = condition;
parse_error_list_t errors;
if (parser.detect_errors(condition_string, &errors))
{ {
append_format(stderr_buffer, append_format(stderr_buffer,
L"%ls: Condition '%ls' contained a syntax error\n", L"%ls: Condition '%ls' contained a syntax error",
argv[0], argv[0],
condition); condition);
for (size_t i=0; i < errors.size(); i++)
parser.detect_errors(condition, &stderr_buffer, argv[0]); {
append_format(stderr_buffer, L"\n%s: ", argv[0]);
stderr_buffer.append(errors.at(i).describe(condition_string));
}
res = true; res = true;
} }
} }

View file

@ -584,12 +584,6 @@ static void test_parser()
parser_t parser(PARSER_TYPE_GENERAL, true); parser_t parser(PARSER_TYPE_GENERAL, true);
say(L"Testing null input to parser");
if (!parser.detect_errors(NULL))
{
err(L"Null input to parser.detect_errors undetected");
}
say(L"Testing block nesting"); say(L"Testing block nesting");
if (!parser.detect_errors(L"if; end")) if (!parser.detect_errors(L"if; end"))
{ {
@ -630,11 +624,29 @@ static void test_parser()
{ {
err(L"'break' command outside of loop block context undetected"); err(L"'break' command outside of loop block context undetected");
} }
if (parser.detect_errors(L"break --help"))
{
err(L"'break --help' incorrectly marked as error");
}
if (! parser.detect_errors(L"while false ; function foo ; break ; end ; end "))
{
err(L"'break' command inside function allowed to break from loop outside it");
}
if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return")) if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return"))
{ {
err(L"Invalid pipe command undetected"); err(L"Invalid pipe command undetected");
} }
if (parser.detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end "))
{
err(L"'break' command inside switch falsely reported as error");
}
say(L"Testing basic evaluation"); say(L"Testing basic evaluation");
#if 0 #if 0
/* This fails now since the parser takes a wcstring&, and NULL converts to wchar_t * converts to wcstring which crashes (thanks C++) */ /* This fails now since the parser takes a wcstring&, and NULL converts to wchar_t * converts to wcstring which crashes (thanks C++) */

View file

@ -104,7 +104,13 @@ enum parse_statement_decoration_t
enum parse_error_code_t enum parse_error_code_t
{ {
parse_error_none, parse_error_none,
parse_error_generic, //unknown type
/* matching values from enum parser_error */
parse_error_syntax,
parse_error_eval,
parse_error_cmdsubst,
parse_error_generic, // unclassified error types
parse_error_tokenizer, //tokenizer error parse_error_tokenizer, //tokenizer error

View file

@ -1,6 +1,8 @@
#include "parse_productions.h" #include "parse_productions.h"
#include "tokenizer.h" #include "tokenizer.h"
#include "fallback.h"
#include <vector> #include <vector>
#include <algorithm>
using namespace parse_productions; using namespace parse_productions;
@ -32,21 +34,58 @@ wcstring parse_error_t::describe(const wcstring &src) const
line_end = src.size(); line_end = src.size();
} }
assert(line_end >= line_start); assert(line_end >= line_start);
//fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); //fprintf(stderr, "source start: %lu, source_length %lu, line start %lu, line end %lu\n", source_start, source_length, line_start, line_end);
assert(source_start >= line_start); assert(source_start >= line_start);
// Append the line of text // Append the line of text
result.push_back(L'\n'); result.push_back(L'\n');
result.append(src, line_start, line_end - line_start); result.append(src, line_start, line_end - line_start);
// Append the caret line // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions
wcstring caret_space_line;
caret_space_line.reserve(source_start - line_start);
for (size_t i=line_start; i < source_start; i++)
{
wchar_t wc = src.at(i);
if (wc == L'\t')
{
caret_space_line.push_back(L'\t');
}
else
{
int width = fish_wcwidth(wc);
if (width > 0)
{
caret_space_line.append(static_cast<size_t>(width), L' ');
}
}
}
result.push_back(L'\n'); result.push_back(L'\n');
result.append(source_start - line_start, L' '); result.append(caret_space_line);
result.push_back(L'^'); result.push_back(L'^');
} }
return result; return result;
} }
wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix)
{
wcstring target;
for (size_t i=0; i < errors.size(); i++)
{
if (i > 0)
{
target.push_back(L'\n');
}
if (prefix != NULL)
{
target.append(prefix);
target.append(L": ");
}
target.append(errors.at(i).describe(src));
}
return target;
}
/** Returns a string description of the given token type */ /** Returns a string description of the given token type */
wcstring token_type_description(parse_token_type_t type) wcstring token_type_description(parse_token_type_t type)
{ {

View file

@ -38,6 +38,9 @@ struct parse_error_t
}; };
typedef std::vector<parse_error_t> parse_error_list_t; typedef std::vector<parse_error_t> parse_error_list_t;
/* Returns a description of a list of parse errors */
wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL);
/** A struct representing the token type that we use internally */ /** A struct representing the token type that we use internally */
struct parse_token_t struct parse_token_t
{ {

View file

@ -565,7 +565,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
va_start(va, str); va_start(va, str);
err_buff = vformat_string(str, va); err_buff = vformat_string(str, va);
va_end(va); va_end(va);
} }
/** /**
@ -2753,7 +2752,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
case 1: case 1:
{ {
wchar_t *subst = wcsndup(paran_begin+1, paran_end-paran_begin-1); const wcstring subst(paran_begin + 1, paran_end);
wcstring tmp; wcstring tmp;
tmp.append(arg_cpy, paran_begin - arg_cpy); tmp.append(arg_cpy, paran_begin - arg_cpy);
@ -2762,17 +2761,16 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
// debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff );
err |= parser_t::detect_errors(subst, out, prefix); parse_error_list_t errors;
err |= parser_t::detect_errors(subst, &errors);
if (out && ! errors.empty())
{
out->append(parse_errors_description(errors, subst, prefix));
}
free(subst);
free(arg_cpy); free(arg_cpy);
arg_cpy = wcsdup(tmp.c_str()); arg_cpy = wcsdup(tmp.c_str());
/*
Do _not_ call sb_destroy on this stringbuffer - it's
buffer is used as the new 'arg_cpy'. It is free'd at
the end of the loop.
*/
break; break;
} }
} }
@ -2914,39 +2912,43 @@ struct block_info_t
block_type_t type; //type of the block block_type_t type; //type of the block
}; };
parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) /* Append a syntax error to the given error list */
static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...)
{
parse_error_t error;
error.source_start = node.source_start;
error.source_length = node.source_length;
error.code = parse_error_syntax;
va_list va;
va_start(va, fmt);
error.text = vformat_string(fmt, va);
va_end(va);
errors->push_back(error);
return true;
}
parser_test_error_bits_t parser_t::detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, const wchar_t *prefix)
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
if (! buff)
return PARSER_TEST_ERROR;
const wcstring buff_src = buff;
parse_node_tree_t node_tree; parse_node_tree_t node_tree;
parse_error_list_t parse_errors; parse_error_list_t parse_errors;
// Whether we encountered a parse error // Whether we encountered a parse error
bool errored = false; bool errored = false;
long error_line = -1;
// Whether we encountered an unclosed block // Whether we encountered an unclosed block
// We detect this via an 'end_command' block without source // We detect this via an 'end_command' block without source
bool has_unclosed_block = false; bool has_unclosed_block = false;
// Parse the input string into a parse tree
// Some errors are detected here
bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors);
if (! parsed) if (! parsed)
{ {
// report errors
if (out)
{
for (size_t i=0; i < parse_errors.size(); i++)
{
const parse_error_t &error = parse_errors.at(i);
this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str());
}
}
errored = true; errored = true;
error_line = __LINE__;
} }
// Expand all commands // Expand all commands
@ -2973,9 +2975,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
// Check that we can expand the command // Check that we can expand the command
if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
{ {
error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str()); errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str());
errored = true;
error_line = __LINE__;
} }
// Check that pipes are sound // Check that pipes are sound
@ -2986,9 +2986,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
// 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all
if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden))
{ {
error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG); errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG);
errored = true;
error_line = __LINE__;
} }
} }
@ -3011,9 +3009,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
} }
if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) if (! found_function && ! first_argument_is_help(node_tree, node, buff_src))
{ {
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
} }
} }
@ -3026,7 +3022,6 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
const parse_node_t *ancestor = &node; const parse_node_t *ancestor = &node;
while (ancestor != NULL && ! end_search) while (ancestor != NULL && ! end_search)
{ {
bool end_search = false;
const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor);
if (loop_or_function_header != NULL) if (loop_or_function_header != NULL)
{ {
@ -3037,6 +3032,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
// this is a loop header, so we can break or continue // this is a loop header, so we can break or continue
found_loop = true; found_loop = true;
end_search = true; end_search = true;
break;
case symbol_function_header: case symbol_function_header:
// this is a function header, so we cannot break or continue. We stop our search here. // this is a function header, so we cannot break or continue. We stop our search here.
@ -3052,31 +3048,9 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
ancestor = node_tree.get_parent(*ancestor); ancestor = node_tree.get_parent(*ancestor);
} }
if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src))
const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header);
if (function_node == NULL)
{ {
// Ok, this looks bad: return not in a function! errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG);
// But we allow it if it's 'return --help'
// Get the arguments
bool is_help = false;
const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument);
if (! arg_nodes.empty())
{
// Check the first argument only
const parse_node_t &arg = *arg_nodes.at(0);
const wcstring first_arg_src = arg.get_source(buff_src);
is_help = parser_t::is_help(first_arg_src.c_str(), 3);
}
// If it's not help, then it's an invalid return
if (! is_help)
{
error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
errored = true;
error_line = __LINE__;
}
} }
} }
} }
@ -3092,6 +3066,11 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
if (has_unclosed_block) if (has_unclosed_block)
res |= PARSER_TEST_INCOMPLETE; res |= PARSER_TEST_INCOMPLETE;
if (out_errors)
{
out_errors->swap(parse_errors);
}
error_code=0; error_code=0;

View file

@ -11,6 +11,7 @@
#include "util.h" #include "util.h"
#include "event.h" #include "event.h"
#include "function.h" #include "function.h"
#include "parse_tree.h"
#include <vector> #include <vector>
enum { enum {
@ -487,7 +488,7 @@ public:
\param out if non-null, any errors in the command will be filled out into this buffer \param out if non-null, any errors in the command will be filled out into this buffer
\param prefix the prefix string to prepend to each error message written to the \c out buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer
*/ */
parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t detect_errors(const wcstring &buff, parse_error_list_t *out_errors = NULL, const wchar_t *prefix = NULL);
parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
/** /**

View file

@ -2472,12 +2472,11 @@ void reader_run_command(parser_t &parser, const wcstring &cmd)
int reader_shell_test(const wchar_t *b) int reader_shell_test(const wchar_t *b)
{ {
int res = parser_t::principal_parser().detect_errors(b); wcstring bstr = b;
int res = parser_t::principal_parser().detect_errors(bstr);
if (res & PARSER_TEST_ERROR) if (res & PARSER_TEST_ERROR)
{ {
wcstring sb;
const int tmp[1] = {0}; const int tmp[1] = {0};
const int tmp2[1] = {0}; const int tmp2[1] = {0};
const wcstring empty; const wcstring empty;
@ -2491,9 +2490,14 @@ int reader_shell_test(const wchar_t *b)
tmp2, tmp2,
0); 0);
parse_error_list_t errors;
parser_t::principal_parser().detect_errors(bstr, &errors, L"fish");
parser_t::principal_parser().detect_errors(b, &sb, L"fish"); if (! errors.empty())
fwprintf(stderr, L"%ls", sb.c_str()); {
const wcstring sb = parse_errors_description(errors, b, L"fish");
fwprintf(stderr, L"%ls", sb.c_str());
}
} }
return res; return res;
} }
@ -3903,13 +3907,14 @@ static int read_ni(int fd, const io_chain_t &io)
res = 1; res = 1;
} }
wcstring sb; parse_error_list_t errors;
if (! parser.detect_errors(str.c_str(), &sb, L"fish")) if (! parser.detect_errors(str, &errors, L"fish"))
{ {
parser.eval(str, io, TOP); parser.eval(str, io, TOP);
} }
else else
{ {
const wcstring sb = parse_errors_description(errors, str);
fwprintf(stderr, L"%ls", sb.c_str()); fwprintf(stderr, L"%ls", sb.c_str());
res = 1; res = 1;
} }

View file

@ -20,15 +20,6 @@ case one
echo $status echo $status
end end
# Test that non-case tokens inside `switch` don't blow away status
# (why are these even allowed?)
false
switch one
true
case one
echo $status
end
#test contains -i #test contains -i
echo test contains -i echo test contains -i
contains -i string a b c string d contains -i string a b c string d

View file

@ -3,7 +3,6 @@
3 3
0 0
1
1 1
test contains -i test contains -i
4 4