From 5cf59de6763a0000fdc87f0101ca78bd137dffcc Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 12 Dec 2013 18:18:07 -0800 Subject: [PATCH] Finish rewriting detect_errors to use new parser. All tests now pass (!) --- builtin_complete.cpp | 14 ++++--- fish_tests.cpp | 24 ++++++++--- parse_constants.h | 8 +++- parse_tree.cpp | 45 ++++++++++++++++++-- parse_tree.h | 3 ++ parser.cpp | 97 +++++++++++++++++--------------------------- parser.h | 3 +- reader.cpp | 23 +++++++---- tests/test7.in | 9 ---- tests/test7.out | 1 - 10 files changed, 133 insertions(+), 94 deletions(-) diff --git a/builtin_complete.cpp b/builtin_complete.cpp index 14b3a4b74..0cc3b7e7d 100644 --- a/builtin_complete.cpp +++ b/builtin_complete.cpp @@ -497,15 +497,19 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) { if (condition && wcslen(condition)) { - if (parser.detect_errors(condition)) + const wcstring condition_string = condition; + parse_error_list_t errors; + if (parser.detect_errors(condition_string, &errors)) { append_format(stderr_buffer, - L"%ls: Condition '%ls' contained a syntax error\n", + L"%ls: Condition '%ls' contained a syntax error", argv[0], condition); - - parser.detect_errors(condition, &stderr_buffer, argv[0]); - + for (size_t i=0; i < errors.size(); i++) + { + append_format(stderr_buffer, L"\n%s: ", argv[0]); + stderr_buffer.append(errors.at(i).describe(condition_string)); + } res = true; } } diff --git a/fish_tests.cpp b/fish_tests.cpp index c43381b3f..0c273643a 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -584,12 +584,6 @@ static void test_parser() parser_t parser(PARSER_TYPE_GENERAL, true); - say(L"Testing null input to parser"); - if (!parser.detect_errors(NULL)) - { - err(L"Null input to parser.detect_errors undetected"); - } - say(L"Testing block nesting"); if (!parser.detect_errors(L"if; end")) { @@ -630,10 +624,28 @@ static void test_parser() { err(L"'break' command outside of loop block context undetected"); } + + if (parser.detect_errors(L"break --help")) + { + err(L"'break --help' incorrectly marked as error"); + } + + if (! parser.detect_errors(L"while false ; function foo ; break ; end ; end ")) + { + err(L"'break' command inside function allowed to break from loop outside it"); + } + + if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return")) { err(L"Invalid pipe command undetected"); } + + if (parser.detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) + { + err(L"'break' command inside switch falsely reported as error"); + } + say(L"Testing basic evaluation"); #if 0 diff --git a/parse_constants.h b/parse_constants.h index 706c31b84..7ccc962c2 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -104,7 +104,13 @@ enum parse_statement_decoration_t enum parse_error_code_t { parse_error_none, - parse_error_generic, //unknown type + + /* matching values from enum parser_error */ + parse_error_syntax, + parse_error_eval, + parse_error_cmdsubst, + + parse_error_generic, // unclassified error types parse_error_tokenizer, //tokenizer error diff --git a/parse_tree.cpp b/parse_tree.cpp index b8cb348c0..ad83a0d60 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,6 +1,8 @@ #include "parse_productions.h" #include "tokenizer.h" +#include "fallback.h" #include +#include using namespace parse_productions; @@ -32,21 +34,58 @@ wcstring parse_error_t::describe(const wcstring &src) const line_end = src.size(); } assert(line_end >= line_start); - //fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); + //fprintf(stderr, "source start: %lu, source_length %lu, line start %lu, line end %lu\n", source_start, source_length, line_start, line_end); assert(source_start >= line_start); // Append the line of text result.push_back(L'\n'); result.append(src, line_start, line_end - line_start); - // Append the caret line + // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions + wcstring caret_space_line; + caret_space_line.reserve(source_start - line_start); + for (size_t i=line_start; i < source_start; i++) + { + wchar_t wc = src.at(i); + if (wc == L'\t') + { + caret_space_line.push_back(L'\t'); + } + else + { + int width = fish_wcwidth(wc); + if (width > 0) + { + caret_space_line.append(static_cast(width), L' '); + } + } + } result.push_back(L'\n'); - result.append(source_start - line_start, L' '); + result.append(caret_space_line); result.push_back(L'^'); } return result; } +wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix) +{ + wcstring target; + for (size_t i=0; i < errors.size(); i++) + { + if (i > 0) + { + target.push_back(L'\n'); + } + if (prefix != NULL) + { + target.append(prefix); + target.append(L": "); + } + target.append(errors.at(i).describe(src)); + } + return target; +} + /** Returns a string description of the given token type */ wcstring token_type_description(parse_token_type_t type) { diff --git a/parse_tree.h b/parse_tree.h index e65d1bafd..8a0b3eedd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -38,6 +38,9 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; +/* Returns a description of a list of parse errors */ +wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL); + /** A struct representing the token type that we use internally */ struct parse_token_t { diff --git a/parser.cpp b/parser.cpp index 6053b4a34..136bc74a4 100644 --- a/parser.cpp +++ b/parser.cpp @@ -565,7 +565,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) va_start(va, str); err_buff = vformat_string(str, va); va_end(va); - } /** @@ -2753,7 +2752,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha case 1: { - wchar_t *subst = wcsndup(paran_begin+1, paran_end-paran_begin-1); + const wcstring subst(paran_begin + 1, paran_end); wcstring tmp; tmp.append(arg_cpy, paran_begin - arg_cpy); @@ -2762,17 +2761,16 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); - err |= parser_t::detect_errors(subst, out, prefix); + parse_error_list_t errors; + err |= parser_t::detect_errors(subst, &errors); + if (out && ! errors.empty()) + { + out->append(parse_errors_description(errors, subst, prefix)); + } - free(subst); free(arg_cpy); arg_cpy = wcsdup(tmp.c_str()); - /* - Do _not_ call sb_destroy on this stringbuffer - it's - buffer is used as the new 'arg_cpy'. It is free'd at - the end of the loop. - */ break; } } @@ -2914,39 +2912,43 @@ struct block_info_t block_type_t type; //type of the block }; -parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) +/* Append a syntax error to the given error list */ +static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...) +{ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + errors->push_back(error); + return true; +} + +parser_test_error_bits_t parser_t::detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); - if (! buff) - return PARSER_TEST_ERROR; - - const wcstring buff_src = buff; parse_node_tree_t node_tree; parse_error_list_t parse_errors; // Whether we encountered a parse error bool errored = false; - long error_line = -1; // Whether we encountered an unclosed block // We detect this via an 'end_command' block without source bool has_unclosed_block = false; + // Parse the input string into a parse tree + // Some errors are detected here bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); if (! parsed) { - // report errors - if (out) - { - for (size_t i=0; i < parse_errors.size(); i++) - { - const parse_error_t &error = parse_errors.at(i); - this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str()); - } - } errored = true; - error_line = __LINE__; } // Expand all commands @@ -2973,9 +2975,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // Check that we can expand the command if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { - error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str()); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); } // Check that pipes are sound @@ -2986,9 +2986,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) { - error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG); } } @@ -3011,9 +3009,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * } if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) { - error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); } } @@ -3026,7 +3022,6 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * const parse_node_t *ancestor = &node; while (ancestor != NULL && ! end_search) { - bool end_search = false; const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); if (loop_or_function_header != NULL) { @@ -3037,6 +3032,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // this is a loop header, so we can break or continue found_loop = true; end_search = true; + break; case symbol_function_header: // this is a function header, so we cannot break or continue. We stop our search here. @@ -3052,31 +3048,9 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * ancestor = node_tree.get_parent(*ancestor); } - - - const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header); - if (function_node == NULL) + if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src)) { - // Ok, this looks bad: return not in a function! - // But we allow it if it's 'return --help' - // Get the arguments - bool is_help = false; - const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument); - if (! arg_nodes.empty()) - { - // Check the first argument only - const parse_node_t &arg = *arg_nodes.at(0); - const wcstring first_arg_src = arg.get_source(buff_src); - is_help = parser_t::is_help(first_arg_src.c_str(), 3); - } - - // If it's not help, then it's an invalid return - if (! is_help) - { - error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); - errored = true; - error_line = __LINE__; - } + errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG); } } } @@ -3092,6 +3066,11 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * if (has_unclosed_block) res |= PARSER_TEST_INCOMPLETE; + if (out_errors) + { + out_errors->swap(parse_errors); + } + error_code=0; diff --git a/parser.h b/parser.h index b2fbfe134..fb3efad85 100644 --- a/parser.h +++ b/parser.h @@ -11,6 +11,7 @@ #include "util.h" #include "event.h" #include "function.h" +#include "parse_tree.h" #include enum { @@ -487,7 +488,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors(const wcstring &buff, parse_error_list_t *out_errors = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); /** diff --git a/reader.cpp b/reader.cpp index df7f070e5..3eeae6271 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2472,12 +2472,11 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { - int res = parser_t::principal_parser().detect_errors(b); + wcstring bstr = b; + int res = parser_t::principal_parser().detect_errors(bstr); if (res & PARSER_TEST_ERROR) { - wcstring sb; - const int tmp[1] = {0}; const int tmp2[1] = {0}; const wcstring empty; @@ -2490,10 +2489,15 @@ int reader_shell_test(const wchar_t *b) tmp, tmp2, 0); - - - parser_t::principal_parser().detect_errors(b, &sb, L"fish"); - fwprintf(stderr, L"%ls", sb.c_str()); + + parse_error_list_t errors; + parser_t::principal_parser().detect_errors(bstr, &errors, L"fish"); + + if (! errors.empty()) + { + const wcstring sb = parse_errors_description(errors, b, L"fish"); + fwprintf(stderr, L"%ls", sb.c_str()); + } } return res; } @@ -3903,13 +3907,14 @@ static int read_ni(int fd, const io_chain_t &io) res = 1; } - wcstring sb; - if (! parser.detect_errors(str.c_str(), &sb, L"fish")) + parse_error_list_t errors; + if (! parser.detect_errors(str, &errors, L"fish")) { parser.eval(str, io, TOP); } else { + const wcstring sb = parse_errors_description(errors, str); fwprintf(stderr, L"%ls", sb.c_str()); res = 1; } diff --git a/tests/test7.in b/tests/test7.in index 22f5d92c6..a3ae8360c 100644 --- a/tests/test7.in +++ b/tests/test7.in @@ -20,15 +20,6 @@ case one echo $status end -# Test that non-case tokens inside `switch` don't blow away status -# (why are these even allowed?) -false -switch one -true -case one - echo $status -end - #test contains -i echo test contains -i contains -i string a b c string d diff --git a/tests/test7.out b/tests/test7.out index fd3b8a701..bbe2ab1a5 100644 --- a/tests/test7.out +++ b/tests/test7.out @@ -3,7 +3,6 @@ 3 0 -1 1 test contains -i 4