From dd0cc5ed9fa60f4bae5530d1708a2974eb0c454f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 13:41:12 -0800 Subject: [PATCH] Rewriting indenting functionality to use new parser --- fish_tests.cpp | 194 +++++++++++++++++++++++++++++++++++++----- parse_productions.cpp | 1 + parse_tree.cpp | 30 ++++--- parse_tree.h | 1 - parse_util.cpp | 115 +++++++++++++++++++++++++ parse_util.h | 2 + parser.cpp | 5 +- parser.h | 9 +- reader.cpp | 9 +- 9 files changed, 328 insertions(+), 38 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index c57af8abe..b4d37f8b8 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -64,23 +64,32 @@ #include "parse_util.h" static const char * const * s_arguments; +static int s_test_run_count = 0; /* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */ static bool should_test_function(const char *func_name) { /* No args, test everything */ + bool result = false; if (! s_arguments || ! s_arguments[0]) - return true; - - for (size_t i=0; s_arguments[i] != NULL; i++) { - if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + result = true; + } + else + { + for (size_t i=0; s_arguments[i] != NULL; i++) { - /* Prefix match */ - return true; + if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + { + /* Prefix match */ + result = true; + break; + } } } - return false; + if (result) + s_test_run_count++; + return result; } /** @@ -640,6 +649,147 @@ static void test_parser() } } +static void test_indents() +{ + say(L"Testing indents"); + + // Here are the components of our source and the indents we expect those to be + struct indent_component_t { + const wchar_t *txt; + int indent; + }; + + const indent_component_t components1[] = + { + {L"if foo", 0}, + {L"end", 0}, + {NULL, -1} + }; + + const indent_component_t components2[] = + { + {L"if foo", 0}, + {L"", 1}, //trailing newline! + {NULL, -1} + }; + + const indent_component_t components3[] = + { + {L"if foo", 0}, + {L"foo", 1}, + {L"end", 0}, //trailing newline! + {NULL, -1} + }; + + const indent_component_t components4[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"end", 1}, + {L"end", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components5[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"", 2}, + {NULL, -1} + }; + + const indent_component_t components6[] = + { + {L"begin", 0}, + {L"foo", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components7[] = + { + {L"begin; end", 0}, + {L"foo", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components8[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"baz", 2}, + {L"end", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components9[] = + { + {L"switch foo", 0}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components10[] = + { + {L"switch foo", 0}, + {L"case bar", 1}, + {L"case baz", 1}, + {L"quux", 2}, + {L"", 2}, + {NULL, -1} + }; + + + + const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const indent_component_t *components = tests[which]; + // Count how many we have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the expected indents + wcstring text; + std::vector expected_indents; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L'\n'); + expected_indents.push_back(components[i].indent); + } + text.append(components[i].txt); + expected_indents.resize(text.size(), components[i].indent); + } + assert(expected_indents.size() == text.size()); + + // Compute the indents + std::vector indents = parse_util_compute_indents(text); + + if (expected_indents.size() != indents.size()) + { + err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(), indents.size()); + } + assert(expected_indents.size() == indents.size()); + for (size_t i=0; i < text.size(); i++) + { + if (expected_indents.at(i) != indents.at(i)) + { + err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i, which + 1, expected_indents.at(i), indents.at(i), text.c_str()); + break; //don't keep showing errors for the rest of the line + } + } + + } +} + static void test_utils() { say(L"Testing utils"); @@ -2176,25 +2326,26 @@ static void test_new_parser_ll2(void) } } -__attribute__((unused)) -static void test_new_parser(void) +static void test_new_parser_ad_hoc(void) { - say(L"Testing new parser"); - const wcstring src = L"echo hello world"; + /* Very ad-hoc tests for issues encountered */ + say(L"Testing new parser ad hoc tests"); + + /* Ensure that 'case' terminates a job list */ + const wcstring src = L"switch foo ; case bar; case baz; end"; parse_node_tree_t parse_tree; bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); if (! success) { - say(L"Parsing failed"); + err(L"Parsing failed"); } - else + + /* Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd try to run a command 'case' */ + const parse_node_t &root = parse_tree.at(0); + const parse_node_tree_t::parse_node_list_t node_list = parse_tree.find_nodes(root, symbol_case_item_list); + if (node_list.size() != 3) { -#if 0 - parse_execution_context_t ctx(parse_tree, src); - say(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - say(simulation.c_str()); -#endif + err(L"Expected 3 case item nodes, found %lu", node_list.size()); } } @@ -2415,7 +2566,7 @@ int main(int argc, char **argv) if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); - if (should_test_function("new_parser")) test_new_parser(); + if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc(); if (should_test_function("escape")) test_unescape_sane(); if (should_test_function("escape")) test_escape_crazy(); if (should_test_function("format")) test_format(); @@ -2425,6 +2576,7 @@ int main(int argc, char **argv) if (should_test_function("fork")) test_fork(); if (should_test_function("iothread")) test_iothread(); if (should_test_function("parser")) test_parser(); + if (should_test_function("indents")) test_indents(); if (should_test_function("utils")) test_utils(); if (should_test_function("escape_sequences")) test_escape_sequences(); if (should_test_function("lru")) test_lru(); @@ -2447,6 +2599,8 @@ int main(int argc, char **argv) //history_tests_t::test_history_speed(); say(L"Encountered %d errors in low-level tests", err_count); + if (s_test_run_count == 0) + say(L"*** No Tests Were Actually Run! ***"); /* Skip performance tests for now, since they seem to hang when running from inside make (?) diff --git a/parse_productions.cpp b/parse_productions.cpp index 227955453..c3ab9c3ab 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -51,6 +51,7 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: + case parse_keyword_case: // End this job list return 0; diff --git a/parse_tree.cpp b/parse_tree.cpp index 24cf45986..ad0dd0ea9 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -240,10 +240,10 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum toke } /* Helper function for dump_tree */ -static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line) { - assert(start < nodes.size()); - const parse_node_t &node = nodes.at(start); + assert(node_idx < nodes.size()); + const parse_node_t &node = nodes.at(node_idx); const size_t spacesPerIndent = 2; @@ -253,26 +253,33 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & if (indent > 0) indent -= 1; } - append_format(*result, L"%2lu - %l2u ", *line, start); + append_format(*result, L"%2lu - %l2u ", *line, node_idx); result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) { append_format(*result, L" <%lu children>", node.child_count); } - if (node.type == parse_token_type_string) + + if (node.has_source() && node.type == parse_token_type_string) { - if (node.source_start == -1) + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); + } + + if (node.type != parse_token_type_string) + { + if (node.has_source()) { - append_format(*result, L" (no source)"); + append_format(*result, L" [%ld, %ld]", (long)node.source_start, (long)node.source_length); } else { - result->append(L": \""); - result->append(src, node.source_start, node.source_length); - result->append(L"\""); + append_format(*result, L" [no src]", (long)node.source_start, (long)node.source_length); } } + result->push_back(L'\n'); ++*line; for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) @@ -658,7 +665,8 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) if (matched) { - // Success. Tell the node that it matched this token + // Success. Tell the node that it matched this token, and what its source range is + // In the parse phase, we only set source ranges for terminal types. We propagate ranges to parent nodes afterwards. parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; diff --git a/parse_tree.h b/parse_tree.h index 8621cea8f..d5b48331f 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -253,7 +253,6 @@ public: /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; - /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; diff --git a/parse_util.cpp b/parse_util.cpp index abcf019c3..842c6f75b 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -38,6 +38,7 @@ #include "env.h" #include "signal.h" #include "wildcard.h" +#include "parse_tree.h" /** Maximum number of autoloaded items opf a specific type to keep in @@ -804,3 +805,117 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) } return result; } + +/* We are given a parse tree, the index of a node within the tree, its indent, and a vector of indents the same size as the original source string. Set the indent correspdonding to the node's source range, if appropriate. + + trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false ' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways. + + switch statements also indent. +*/ +static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector *indents, int *trailing_indent) +{ + /* Guard against incomplete trees */ + if (node_idx > tree.size()) + return; + + /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */ + + const parse_node_t &node = tree.at(node_idx); + const parse_token_type_t node_type = node.type; + + /* Increment the indent if we are either a root job_list, or root case_item_list */ + const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list); + const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list); + if (is_root_job_list || is_root_case_item_list) + { + node_indent += 1; + } + + /* If we have source, store the trailing indent unconditionally. If we do not have source, store the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job lists from affecting the trailing indent. For example, code like this: + + if foo + + will be parsed as this: + + job_list + job + if_statement + job [if] + job_list [empty] + job_list [empty] + + There's two "terminal" job lists, and we want the innermost one. + + Note we are relying on the fact that nodes are in the same order as the source, i.e. an in-order traversal of the node tree also traverses the source from beginning to end. + */ + if (node.has_source() || node_indent > *trailing_indent) + { + *trailing_indent = node_indent; + } + + + /* Store the indent into the indent array */ + if (node.has_source()) + { + assert(node.source_start < indents->size()); + indents->at(node.source_start) = node_indent; + } + + + /* Recursive to all our children */ + for (node_offset_t idx = 0; idx < node.child_count; idx++) + { + /* Note we pass our type to our child, which becomes its parent node type */ + compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent); + } +} + +std::vector parse_util_compute_indents(const wcstring &src) +{ + /* Make a vector the same size as the input string, which contains the indents. Initialize them to -1. */ + const size_t src_size = src.size(); + std::vector indents(src_size, -1); + + parse_node_tree_t tree; + parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + + /* The indent that we'll get for the last line */ + int trailing_indent = 0; + + /* Invoke the recursive version. As a hack, pass job_list for the 'parent' token, which will prevent the really-root job list from indenting */ + compute_indents_recursive(tree, 0 /* node index */, 0/* current indent */, symbol_job_list, &indents, &trailing_indent); + + int last_indent = 0; + for (size_t i=0; i parse_util_compute_indents(const wcstring &src); #endif diff --git a/parser.cpp b/parser.cpp index 974ba1fe4..f9282a242 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2911,7 +2911,7 @@ struct block_info_t bool has_had_case; //if we are a switch, whether we've encountered a case }; -int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) +parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); @@ -2926,7 +2926,6 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w // These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return) std::vector block_infos; int indentation_sum = 0; //sum of indentation in block_infos - int res = 0; /* Set to 1 if the current command is inside a pipeline @@ -3704,6 +3703,8 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w if (! block_infos.empty()) unfinished = 1; + parser_test_error_bits_t res = 0; + if (err) res |= PARSER_TEST_ERROR; diff --git a/parser.h b/parser.h index 8b43b83fc..076baceff 100644 --- a/parser.h +++ b/parser.h @@ -13,8 +13,11 @@ #include "function.h" #include -#define PARSER_TEST_ERROR 1 -#define PARSER_TEST_INCOMPLETE 2 +enum { + PARSER_TEST_ERROR = 1, + PARSER_TEST_INCOMPLETE = 2 +}; +typedef unsigned int parser_test_error_bits_t; /** event_blockage_t represents a block on events of the specified type @@ -484,7 +487,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - int test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); /** Test if the specified string can be parsed as an argument list, diff --git a/reader.cpp b/reader.cpp index a09a0bda6..ac0e52f51 100644 --- a/reader.cpp +++ b/reader.cpp @@ -519,7 +519,14 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr static void reader_repaint() { // Update the indentation - parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); + if (0) + { + parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); + } + else + { + data->indents = parse_util_compute_indents(data->command_line); + } // Combine the command and autosuggestion into one string wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion);