diff --git a/fish_tests.cpp b/fish_tests.cpp index 7c2de13a7..0ec8bf7bb 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -896,9 +896,17 @@ static void test_indents() {NULL, -1} }; + const indent_component_t components12[] = + { + {L"while false", 0}, + {L"# comment", 1}, //comment indentation handling + {L"command", 1}, //comment indentation handling + {L"# comment2", 1}, //comment indentation handling + {NULL, -1} + }; - const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10, components11}; + const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10, components11, components12}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const indent_component_t *components = tests[which]; diff --git a/parse_tree.cpp b/parse_tree.cpp index 4825c6ebc..ea0b0abd2 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -13,13 +13,6 @@ static bool production_is_empty(const production_t *production) return (*production)[0] == token_type_invalid; } -void swap2(parse_node_tree_t &a, parse_node_tree_t &b) -{ - fprintf(stderr, "Swapping!\n"); - // This uses the base vector implementation - a.swap(b); -} - /** Returns a string description of this parse error */ wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, bool is_interactive, bool skip_caret) const { @@ -682,6 +675,8 @@ void parse_ll_t::dump_stack(void) const // Give each node a source range equal to the union of the ranges of its children // Terminal nodes already have source ranges (and no children) // Since children always appear after their parents, we can implement this very simply by walking backwards +// We then do a second pass to give empty nodes an empty source range (but with a valid offset) +// We do this by walking forward. If a child of a node has an invalid source range, we set it equal to the end of the source range of its previous child void parse_ll_t::determine_node_ranges(void) { size_t idx = nodes.size(); @@ -712,6 +707,30 @@ void parse_ll_t::determine_node_ranges(void) parent->source_length = max_end - min_start; } } + + /* Forwards pass */ + size_t size = nodes.size(); + for (idx = 0; idx < size; idx++) + { + /* Since we populate the source range based on the sibling node, it's simpler to walk over the children of each node. + We keep a running "child_source_cursor" which is meant to be the end of the child's source range. It's initially set to the beginning of the parent' source range. */ + parse_node_t *parent = &nodes[idx]; + // If the parent doesn't have a valid source range, then none of its children will either; skip it entirely + if (parent->source_start == SOURCE_OFFSET_INVALID) + { + continue; + } + source_offset_t child_source_cursor = parent->source_start; + for (size_t child_idx = 0; child_idx < parent->child_count; child_idx++) + { + parse_node_t *child = &nodes[parent->child_start + child_idx]; + if (child->source_start == SOURCE_OFFSET_INVALID) + { + child->source_start = child_source_cursor; + } + child_source_cursor = child->source_start + child->source_length; + } + } } void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors) @@ -1002,10 +1021,12 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. if (token1.type == parse_special_type_parse_error || token1.type == parse_special_type_tokenizer_error || token1.type == parse_special_type_comment) { - parse_node_t err_node(token1.type); - err_node.source_start = token1.source_start; - err_node.source_length = token1.source_length; - nodes.push_back(err_node); + /* We set the special node's parent to the top of the stack. This means that we have an asymmetric relationship: the special node has a parent (which is the node we were trying to generate when we encountered the special node), but the parent node does not have the special node as a child. This means for example that parents don't have to worry about tracking any comment nodes, but we can still recover the parent from the comment. */ + parse_node_t special_node(token1.type); + special_node.parent = symbol_stack.back().node_idx; + special_node.source_start = token1.source_start; + special_node.source_length = token1.source_length; + nodes.push_back(special_node); consumed = true; /* tokenizer errors are fatal */ diff --git a/parse_tree.h b/parse_tree.h index 7de162ed7..58484ce4d 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -111,7 +111,9 @@ public: /* Indicate if this node has a range of source code associated with it */ bool has_source() const { - return source_start != SOURCE_OFFSET_INVALID; + /* Should never have a nonempty range with an invalid offset */ + assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0); + return this->source_length > 0; } /* Gets source for the node, or the empty string if it has no source */ diff --git a/parse_util.cpp b/parse_util.cpp index 13eca166d..a941113f6 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -840,7 +840,7 @@ static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx; - /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */ + /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */ const parse_node_t &node = tree.at(node_idx); const parse_token_type_t node_type = node.type; @@ -877,10 +877,39 @@ static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset /* Store the indent into the indent array */ - if (node.has_source()) + if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) { - assert(node.source_start < indents->size()); - indents->at(node.source_start) = node_indent; + if (node.has_source()) + { + /* A normal non-empty node. Store the indent unconditionally. */ + indents->at(node.source_start) = node_indent; + } + else + { + /* An empty node. We have a source offset but no source length. This can come about when a node legitimately empty: + + while true; end + + The job_list inside the while loop is empty. It still has a source offset (at the end of the while statement) but no source extent. + We still need to capture that indent, because there may be comments inside: + while true + # loop forever + end + + The 'loop forever' comment must be indented, by virtue of storing the indent. + + Now consider what happens if we remove the end: + + while true + # loop forever + + Now both the job_list and end_command are unmaterialized. However, we want the indent to be of the job_list and not the end_command. Therefore, we only store the indent if it's bigger. + */ + if (node_indent > indents->at(node.source_start)) + { + indents->at(node.source_start) = node_indent; + } + } } @@ -900,7 +929,7 @@ std::vector parse_util_compute_indents(const wcstring &src) /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ parse_node_tree_t tree; - parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ node_offset_t start_node_idx = 0; @@ -922,6 +951,22 @@ std::vector parse_util_compute_indents(const wcstring &src) start_node_idx = max_visited_node_idx + 1; } + /* Handle comments. Each comment node has a parent (which is whatever the top of the symbol stack was when the comment was encountered). So the source range of the comment has the same indent as its parent. */ + const size_t tree_size = tree.size(); + for (node_offset_t i=0; i < tree_size; i++) + { + const parse_node_t &node = tree.at(i); + if (node.type == parse_special_type_comment && node.has_source() && node.parent < tree_size) + { + const parse_node_t &parent = tree.at(node.parent); + if (parent.source_start != SOURCE_OFFSET_INVALID) + { + indents.at(node.source_start) = indents.at(parent.source_start); + } + } + } + + /* Now apply the indents. The indents array has -1 for places where the indent does not change, so start at each value and extend it along the run of -1s */ int last_indent = 0; for (size_t i=0; i