From 39fe9fcfcd146e726454458621700ca83700296f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 23 Dec 2014 10:58:45 -0800 Subject: [PATCH] Tweak comment handling. Mark a parse node with a new flag when it has comments --- builtin.cpp | 4 ++-- parse_tree.cpp | 37 +++++++++++++++++++++++++++++++++++-- parse_tree.h | 25 +++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index e2fd3231b..630c04289 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4022,7 +4022,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; parse_error_list_t errors; - bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors); + bool success = parse_tree_from_string(src, parse_flag_include_comments, &parse_tree, &errors); if (! success) { stdout_buffer.append(L"Parsing failed:\n"); @@ -4035,7 +4035,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) stdout_buffer.append(L"(Reparsed with continue after error)\n"); parse_tree.clear(); errors.clear(); - parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors); + parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, &errors); } const wcstring dump = parse_dump_tree(parse_tree, src); stdout_buffer.append(dump); diff --git a/parse_tree.cpp b/parse_tree.cpp index 39cee2584..25ad5b613 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -216,7 +216,6 @@ wcstring token_type_description(parse_token_type_t type) case parse_token_type_terminate: return L"token_terminate"; - case parse_special_type_parse_error: return L"parse_error"; case parse_special_type_tokenizer_error: @@ -339,7 +338,10 @@ static wcstring block_type_user_presentable_description(parse_token_type_t type) wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); - append_format(result, L" (prod 
%d)", this->production_idx); + if (type < FIRST_TERMINAL_TYPE) + { + append_format(result, L" (prod %d)", this->production_idx); + } return result; } @@ -437,6 +439,10 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & { append_format(*result, L" <%lu children>", node.child_count); } + if (node.has_comments()) + { + append_format(*result, L" <has_comments>", node.child_count); + } if (node.has_source() && node.type == parse_token_type_string) { @@ -1120,6 +1126,12 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) nodes.push_back(special_node); consumed = true; + /* Mark special flags */ + if (token1.type == parse_special_type_comment) + { + this->node_for_top_symbol().flags |= parse_node_flag_has_comments; + } + /* tokenizer errors are fatal */ if (token1.type == parse_special_type_tokenizer_error) this->fatal_errored = true; @@ -1302,6 +1314,9 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, if (parse_flags & parse_flag_accept_incomplete_tokens) tok_options |= TOK_ACCEPT_UNFINISHED; + if (parse_flags & parse_flag_show_blank_lines) + tok_options |= TOK_SHOW_BLANK_LINES; + if (errors == NULL) tok_options |= TOK_SQUASH_ERRORS; @@ -1654,6 +1669,24 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_ return result; } +parse_node_tree_t::parse_node_list_t parse_node_tree_t::comment_nodes_for_node(const parse_node_t &parent) const +{ + parse_node_list_t result; + if (parent.has_comments()) + { + /* Walk all our nodes, looking for comment nodes that have the given node as a parent */ + for (size_t i=0; i < this->size(); i++) + { + const parse_node_t &potential_comment = this->at(i); + if (potential_comment.type == parse_special_type_comment && this->get_parent(potential_comment) == &parent) + { + result.push_back(&potential_comment); + } + } + } + return result; +} + enum parse_bool_statement_type_t parse_node_tree_t::statement_boolean_type(const parse_node_t 
&node) { assert(node.type == symbol_boolean_statement); diff --git a/parse_tree.h b/parse_tree.h index 59739af48..74e9d4360 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -59,8 +59,10 @@ enum parse_flag_accept_incomplete_tokens = 1 << 2, /* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. */ - parse_flag_leave_unterminated = 1 << 3 + parse_flag_leave_unterminated = 1 << 3, + /* Indicate that the parser should generate job_list entries for blank lines. */ + parse_flag_show_blank_lines = 1 << 4 }; typedef unsigned int parse_tree_flags_t; @@ -69,6 +71,13 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); +enum +{ + /* Flag indicating that the node has associated comment nodes */ + parse_node_flag_has_comments = 1 << 0 +}; +typedef uint8_t parse_node_flags_t; + /** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */ class parse_node_t { @@ -94,11 +103,14 @@ public: /* Type of the node */ enum parse_token_type_t type; + /* Node flags */ + parse_node_flags_t flags; + /* Description */ wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : source_start(SOURCE_OFFSET_INVALID), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1), type(ty) + explicit parse_node_t(parse_token_type_t ty) : source_start(SOURCE_OFFSET_INVALID), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1), type(ty), flags(0) { } @@ -116,6 +128,12 @@ public: return this->source_length > 0; } + /* Indicate if the node has comment nodes */ + bool has_comments() const + { + return !! 
(this->flags & parse_node_flag_has_comments); + } + /* Gets source for the node, or the empty string if it has no source */ wcstring get_source(const wcstring &str) const { @@ -184,6 +202,9 @@ public: /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */ parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; + /* Given a node, return all of its comment nodes. */ + parse_node_list_t comment_nodes_for_node(const parse_node_t &node) const; + /* Returns the boolean type for a boolean node */ static enum parse_bool_statement_type_t statement_boolean_type(const parse_node_t &node);