Tweak comment handling. Mark a parse node with a new flag when it has comments

This commit is contained in:
ridiculousfish 2014-12-23 10:58:45 -08:00
parent bf80b0db19
commit 39fe9fcfcd
3 changed files with 60 additions and 6 deletions

View file

@ -4022,7 +4022,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
const wcstring src = str2wcstring(&txt.at(0), txt.size());
parse_node_tree_t parse_tree;
parse_error_list_t errors;
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
bool success = parse_tree_from_string(src, parse_flag_include_comments, &parse_tree, &errors);
if (! success)
{
stdout_buffer.append(L"Parsing failed:\n");
@ -4035,7 +4035,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
stdout_buffer.append(L"(Reparsed with continue after error)\n");
parse_tree.clear();
errors.clear();
parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors);
parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, &errors);
}
const wcstring dump = parse_dump_tree(parse_tree, src);
stdout_buffer.append(dump);

View file

@ -216,7 +216,6 @@ wcstring token_type_description(parse_token_type_t type)
case parse_token_type_terminate:
return L"token_terminate";
case parse_special_type_parse_error:
return L"parse_error";
case parse_special_type_tokenizer_error:
@ -339,7 +338,10 @@ static wcstring block_type_user_presentable_description(parse_token_type_t type)
/* Returns a textual description of this node: the name of its token type, followed by " (prod N)" with the node's production index when the type sorts below FIRST_TERMINAL_TYPE. */
wcstring parse_node_t::describe(void) const
{
    wcstring result = token_type_description(type);

    /* The production suffix is emitted exactly once, and only for types below FIRST_TERMINAL_TYPE (presumably the non-terminal symbols, which are the ones that select a production). */
    if (type < FIRST_TERMINAL_TYPE)
    {
        append_format(result, L" (prod %d)", this->production_idx);
    }
    return result;
}
@ -437,6 +439,10 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
{
append_format(*result, L" <%lu children>", node.child_count);
}
/* Tag nodes whose has-comments flag is set so that it shows up in the dump. The format string has no conversion specifiers, so no extra arguments are passed. */
if (node.has_comments())
{
    append_format(*result, L" <has_comments>");
}
if (node.has_source() && node.type == parse_token_type_string)
{
@ -1120,6 +1126,12 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
nodes.push_back(special_node);
consumed = true;
/* Mark special flags */
if (token1.type == parse_special_type_comment)
{
this->node_for_top_symbol().flags |= parse_node_flag_has_comments;
}
/* tokenizer errors are fatal */
if (token1.type == parse_special_type_tokenizer_error)
this->fatal_errored = true;
@ -1302,6 +1314,9 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
if (parse_flags & parse_flag_accept_incomplete_tokens)
tok_options |= TOK_ACCEPT_UNFINISHED;
if (parse_flags & parse_flag_show_blank_lines)
tok_options |= TOK_SHOW_BLANK_LINES;
if (errors == NULL)
tok_options |= TOK_SQUASH_ERRORS;
@ -1654,6 +1669,24 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_
return result;
}
/* Collects pointers to every comment node in this tree whose parent is the given node. The list is empty when the node is not flagged as having comments. */
parse_node_tree_t::parse_node_list_t parse_node_tree_t::comment_nodes_for_node(const parse_node_t &parent) const
{
    parse_node_list_t comments;

    /* Without the has-comments flag there is nothing to look for, so skip the scan entirely */
    if (! parent.has_comments())
    {
        return comments;
    }

    /* Scan the whole tree; a node qualifies if it is a comment and its parent is exactly the node we were given */
    const size_t node_count = this->size();
    for (size_t idx = 0; idx < node_count; idx++)
    {
        const parse_node_t &candidate = this->at(idx);
        if (candidate.type != parse_special_type_comment)
        {
            continue;
        }
        if (this->get_parent(candidate) == &parent)
        {
            comments.push_back(&candidate);
        }
    }
    return comments;
}
enum parse_bool_statement_type_t parse_node_tree_t::statement_boolean_type(const parse_node_t &node)
{
assert(node.type == symbol_boolean_statement);

View file

@ -59,8 +59,10 @@ enum
parse_flag_accept_incomplete_tokens = 1 << 2,
/* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. */
parse_flag_leave_unterminated = 1 << 3
parse_flag_leave_unterminated = 1 << 3,
/* Indicate that the parser should generate job_list entries for blank lines. */
parse_flag_show_blank_lines = 1 << 4
};
typedef unsigned int parse_tree_flags_t;
@ -69,6 +71,13 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type);
/* Integer type used to store the flag bits of a parse node */
typedef uint8_t parse_node_flags_t;

/* Bit values that may be combined in a parse_node_flags_t */
enum
{
    /* Set on a node that has comment nodes associated with it */
    parse_node_flag_has_comments = 0x01
};
/** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */
class parse_node_t
{
@ -94,11 +103,14 @@ public:
/* Type of the node */
enum parse_token_type_t type;
/* Node flags */
parse_node_flags_t flags;
/* Description */
wcstring describe(void) const;
/* Constructor. Builds a node of the given type with no source range (source_start is SOURCE_OFFSET_INVALID, length 0), no parent (NODE_OFFSET_INVALID), no children, production index -1 and all flags cleared. */
explicit parse_node_t(parse_token_type_t ty) :
    source_start(SOURCE_OFFSET_INVALID),
    source_length(0),
    parent(NODE_OFFSET_INVALID),
    child_start(0),
    child_count(0),
    production_idx(-1),
    type(ty),
    flags(0)
{
}
@ -116,6 +128,12 @@ public:
return this->source_length > 0;
}
/* Returns true when the has-comments bit is set in this node's flags */
bool has_comments() const
{
    return (this->flags & parse_node_flag_has_comments) != 0;
}
/* Gets source for the node, or the empty string if it has no source */
wcstring get_source(const wcstring &str) const
{
@ -184,6 +202,9 @@ public:
/* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
/* Given a node, return all of its comment nodes. */
parse_node_list_t comment_nodes_for_node(const parse_node_t &node) const;
/* Returns the boolean type for a boolean node */
static enum parse_bool_statement_type_t statement_boolean_type(const parse_node_t &node);