From d840643cb33a12ea9433b12fe7443e15db2df75c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 23 Jun 2013 02:09:46 -0700 Subject: [PATCH] More work on new parser --- parse_exec.cpp | 328 ++++++++++++++++++++++++++++++++++++------------- parse_tree.cpp | 173 +++++++++++++++++++------- parse_tree.h | 43 +++++-- 3 files changed, 403 insertions(+), 141 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index 6ab90117e..88fdd00a6 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -24,10 +24,18 @@ struct exec_argument_t node_offset_t parse_node_idx; sanity_id_t command_sanity_id; }; +typedef std::vector exec_argument_list_t; struct exec_redirection_t { + node_offset_t parse_node_idx; +}; +typedef std::vector exec_redirection_list_t; +struct exec_arguments_and_redirections_t +{ + exec_argument_list_t arguments; + exec_redirection_list_t redirections; }; struct exec_basic_statement_t @@ -35,6 +43,9 @@ struct exec_basic_statement_t // Node containing the command node_offset_t command_idx; + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + // Decoration enum { @@ -43,8 +54,6 @@ struct exec_basic_statement_t decoration_builtin } decoration; - std::vector arguments; - std::vector redirections; uint16_t sanity_id; exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) @@ -70,10 +79,48 @@ struct exec_basic_statement_t PARSER_DIE(); break; } - + } + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; } }; +struct exec_block_statement_t +{ + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; + } + +}; + +struct exec_job_t +{ + // List of statements (separated with pipes) + std::vector statements; + + void add_statement(const exec_basic_statement_t &statement) + { + statements.push_back(statement); + } +}; + + class parse_exec_t { parse_node_tree_t parse_tree; @@ -85,6 +132,9 @@ class parse_exec_t /* The stack of nodes as we execute them */ std::vector exec_nodes; + /* The stack of jobs being built */ + std::vector assembling_jobs; + /* The stack of commands being built */ std::vector assembling_statements; @@ -95,7 +145,39 @@ class parse_exec_t PARSE_ASSERT(node.source_start + node.source_length <= src.size()); output->assign(src, node.source_start, node.source_length); } + + const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const + { + return parse_tree.at(parent.child_offset(which)); + } + + void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) + { + PARSE_ASSERT(! exec_nodes.empty()); + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + node_offset_t child_node_idx = parse_node.child_start; + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; + const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; + for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) + { + node_offset_t idx = idxs[q]; + if (idx != (node_offset_t)(-1)) + { + PARSE_ASSERT(idx < parse_node.child_count); + exec_nodes.push_back(child_node_idx + idx); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } + + } + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -139,90 +221,130 @@ class parse_exec_t pop_push(0, parse_node.child_count); } - void assemble_command(node_offset_t idx) - { - // Set the command for our top basic statement - PARSE_ASSERT(! assembling_statements.empty()); - assembling_statements.back().command_idx = idx; - } - - void assemble_argument_or_redirection(node_offset_t idx) + void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const { const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(! assembling_statements.empty()); - exec_basic_statement_t &statement = assembling_statements.back(); - switch (node.type) + PARSE_ASSERT(output != NULL); + PARSE_ASSERT(node.type == symbol_argument_or_redirection); + PARSE_ASSERT(node.child_count == 1); + node_offset_t child_idx = node.child_offset(0); + const parse_node_t &child = parse_tree.at(child_idx); + switch (child.type) { case parse_token_type_string: // Argument { exec_argument_t arg = exec_argument_t(); arg.parse_node_idx = idx; - arg.command_sanity_id = statement.sanity_id; - statement.arguments.push_back(arg); + output->arguments.push_back(arg); } break; case parse_token_type_redirection: // Redirection + { + exec_redirection_t redirect = exec_redirection_t(); + redirect.parse_node_idx = idx; + output->redirections.push_back(redirect); + } break; default: PARSER_DIE(); break; } - } - void assembly_complete() + void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const { - // Finished building a command - PARSE_ASSERT(! assembling_statements.empty()); - const exec_basic_statement_t &statement = assembling_statements.back(); + node_offset_t idx = start_idx; + for (;;) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list); + PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); + if (node.child_count == 0) + { + // No more children + break; + } + else + { + // Skip to next child + assemble_1_argument_or_redirection(node.child_offset(0), output); + idx = node.child_offset(1); + } + } + } + + void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_plain_statement); + PARSE_ASSERT(node.child_count == 2); + exec_basic_statement_t statement; + statement.set_decoration(decoration); + statement.command_idx = node.child_offset(0); + assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); + assembling_jobs.back().add_statement(statement); + } + + void job_assembly_complete() + { + PARSE_ASSERT(! assembling_jobs.empty()); + const exec_job_t &job = assembling_jobs.back(); if (simulating) { - simulate_statement(statement); + simulate_job(job); } - assembling_statements.pop_back(); + assembling_jobs.pop_back(); } - void simulate_statement(const exec_basic_statement_t &statement) + void simulate_job(const exec_job_t &job) { PARSE_ASSERT(simulating); wcstring line; - switch (statement.decoration) + for (size_t i=0; i < job.statements.size(); i++) { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; + if (i > 0) + { + line.append(L" "); + } + const exec_basic_statement_t &statement = job.statements.at(i); + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; - default: - break; - } - - wcstring tmp; - get_node_string(statement.command_idx, &tmp); - line.append(L"cmd:"); - line.append(tmp); - for (size_t i=0; i < statement.arguments.size(); i++) - { - const exec_argument_t &arg = statement.arguments.at(i); - get_node_string(arg.parse_node_idx, &tmp); - line.append(L" "); - line.append(L"arg:"); + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); line.append(tmp); + for (size_t i=0; i < statement.arguments().size(); i++) + { + const exec_argument_t &arg = statement.arguments().at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } } - simulation_result.push_back(line); + simulation_result.push_back(line); } void enter_parse_node(size_t idx); void run_top_node(void); - exec_basic_statement_t *create_basic_statement(void); + exec_job_t *create_job(void); public: parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) @@ -231,10 +353,10 @@ class parse_exec_t wcstring simulate(void); }; -exec_basic_statement_t *parse_exec_t::create_basic_statement() +exec_job_t *parse_exec_t::create_job() { - assembling_statements.push_back(exec_basic_statement_t()); - return &assembling_statements.back(); + assembling_jobs.push_back(exec_job_t()); + return &assembling_jobs.back(); } void parse_exec_t::run_top_node() @@ -242,7 +364,7 @@ void parse_exec_t::run_top_node() PARSE_ASSERT(! exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - bool log = false; + bool log = true; if (log) { @@ -254,68 +376,102 @@ void parse_exec_t::run_top_node() switch (parse_node.type) { - case symbol_statement_list: + case symbol_job_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // No more jobs, done + pop(); + } + else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) + { + // Empty job, so just skip it + pop_push(1, 1); + } + else + { + // Normal job + pop_push(0, 2); + } + break; + + case symbol_job: + { + PARSE_ASSERT(parse_node.child_count == 2); + exec_job_t *job = create_job(); pop_push_all(); break; - - case symbol_statement: - { - PARSE_ASSERT(parse_node.child_count == 1); - // See if we're just an empty statement - const parse_node_t &child = parse_tree.at(parse_node.child_start + 0); - if (child.type == parse_token_type_end) + } + + case symbol_job_continuation: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); + if (parse_node.child_count == 0) { - // Empty statement + // All done with this job + job_assembly_complete(); pop(); } else { - // We have a statement to execute - pop_push_all(); + // Skip the pipe + pop_push(1, 2); } + break; + + case symbol_statement: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_block_statement: + { + PARSE_ASSERT(parse_node.child_count == 5); + pop_push_specific(0, 2, 4); + break; + } + + case symbol_block_header: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_function_header: + { + PARSE_ASSERT(parse_node.child_count == 3); + //pop_push_all(); + pop(); break; } case symbol_decorated_statement: { - PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - exec_basic_statement_t *cmd = create_basic_statement(); - cmd->set_decoration(parse_node.tag); + PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - // Push the last node (skip any decoration) - pop_push(parse_node.child_count - 1, 1); + node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); + parse_keyword_t decoration = static_cast(parse_node.tag); + assemble_command_for_plain_statement(plain_statement_idx, decoration); + pop(); break; } - + + // The following symbols should be handled by their parents, i.e. never pushed on our stack case symbol_plain_statement: - PARSE_ASSERT(parse_node.child_count == 3); - // Extract the command - PARSE_ASSERT(! assembling_statements.empty()); - assemble_command(parse_node.child_start + 0); - // Jump to statement list, then terminator - pop_push(1, 2); - break; - case symbol_arguments_or_redirections_list: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - pop_push_all(); - break; - case symbol_argument_or_redirection: - PARSE_ASSERT(parse_node.child_count == 1); - assemble_argument_or_redirection(parse_node.child_start + 0); - pop(); + PARSER_DIE(); break; case parse_token_type_end: PARSE_ASSERT(parse_node.child_count == 0); - assembly_complete(); pop(); break; default: - fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx); PARSER_DIE(); break; diff --git a/parse_tree.cpp b/parse_tree.cpp index d2c34b3bb..019afc53b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -38,13 +38,16 @@ wcstring parse_error_t::describe(const wcstring &src) const return result; } -static wcstring token_type_description(parse_token_type_t type) +wcstring token_type_description(parse_token_type_t type) { switch (type) { case token_type_invalid: return L"invalid"; - case symbol_statement_list: return L"statement_list"; + case symbol_job_list: return L"job_list"; + case symbol_job: return L"job"; + case symbol_job_continuation: return L"job_continuation"; + case symbol_statement: return L"statement"; case symbol_block_statement: return L"block_statement"; case symbol_block_header: return L"block_header"; @@ -70,6 +73,30 @@ static wcstring token_type_description(parse_token_type_t type) } } +wcstring keyword_description(parse_keyword_t k) +{ + switch (k) + { + case parse_keyword_none: return L"none"; + case parse_keyword_if: return L"if"; + case parse_keyword_else: return L"else"; + case parse_keyword_for: return L"for"; + case parse_keyword_in: return L"in"; + case parse_keyword_while: return L"while"; + case parse_keyword_begin: return L"begin"; + case parse_keyword_function: return L"function"; + case parse_keyword_switch: return L"switch"; + case parse_keyword_end: return L"end"; + case parse_keyword_and: return L"and"; + case parse_keyword_or: return L"or"; + case parse_keyword_not: return L"not"; + case parse_keyword_command: return L"command"; + case parse_keyword_builtin: return L"builtin"; + default: + return format_string(L"Unknown keyword type %ld", static_cast(k)); + } +} + wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); @@ -83,8 +110,20 @@ struct parse_token_t enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; + + wcstring describe() const; }; +wcstring parse_token_t::describe(void) const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; +} + // Convert from tokenizer_t's token type to our token static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) { @@ -124,12 +163,12 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & const size_t spacesPerIndent = 2; // unindent statement lists by 1 to flatten them - if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) + if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) { if (indent > 0) indent -= 1; } - append_format(*result, L"%2lu ", *line); + append_format(*result, L"%2lu - %l2u ", *line, start); result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) @@ -190,17 +229,19 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_statement_list; + parse_stack_element_t elem = symbol_job_list; elem.node_idx = 0; symbol_stack.push_back(elem); // goal token - nodes.push_back(parse_node_t(symbol_statement_list)); + nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); // implementation of certain parser constructions - void accept_token(parse_token_t token); - void accept_token_statement_list(parse_token_t token); + void accept_token(parse_token_t token, const wcstring &src); + void accept_token_job_list(parse_token_t token); + void accept_token_job(parse_token_t token); + void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); @@ -289,7 +330,7 @@ class parse_ll_t void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { - fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); + fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); PARSER_DIE(); } @@ -304,17 +345,33 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_statement_list(parse_token_t token) +void parse_ll_t::accept_token_job_list(parse_token_t token) { - PARSE_ASSERT(stack_top_type() == symbol_statement_list); + PARSE_ASSERT(stack_top_type() == symbol_job_list); switch (token.type) { case parse_token_type_string: + // 'end' is special + if (token.keyword == parse_keyword_end) + { + // End this job list + symbol_stack_pop_push(); + } + else + { + // Normal string + symbol_stack_pop_push(symbol_job, symbol_job_list); + } + break; + case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: + symbol_stack_pop_push(symbol_job, symbol_job_list); + break; + case parse_token_type_end: - symbol_stack_pop_push(symbol_statement, symbol_statement_list); + symbol_stack_pop_push(parse_token_type_end, symbol_job_list); break; case parse_token_type_terminate: @@ -328,6 +385,30 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) } } +void parse_ll_t::accept_token_job(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job); + symbol_stack_pop_push(symbol_statement, symbol_job_continuation); +} + +void parse_ll_t::accept_token_job_continuation(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job_continuation); + switch (token.type) + { + case parse_token_type_pipe: + // Pipe, continuation + symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation); + break; + + default: + // Not a pipe, no job continuation + symbol_stack_pop_push(); + break; + } +} + + void parse_ll_t::accept_token_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement); @@ -341,25 +422,13 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); break; - + case parse_keyword_if: - symbol_stack_pop_push(symbol_if_header); - break; - case parse_keyword_for: - symbol_stack_pop_push(symbol_for_header); - break; - case parse_keyword_while: - symbol_stack_pop_push(symbol_while_header); - break; - - case parse_keyword_begin: - symbol_stack_pop_push(symbol_begin_header); - break; - case parse_keyword_function: - symbol_stack_pop_push(symbol_function_header); + case parse_keyword_begin: + symbol_stack_pop_push(symbol_block_statement); break; case parse_keyword_else: @@ -370,7 +439,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_end: - // TODO + PARSER_DIE(); //todo break; // 'in' is only special within a for_header @@ -384,11 +453,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) } break; - case parse_token_type_end: - // Empty line, or just a semicolon - symbol_stack_pop_push(parse_token_type_end); - break; - case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: @@ -415,7 +479,7 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) break; case parse_keyword_else: - //todo + PARSER_DIE(); //todo break; case parse_keyword_for: @@ -508,7 +572,7 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) @@ -588,15 +652,23 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) symbol_stack.pop_back(); result = true; } + else if (token.type == parse_token_type_pipe) + { + // Pipes are primitive + symbol_stack.pop_back(); + result = true; + } } return result; } -void parse_ll_t::accept_token(parse_token_t token) +void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) { - if (1) + bool logit = true; + if (logit) { - fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); + const wcstring txt = wcstring(src, token.source_start, token.source_length); + fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! symbol_stack.empty()); @@ -605,6 +677,10 @@ void parse_ll_t::accept_token(parse_token_t token) { if (top_node_match_token(token)) { + if (logit) + { + fprintf(stderr, "Consumed token %ls\n", token.describe().c_str()); + } consumed = true; break; } @@ -612,16 +688,24 @@ void parse_ll_t::accept_token(parse_token_t token) switch (stack_top_type()) { /* Symbols */ - case symbol_statement_list: - accept_token_statement_list(token); + case symbol_job_list: + accept_token_job_list(token); break; + case symbol_job: + accept_token_job(token); + break; + + case symbol_job_continuation: + accept_token_job_continuation(token); + break; + case symbol_statement: accept_token_statement(token); break; case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); break; case symbol_block_header: @@ -644,7 +728,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list); break; case symbol_boolean_statement: @@ -673,7 +757,8 @@ void parse_ll_t::accept_token(parse_token_t token) break; default: - fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); + fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); + exit_without_destructors(EXIT_FAILURE); break; } } @@ -741,7 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ token.source_start = (size_t)tok_start; token.source_length = wcslen(tok_txt); token.keyword = keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token); + this->parser->accept_token(token, str); } wcstring result = dump_tree(this->parser->nodes, str); diff --git a/parse_tree.h b/parse_tree.h index b95e371a6..49ec6a132 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -20,6 +20,7 @@ class parse_node_t; typedef std::vector parse_node_tree_t; typedef size_t node_offset_t; +#define NODE_OFFSET_INVALID (static_cast(-1)) struct parse_error_t { @@ -51,7 +52,9 @@ enum parse_token_type_t token_type_invalid, // Non-terminal tokens - symbol_statement_list, + symbol_job_list, + symbol_job, + symbol_job_continuation, symbol_statement, symbol_block_statement, symbol_block_header, @@ -96,6 +99,9 @@ enum parse_keyword_t parse_keyword_builtin }; +wcstring token_type_description(parse_token_type_t type); +wcstring keyword_description(parse_keyword_t type); + /** Base class for nodes of a parse tree */ class parse_node_t { @@ -125,31 +131,46 @@ class parse_node_t explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } + + node_offset_t child_offset(node_offset_t which) const + { + PARSE_ASSERT(which < child_count); + return child_start + which; + } }; /* Fish grammar: -# A statement_list is a list of statements, separated by semicolons or newlines +# A job_list is a list of jobs, separated by semicolons or newlines - statement_list = | - statement statement_list + job_list = | + job_list | + job job_list -# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. newline) +# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation - statement = boolean_statement | block_statement | decorated_statement | + job = statement job_continuation + job_continuation = | + statement job_continuation + +# A statement is a normal command, or an if / while / and etc + + statement = boolean_statement | block_statement | decorated_statement # A block is a conditional, loop, or begin/end - block_statement = block_header statement_list END arguments_or_redirections_list + block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = if_header | for_header | while_header | function_header | begin_header if_header = IF statement - for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement - begin_header = BEGIN STATEMENT_TERMINATOR - function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + begin_header = BEGIN + function_header = FUNCTION function_name arguments_or_redirections_list +#(TODO: functions should not support taking redirections in their arguments) + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement @@ -157,7 +178,7 @@ class parse_node_t # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list terminator + plain_statement = command arguments_or_redirections_list arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list