diff --git a/parse_productions.cpp b/parse_productions.cpp index fba24c597..e63f56023 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -1,10 +1,34 @@ #include "parse_productions.h" using namespace parse_productions; +#define NO_PRODUCTION ((production_option_idx_t)(-1)) -#define PRODUCTIONS(sym) static const ProductionList_t sym##_productions -#define RESOLVE(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) -#define RESOLVE_ONLY(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } +static bool production_is_empty(const production_t production) +{ + return production[0] == token_type_invalid; +} + +// Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it +static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) +{ + if (which < 0 || which >= MAX_PRODUCTIONS) + return false; + + bool nonempty_found = false; + for (int i=which; i < MAX_PRODUCTIONS; i++) + { + if (! production_is_empty(production_list[i])) + { + nonempty_found = true; + break; + } + } + return nonempty_found; +} + +#define PRODUCTIONS(sym) static const production_options_t productions_##sym +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = @@ -44,7 +68,6 @@ RESOLVE(job_list) case parse_token_type_terminate: // no more commands, just transition to empty return 0; - break; default: return NO_PRODUCTION; @@ -350,3 +373,119 @@ RESOLVE(arguments_or_redirections_list) } } +PRODUCTIONS(argument_or_redirection) = +{ + {parse_token_type_string}, + {parse_token_type_redirection} +}; +RESOLVE(argument_or_redirection) +{ + switch (token_type) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(optional_background) = +{ + {}, + { parse_token_type_background } +}; + +RESOLVE(optional_background) +{ + switch (token_type) + { + case parse_token_type_background: + return 1; + default: + return 0; + } +} + +#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag) +{ + bool log_it = false; + if (log_it) + { + fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); + } + + /* Fetch the list of productions and the function to resolve them */ + const production_options_t *production_list = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; + switch (node_type) + { + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list_nonempty) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(optional_background) + + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case token_type_invalid: + fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); + PARSER_DIE(); + break; + + } + PARSE_ASSERT(production_list != NULL); + PARSE_ASSERT(resolver != NULL); + + const production_t *result = NULL; + production_option_idx_t which = resolver(input_type, input_keyword, out_tag); + + if (log_it) + { + fprintf(stderr, "\tresolved to %u\n", (unsigned)which); + } + + + if (which == NO_PRODUCTION) + { + fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + result = NULL; + } + else + { + PARSE_ASSERT(production_is_valid(*production_list, which)); + result = &((*production_list)[which]); + } + *out_which_production = which; + return result; +} diff --git a/parse_productions.h b/parse_productions.h index d3743014b..d7b7c19d4 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -8,45 +8,6 @@ #include "parse_tree.h" -/* Terrifying template black magic. */ - -/* - -- Get info for symbol -- Resolve production from info -- Get productions for children -- Get symbols for productions - -Production may be: - -1. Single value -2. Sequence of values (possibly empty) -3. Options of Single / Sequence - -Info to specify: - -1. Number of different productions -2. Resolver function -3. Symbols for associated productions - -Choice: should info be a class or a data? - -data: - -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); //may be trivial - production productions[5]; -} - -struct Production_t -{ - enum parse_token_type_t symbols[5]; -} - -*/ - namespace parse_productions { @@ -54,596 +15,54 @@ namespace parse_productions #define MAX_SYMBOLS_PER_PRODUCTION 5 +typedef uint32_t production_tag_t; /* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. */ -typedef unsigned char Production_t[MAX_SYMBOLS_PER_PRODUCTION]; +typedef uint8_t production_element_t; -typedef Production_t ProductionList_t[MAX_PRODUCTIONS]; +/* An index into a production option list */ +typedef uint8_t production_option_idx_t; + +inline parse_token_type_t production_element_type(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + return parse_token_type_string; + } + else + { + return static_cast(elem); + } +} + +inline parse_keyword_t production_element_keyword(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + // First keyword is LAST_TOKEN_OR_SYMBOL + 1 + return static_cast(elem - LAST_TOKEN_OR_SYMBOL - 1); + } + else + { + return parse_keyword_none; + } +} + + +inline bool production_element_is_valid(production_element_t elem) +{ + return elem != token_type_invalid; +} + +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +typedef production_t production_options_t[MAX_PRODUCTIONS]; #define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); - Production_t productions[MAX_PRODUCTIONS]; -}; - - +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag); } -namespace parse_symbols -{ - -#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - -#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - -#define NO_PRODUCTION (-1) - -struct Symbol -{ - typedef int magic_symbol_type_t; -}; - -template -struct Token : public Symbol -{ - SYMBOL(WHICH); -}; - -/* Placeholder */ -typedef Token none; - -typedef Token EMPTY; - -template -struct Seq -{ - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - - typedef int magic_seq_type_t; -}; - -template -struct OR -{ - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - - typedef int magic_or_type_t; -}; - -template -struct Keyword : public Symbol -{ - static inline parse_keyword_t get_token() - { - return WHICH; - } -}; - -struct job; -struct statement; -struct job_continuation; -struct boolean_statement; -struct block_statement; -struct if_statement; -struct if_clause; -struct else_clause; -struct else_continuation; -struct switch_statement; -struct decorated_statement; -struct switch_statement; -struct case_item_list; -struct case_item; -struct argument_list_nonempty; -struct argument_list; -struct block_statement; -struct block_header; -struct for_header; -struct while_header; -struct begin_header; -struct function_header; -struct boolean_statement; -struct decorated_statement; -struct plain_statement; -struct arguments_or_redirections_list; -struct argument_or_redirection; -struct redirection; -struct statement_terminator; -struct optional_background; - -/* A job_list is a list of jobs, separated by semicolons or newlines */ -struct job_list : public Symbol -{ - typedef OR< - EMPTY, - Seq, - Seq, job_list> - > productions; - - SYMBOL(symbol_job_list) - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - // 'end' is special - switch (token_keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - return 0; - - default: - // Normal string - return 1; - } - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - return 1; - - case parse_token_type_end: - // Empty line - return 2; - - case parse_token_type_terminate: - // no more commands, just transition to empty - return 0; - break; - - default: - return NO_PRODUCTION; - } - } - -}; - -/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ -struct job : public Symbol -{ - typedef Seq sole_production; - SYMBOL(symbol_job); -}; - -struct job_continuation : public Symbol -{ - typedef OR< - EMPTY, - Seq, statement, job_continuation> - > productions; - - SYMBOL(symbol_job_continuation); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_pipe: - // Pipe, continuation - return 1; - - default: - // Not a pipe, no job continuation - return 0; - } - - } -}; - -/* A statement is a normal command, or an if / while / and etc */ -struct statement : public Symbol -{ - typedef OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > productions; - - SYMBOL(symbol_statement); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - switch (token_keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - return 0; - - case parse_keyword_for: - case parse_keyword_while: - case parse_keyword_function: - case parse_keyword_begin: - return 1; - - case parse_keyword_if: - return 2; - - case parse_keyword_else: - //symbol_stack_pop(); - return NO_PRODUCTION; - - case parse_keyword_switch: - return 3; - - case parse_keyword_end: - PARSER_DIE(); //todo - return NO_PRODUCTION; - - // 'in' is only special within a for_header - case parse_keyword_in: - case parse_keyword_none: - case parse_keyword_command: - case parse_keyword_builtin: - case parse_keyword_case: - return 4; - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - case parse_token_type_terminate: - return NO_PRODUCTION; - //parse_error(L"statement", token); - - default: - return NO_PRODUCTION; - } - } - -}; - -struct if_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - SYMBOL(symbol_if_statement); -}; - -struct if_clause : public Symbol -{ - typedef Seq, job, statement_terminator, job_list> sole_production; - SYMBOL(symbol_if_clause); -}; - -struct else_clause : public Symbol -{ - typedef OR< - EMPTY, - Seq, else_continuation> - > productions; - - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: - return 1; - default: - return 0; - } - } -}; - -struct else_continuation : public Symbol -{ - typedef OR< - Seq, - Seq - > productions; - - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: - return 0; - default: - return 1; - } - } -}; - -struct switch_statement : public Symbol -{ - typedef Seq, - Token, - statement_terminator, - case_item_list, - Keyword - > sole_production; - - SYMBOL(symbol_switch_statement); -}; - -struct case_item_list : public Symbol -{ - typedef OR - < - EMPTY, - Seq, - Seq, case_item_list> - > productions; - - SYMBOL(symbol_case_item_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - - default: - if (tok == parse_token_type_end) - { - /* empty line */ - return 2; - } - else - { - return 0; - } - - } - } -}; - -struct case_item : public Symbol -{ - typedef Seq, argument_list, statement_terminator, job_list> sole_production; - - SYMBOL(symbol_case_item); -}; - -struct argument_list_nonempty : public Symbol -{ - typedef Seq, argument_list> sole_production; - SYMBOL(symbol_argument_list_nonempty); -}; - -struct argument_list : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - return 1; - default: - return 0; - } - } -}; - -struct block_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_block_statement); -}; - -struct block_header : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_block_header); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - // todo - case parse_keyword_else: - return NO_PRODUCTION; - case parse_keyword_for: - return 0; - case parse_keyword_while: - return 1; - case parse_keyword_function: - return 2; - case parse_keyword_begin: - return 3; - default: - return NO_PRODUCTION; - } - } -}; - -struct for_header : public Symbol -{ - typedef Seq, Token, Keyword, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_for_header); -}; - -struct while_header : public Symbol -{ - typedef Seq, statement> sole_production; - - SYMBOL(symbol_while_header); -}; - -struct begin_header : public Symbol -{ - typedef Keyword sole_production; - SYMBOL(symbol_begin_header); -}; - -struct function_header : public Symbol -{ - typedef Seq< Keyword, Token, argument_list> sole_production; - SYMBOL(symbol_function_header); -}; - -/* A boolean statement is AND or OR or NOT */ -struct boolean_statement : public Symbol -{ - typedef OR< - Seq, statement>, - Seq, statement>, - Seq, statement> - > productions; - - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: - return 0; - case parse_keyword_or: - return 1; - case parse_keyword_not: - return 2; - default: - return NO_PRODUCTION; - } - } -}; - -/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ -struct decorated_statement : public Symbol -{ - - typedef OR< - Seq, plain_statement>, - Seq, plain_statement>, - plain_statement - > productions; - - SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: - return 0; - case parse_keyword_builtin: - return 1; - default: - return 2; - } - } -}; - -struct plain_statement : public Symbol -{ - - typedef Seq, arguments_or_redirections_list, optional_background> sole_production; - - SYMBOL(symbol_plain_statement); - -}; - -struct arguments_or_redirections_list : public Symbol -{ - typedef OR< - EMPTY, - Seq > - productions; - - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } -}; - -struct argument_or_redirection : public Symbol -{ - typedef OR< - Token, - redirection - > productions; - - - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - return 0; - case parse_token_type_redirection: - return 1; - default: - return NO_PRODUCTION; - } - } -}; - -struct redirection : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_redirection); -}; - -struct statement_terminator : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_end); -}; - -struct optional_background : public Symbol -{ - typedef OR< - EMPTY, - Token - > productions; - - SYMBOL(symbol_optional_background); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_background: - return 1; - default: - return 0; - } - } -}; - -} #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index 4df277d48..a25c549f6 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -2,7 +2,7 @@ #include "tokenizer.h" #include -using namespace parse_symbols; +using namespace parse_productions; wcstring parse_error_t::describe(const wcstring &src) const { @@ -260,6 +260,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } } +__attribute__((unused)) static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) @@ -277,11 +278,11 @@ struct parse_stack_element_t enum parse_keyword_t keyword; node_offset_t node_idx; - parse_stack_element_t(parse_token_type_t t) : type(t), keyword(parse_keyword_none), node_idx(-1) + explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) { } - - parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) + + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) { } @@ -311,26 +312,13 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_job_list; - elem.node_idx = 0; - symbol_stack.push_back(elem); // goal token + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); - // implementation of certain parser constructions void accept_token(parse_token_t token, const wcstring &src); - void accept_token_job_list(parse_token_t token); - void accept_token_job(parse_token_t token); - void accept_token_job_continuation(parse_token_t token); - void accept_token_else_clause(parse_token_t token); - void accept_token_else_continuation(parse_token_t token); - void accept_token_plain_statement(parse_token_t token); - void accept_token_argument_list(parse_token_t token); - void accept_token_arguments_or_redirections_list(parse_token_t token); - void accept_token_argument_or_redirection(parse_token_t token); - bool accept_token_string(parse_token_t token); void token_unhandled(parse_token_t token, const char *function); @@ -373,111 +361,67 @@ class parse_ll_t symbol_stack.pop_back(); } - - // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. - inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. + inline void symbol_stack_pop_push_production(const production_t *production) { - - // Logging? - if (0) + bool logit = false; + if (logit) { - fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); - if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); - if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); - if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); - if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); - if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); + size_t count = 0; + fprintf(stderr, "Applying production:\n"); + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type).c_str(), keyword_description(keyword).c_str()); + count++; + } + } + if (! count) fprintf(stderr, "\t\n"); } - // Get the node for the top symbol and tell it about its children - size_t node_idx = symbol_stack.back().node_idx; - parse_node_t &node = nodes.at(node_idx); - + + // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + const size_t child_start = nodes.size(); + size_t child_count = 0; + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + // Generate the parse node. Note that this push_back may invalidate node. + parse_token_type_t child_type = production_element_type(elem); + nodes.push_back(parse_node_t(child_type)); + child_count++; + } + } + + // Update the parent + const size_t parent_node_idx = symbol_stack.back().node_idx; + parse_node_t &parent_node = nodes.at(parent_node_idx); + // Should have no children yet - PARSE_ASSERT(node.child_count == 0); + PARSE_ASSERT(parent_node.child_count == 0); - // Tell the node where its children start - node.child_start = nodes.size(); - - // Add nodes for the children - // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) - if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); - if (tok2.type != token_type_invalid) add_child_to_node(node_idx, &tok2); - if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); - if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); - if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); - - // The above set the node_idx. Now replace the top of the stack. + // Tell the node about its children + parent_node.child_start = child_start; + parent_node.child_count = child_count; + + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. symbol_stack.pop_back(); - if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); - if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); - if (tok3.type != token_type_invalid) symbol_stack.push_back(tok3); - if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); - if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0) - { - symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0) - { - symbol_stack_pop_push_int(T::get_token()); - } - - // Singular. Sole productions are always of type Seq. - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0) - { - typedef typename T::sole_production seq; - symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token()); - } - - // Plural productions, of type Or. - template - inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0) - { - typedef typename T::productions ors; - int which = T::production(tok.type, tok.keyword); - switch (which) + symbol_stack.reserve(symbol_stack.size() + child_count); + size_t idx = child_count; + while (idx--) { - case 0: - symbol_stack_pop_push2(); - break; - case 1: - symbol_stack_pop_push2(); - break; - case 2: - symbol_stack_pop_push2(); - break; - case 3: - symbol_stack_pop_push2(); - break; - case 4: - symbol_stack_pop_push2(); - break; - - case NO_PRODUCTION: - parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str()); - break; - - default: - parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str()); - break; + production_element_t elem = (*production)[idx]; + PARSE_ASSERT(production_element_is_valid(elem)); + symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); } } - // Non-sequence basic productions - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0) - { - symbol_stack_pop_push_int(T::sole_production::get_token()); - } - - }; void parse_ll_t::dump_stack(void) const @@ -551,57 +495,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_else_clause(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_clause); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_else_continuation(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_continuation); - symbol_stack_produce(token); -} - - -void parse_ll_t::accept_token_argument_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_list); - symbol_stack_produce(token); -} - - -void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); - symbol_stack_produce(token); -} - -bool parse_ll_t::accept_token_string(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == parse_token_type_string); - bool result = false; - switch (token.type) - { - case parse_token_type_string: - // Got our string - symbol_stack_pop(); - result = true; - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } - return result; -} - bool parse_ll_t::top_node_match_token(parse_token_t token) { PARSE_ASSERT(! symbol_stack.empty()); @@ -654,122 +547,16 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - - switch (stack_top_type()) - { - /* Symbols */ - case symbol_job_list: - symbol_stack_produce(token); - break; - - case symbol_job: - symbol_stack_produce(token); - break; - - case symbol_job_continuation: - symbol_stack_produce(token); - break; - - case symbol_statement: - symbol_stack_produce(token); - break; - - case symbol_if_statement: - symbol_stack_produce(token); - break; - - case symbol_if_clause: - symbol_stack_produce(token); - break; - - case symbol_else_clause: - accept_token_else_clause(token); - break; - - case symbol_else_continuation: - accept_token_else_continuation(token); - break; - - case symbol_block_statement: - symbol_stack_produce(token); - break; - - case symbol_block_header: - symbol_stack_produce(token); - break; - - case symbol_for_header: - symbol_stack_produce(token); - break; - - case symbol_while_header: - symbol_stack_produce(token); - break; - - case symbol_begin_header: - symbol_stack_produce(token); - break; - - case symbol_function_header: - symbol_stack_produce(token); - break; - - case symbol_switch_statement: - symbol_stack_produce(token); - break; - - case symbol_case_item_list: - symbol_stack_produce(token); - break; - - case symbol_case_item: - symbol_stack_produce(token); - break; - - case symbol_boolean_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_decorated_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_plain_statement: - symbol_stack_produce(token); - break; - - case symbol_argument_list_nonempty: - symbol_stack_produce(token); - break; - - case symbol_argument_list: - accept_token_argument_list(token); - break; - - case symbol_arguments_or_redirections_list: - accept_token_arguments_or_redirections_list(token); - break; - - case symbol_argument_or_redirection: - accept_token_argument_or_redirection(token); - break; - - case symbol_optional_background: - symbol_stack_produce(token); - break; - - /* Tokens */ - case parse_token_type_string: - consumed = accept_token_string(token); - break; - - default: - fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); - exit_without_destructors(EXIT_FAILURE); - break; - } + + // Get the production for the top of the stack + parse_stack_element_t &stack_elem = symbol_stack.back(); + parse_node_t &node = nodes.at(stack_elem.node_idx); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag); + PARSE_ASSERT(production != NULL); + + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. + symbol_stack_pop_push_production(production); } } diff --git a/parse_tree.h b/parse_tree.h index ef7678f5c..c53864258 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -137,6 +137,9 @@ public: /* Type-dependent data */ uint32_t tag; + + /* Which production was used */ + uint8_t production_idx; /* Description */ wcstring describe(void) const;