From 5eb1ef4b4ad41ce45fcb37b1d18ab9dfb41b9ae7 Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Wed, 9 Nov 2016 21:37:49 -0800 Subject: [PATCH] use enum_map for parser enums This simplifies the parsing code slightly and makes it more consistent with how we deal with enums in the *builtin.cpp* module. --- src/parse_constants.h | 100 ++++++++++---- src/parse_productions.cpp | 272 +++++++++++++++++++------------------- src/parse_productions.h | 8 +- src/parse_tree.cpp | 137 ++++--------------- 4 files changed, 237 insertions(+), 280 deletions(-) diff --git a/src/parse_constants.h b/src/parse_constants.h index 2ee6287b7..84a3f9828 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -2,6 +2,7 @@ #ifndef FISH_PARSE_CONSTANTS_H #define FISH_PARSE_CONSTANTS_H +#include "common.h" #include "config.h" #define PARSE_ASSERT(a) assert(a) @@ -11,11 +12,9 @@ exit_without_destructors(-1); \ } while (0) -// IMPORTANT: If the following enum is modified you must update the corresponding parser_token_types -// array in parse_tree.cpp. +// IMPORTANT: If the following enum table is modified you must also update token_enum_map below. enum parse_token_type_t { - token_type_invalid, - + token_type_invalid = 1, // Non-terminal tokens symbol_job_list, symbol_job, @@ -27,71 +26,97 @@ enum parse_token_type_t { symbol_while_header, symbol_begin_header, symbol_function_header, - symbol_if_statement, symbol_if_clause, symbol_else_clause, symbol_else_continuation, - symbol_switch_statement, symbol_case_item_list, symbol_case_item, - symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - symbol_andor_job_list, - symbol_argument_list, - - // Freestanding argument lists are parsed from the argument list supplied to 'complete -a' + // Freestanding argument lists are parsed from the argument list supplied to 'complete -a'. // They are not generated by parse trees rooted in symbol_job_list. 
symbol_freestanding_argument_list, - symbol_argument, symbol_redirection, - symbol_optional_background, - symbol_end_command, - // Terminal types. parse_token_type_string, parse_token_type_pipe, parse_token_type_redirection, parse_token_type_background, parse_token_type_end, - // Special terminal type that means no more tokens forthcoming. parse_token_type_terminate, - // Very special terminal types that don't appear in the production list. parse_special_type_parse_error, parse_special_type_tokenizer_error, parse_special_type_comment, - LAST_TOKEN_TYPE = parse_special_type_comment, + LAST_TOKEN_TYPE = parse_special_type_comment, FIRST_TERMINAL_TYPE = parse_token_type_string, LAST_TERMINAL_TYPE = parse_token_type_terminate, - LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string, LAST_PARSE_TOKEN_TYPE = parse_token_type_end } __packed; -// Array of strings corresponding to the enums above instantiated in parse_tree.cpp. -extern const wchar_t *const parser_token_types[]; -// These must be maintained in sorted order (except for none, which isn't a keyword). This enables -// us to do binary search. 
+const enum_map token_enum_map[] = { + {parse_special_type_comment, L"parse_special_type_comment"}, + {parse_special_type_parse_error, L"parse_special_type_parse_error"}, + {parse_special_type_tokenizer_error, L"parse_special_type_tokenizer_error"}, + {parse_token_type_background, L"parse_token_type_background"}, + {parse_token_type_end, L"parse_token_type_end"}, + {parse_token_type_pipe, L"parse_token_type_pipe"}, + {parse_token_type_redirection, L"parse_token_type_redirection"}, + {parse_token_type_string, L"parse_token_type_string"}, + {parse_token_type_terminate, L"parse_token_type_terminate"}, + {symbol_andor_job_list, L"symbol_andor_job_list"}, + {symbol_argument, L"symbol_argument"}, + {symbol_argument_list, L"symbol_argument_list"}, + {symbol_argument_or_redirection, L"symbol_argument_or_redirection"}, + {symbol_arguments_or_redirections_list, L"symbol_arguments_or_redirections_list"}, + {symbol_begin_header, L"symbol_begin_header"}, + {symbol_block_header, L"symbol_block_header"}, + {symbol_block_statement, L"symbol_block_statement"}, + {symbol_boolean_statement, L"symbol_boolean_statement"}, + {symbol_case_item, L"symbol_case_item"}, + {symbol_case_item_list, L"symbol_case_item_list"}, + {symbol_decorated_statement, L"symbol_decorated_statement"}, + {symbol_else_clause, L"symbol_else_clause"}, + {symbol_else_continuation, L"symbol_else_continuation"}, + {symbol_end_command, L"symbol_end_command"}, + {symbol_for_header, L"symbol_for_header"}, + {symbol_freestanding_argument_list, L"symbol_freestanding_argument_list"}, + {symbol_function_header, L"symbol_function_header"}, + {symbol_if_clause, L"symbol_if_clause"}, + {symbol_if_statement, L"symbol_if_statement"}, + {symbol_job, L"symbol_job"}, + {symbol_job_continuation, L"symbol_job_continuation"}, + {symbol_job_list, L"symbol_job_list"}, + {symbol_optional_background, L"symbol_optional_background"}, + {symbol_plain_statement, L"symbol_plain_statement"}, + {symbol_redirection, L"symbol_redirection"}, + 
{symbol_statement, L"symbol_statement"}, + {symbol_switch_statement, L"symbol_switch_statement"}, + {symbol_while_header, L"symbol_while_header"}, + {token_type_invalid, L"token_type_invalid"}, + {token_type_invalid, NULL}}; +#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map) + +// IMPORTANT: If the following enum is modified you must update the corresponding keyword_enum_map +// array below. // -// IMPORTANT: If the following enum is modified you must update the corresponding keyword_map array -// in parse_tree.cpp. +// IMPORTANT: These enums must start at zero. enum parse_keyword_t { - parse_keyword_none, + parse_keyword_none = 0, parse_keyword_and, parse_keyword_begin, parse_keyword_builtin, @@ -108,9 +133,28 @@ enum parse_keyword_t { parse_keyword_or, parse_keyword_switch, parse_keyword_while, - LAST_KEYWORD = parse_keyword_while } __packed; +const enum_map keyword_enum_map[] = { + {parse_keyword_and, L"and"}, + {parse_keyword_begin, L"begin"}, + {parse_keyword_builtin, L"builtin"}, + {parse_keyword_case, L"case"}, + {parse_keyword_command, L"command"}, + {parse_keyword_else, L"else"}, + {parse_keyword_end, L"end"}, + {parse_keyword_exec, L"exec"}, + {parse_keyword_for, L"for"}, + {parse_keyword_function, L"function"}, + {parse_keyword_if, L"if"}, + {parse_keyword_in, L"in"}, + {parse_keyword_not, L"not"}, + {parse_keyword_or, L"or"}, + {parse_keyword_switch, L"switch"}, + {parse_keyword_while, L"while"}, + {parse_keyword_none, NULL}}; +#define keyword_enum_map_len (sizeof keyword_enum_map / sizeof *keyword_enum_map) + // Node tag values. // Statement decorations, stored in node tag. diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp index b3d976905..dbbd84610 100644 --- a/src/parse_productions.cpp +++ b/src/parse_productions.cpp @@ -21,34 +21,41 @@ using namespace parse_productions; // Productions are generally a static const array, and we return a pointer to the array (yes, // really). 
-#define RESOLVE(sym) \ - static const production_t *resolve_##sym( \ +#define RESOLVE(sym) \ + static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) -// Hacktastic? -#define RESOLVE_ONLY(sym) \ - extern const production_t sym##_only; \ - static const production_t *resolve_##sym( \ +// This is a shorthand for symbols which always resolve to the same production sequence. Using this +// avoids repeating a lot of boilerplate code below. +#define RESOLVE_ONLY(sym, tokens...) \ + extern const production_element_t sym##_only[]; \ + static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \ UNUSED(token1); \ UNUSED(token2); \ UNUSED(out_tag); \ - return &sym##_only; \ + return sym##_only; \ } \ - const production_t sym##_only + const production_element_t sym##_only[] = {tokens, token_type_invalid} -#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) +// Convert a parse_keyword_t enum to a parse_token_type_t enum. +#define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1) -/// Helper macro to define an array. -#define P static const production_t +/// Helper macro to define a production sequence. Note that such sequences must always end with +/// enum `token_type_invalid`. +#define P(production_name, tokens...) \ + static const production_element_t production_name[] = {tokens, token_type_invalid} + +/// The empty production is used often enough it's worth definining once at module scope. +static const production_element_t empty[] = {token_type_invalid}; /// A job_list is a list of jobs, separated by semicolons or newlines. 
RESOLVE(job_list) { UNUSED(token2); UNUSED(out_tag); - P list_end = {}; - P normal = {symbol_job, symbol_job_list}; - P empty_line = {parse_token_type_end, symbol_job_list}; + P(normal, symbol_job, symbol_job_list); + P(empty_line, parse_token_type_end, symbol_job_list); + switch (token1.type) { case parse_token_type_string: { // Some keywords are special. @@ -56,23 +63,23 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: { - return &list_end; // end this job list + return empty; // end this job list } default: { - return &normal; // normal string + return normal; // normal string } } } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: { - return &normal; + return normal; } case parse_token_type_end: { - return &empty_line; + return empty_line; } case parse_token_type_terminate: { - return &list_end; // no more commands, just transition to empty + return empty; // no more commands, just transition to empty } default: { return NO_PRODUCTION; } } @@ -81,20 +88,19 @@ RESOLVE(job_list) { // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like // if statements, where we require a command). To represent "non-empty", we require a statement, // followed by a possibly empty job_continuation. 
- -RESOLVE_ONLY(job) = {symbol_statement, symbol_job_continuation, symbol_optional_background}; +RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background); RESOLVE(job_continuation) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P piped = {parse_token_type_pipe, symbol_statement, symbol_job_continuation}; + P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation); + switch (token1.type) { case parse_token_type_pipe: { - return &piped; // pipe, continuation + return piped; // pipe, continuation } default: { - return &empty; // not a pipe, no job continuation + return empty; // not a pipe, no job continuation } } } @@ -102,11 +108,12 @@ RESOLVE(job_continuation) { // A statement is a normal command, or an if / while / and etc. RESOLVE(statement) { UNUSED(out_tag); - P boolean = {symbol_boolean_statement}; - P block = {symbol_block_statement}; - P ifs = {symbol_if_statement}; - P switchs = {symbol_switch_statement}; - P decorated = {symbol_decorated_statement}; + P(boolean, symbol_boolean_statement); + P(block, symbol_block_statement); + P(ifs, symbol_if_statement); + P(switchs, symbol_switch_statement); + P(decorated, symbol_decorated_statement); + // The only block-like builtin that takes any parameters is 'function' So go to decorated // statements if the subsequent token looks like '--'. The logic here is subtle: // // If we are a function, then look for help arguments. Otherwise, if the next token looks // like an option (starts with a dash), then parse it as a decorated statement. if (token1.keyword == parse_keyword_function && token2.is_help_argument) { - return &decorated; + return decorated; } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { - return &decorated; + return decorated; } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
@@ -129,7 +136,7 @@ RESOLVE(statement) { (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { - return &decorated; + return decorated; } } @@ -139,28 +146,28 @@ RESOLVE(statement) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: { - return &boolean; + return boolean; } case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: { - return &block; + return block; } case parse_keyword_if: { - return &ifs; + return ifs; } case parse_keyword_else: { return NO_PRODUCTION; } case parse_keyword_switch: { - return &switchs; + return switchs; } case parse_keyword_end: { return NO_PRODUCTION; } // All other keywords fall through to decorated statement. - default: { return &decorated; } + default: { return decorated; } } break; } @@ -169,277 +176,274 @@ RESOLVE(statement) { case parse_token_type_background: case parse_token_type_terminate: { return NO_PRODUCTION; - // parse_error(L"statement", token); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(if_statement) = {symbol_if_clause, symbol_else_clause, symbol_end_command, - symbol_arguments_or_redirections_list}; -RESOLVE_ONLY(if_clause) = {KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, - symbol_andor_job_list, symbol_job_list}; +RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command, + symbol_arguments_or_redirections_list); +RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, + symbol_andor_job_list, symbol_job_list); RESOLVE(else_clause) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P else_cont = {KEYWORD(parse_keyword_else), symbol_else_continuation}; + P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation); + switch (token1.keyword) { case parse_keyword_else: { - return &else_cont; + return else_cont; } - 
default: { return &empty; } + default: { return empty; } } } RESOLVE(else_continuation) { UNUSED(token2); UNUSED(out_tag); - P elseif = {symbol_if_clause, symbol_else_clause}; - P elseonly = {parse_token_type_end, symbol_job_list}; + P(elseif, symbol_if_clause, symbol_else_clause); + P(elseonly, parse_token_type_end, symbol_job_list); switch (token1.keyword) { case parse_keyword_if: { - return &elseif; + return elseif; } - default: { return &elseonly; } + default: { return elseonly; } } } -RESOLVE_ONLY(switch_statement) = { - KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, - symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(switch_statement, KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, + symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list); RESOLVE(case_item_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P case_item = {symbol_case_item, symbol_case_item_list}; - P blank_line = {parse_token_type_end, symbol_case_item_list}; + P(case_item, symbol_case_item, symbol_case_item_list); + P(blank_line, parse_token_type_end, symbol_case_item_list); + if (token1.keyword == parse_keyword_case) - return &case_item; + return case_item; else if (token1.type == parse_token_type_end) - return &blank_line; + return blank_line; else - return &empty; + return empty; } -RESOLVE_ONLY(case_item) = {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, - symbol_job_list}; +RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, + symbol_job_list); RESOLVE(andor_job_list) { UNUSED(out_tag); - P list_end = {}; - P andor_job = {symbol_job, symbol_andor_job_list}; - P empty_line = {parse_token_type_end, symbol_andor_job_list}; + P(andor_job, symbol_job, symbol_andor_job_list); + P(empty_line, parse_token_type_end, symbol_andor_job_list); if (token1.type == parse_token_type_end) { - return &empty_line; + return 
empty_line; } else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) { // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and // --help' or a naked 'and', and not part of this list. if (token2.type == parse_token_type_string && !token2.is_help_argument) { - return &andor_job; + return andor_job; } } // All other cases end the list. - return &list_end; + return empty; } RESOLVE(argument_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P arg = {symbol_argument, symbol_argument_list}; + P(arg, symbol_argument, symbol_argument_list); switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } - default: { return &empty; } + default: { return empty; } } } RESOLVE(freestanding_argument_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P arg = {symbol_argument, symbol_freestanding_argument_list}; - P semicolon = {parse_token_type_end, symbol_freestanding_argument_list}; + P(arg, symbol_argument, symbol_freestanding_argument_list); + P(semicolon, parse_token_type_end, symbol_freestanding_argument_list); switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } case parse_token_type_end: { - return &semicolon; + return semicolon; } - default: { return &empty; } + default: { return empty; } } } -RESOLVE_ONLY(block_statement) = {symbol_block_header, symbol_job_list, symbol_end_command, - symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command, + symbol_arguments_or_redirections_list); RESOLVE(block_header) { UNUSED(token2); UNUSED(out_tag); - P forh = {symbol_for_header}; - P whileh = {symbol_while_header}; - P funch = {symbol_function_header}; - P beginh = {symbol_begin_header}; + P(forh, symbol_for_header); + P(whileh, symbol_while_header); + P(funch, symbol_function_header); + P(beginh, symbol_begin_header); switch (token1.keyword) { case parse_keyword_for: { - return &forh; + 
return forh; } case parse_keyword_while: { - return &whileh; + return whileh; } case parse_keyword_function: { - return &funch; + return funch; } case parse_keyword_begin: { - return &beginh; + return beginh; } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(for_header) = {KEYWORD(parse_keyword_for), parse_token_type_string, - KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end}; -RESOLVE_ONLY(while_header) = {KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, - symbol_andor_job_list}; -RESOLVE_ONLY(begin_header) = {KEYWORD(parse_keyword_begin)}; -RESOLVE_ONLY(function_header) = {KEYWORD(parse_keyword_function), symbol_argument, - symbol_argument_list, parse_token_type_end}; +RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string, + KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end); +RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, + symbol_andor_job_list); +RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin)); +RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument, + symbol_argument_list, parse_token_type_end); // A boolean statement is AND or OR or NOT. 
RESOLVE(boolean_statement) { UNUSED(token2); - P ands = {KEYWORD(parse_keyword_and), symbol_statement}; - P ors = {KEYWORD(parse_keyword_or), symbol_statement}; - P nots = {KEYWORD(parse_keyword_not), symbol_statement}; + P(ands, KEYWORD(parse_keyword_and), symbol_statement); + P(ors, KEYWORD(parse_keyword_or), symbol_statement); + P(nots, KEYWORD(parse_keyword_not), symbol_statement); switch (token1.keyword) { case parse_keyword_and: { *out_tag = parse_bool_and; - return &ands; + return ands; } case parse_keyword_or: { *out_tag = parse_bool_or; - return &ors; + return ors; } case parse_keyword_not: { *out_tag = parse_bool_not; - return &nots; + return nots; } default: { return NO_PRODUCTION; } } } RESOLVE(decorated_statement) { - P plains = {symbol_plain_statement}; - P cmds = {KEYWORD(parse_keyword_command), symbol_plain_statement}; - P builtins = {KEYWORD(parse_keyword_builtin), symbol_plain_statement}; - P execs = {KEYWORD(parse_keyword_exec), symbol_plain_statement}; + P(plains, symbol_plain_statement); + P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement); + P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement); + P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement); // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the // second token is not a string, then this is a naked 'command' and we should execute it as // undecorated. 
if (token2.type != parse_token_type_string || token2.has_dash_prefix) { - return &plains; + return plains; } switch (token1.keyword) { case parse_keyword_command: { *out_tag = parse_statement_decoration_command; - return &cmds; + return cmds; } case parse_keyword_builtin: { *out_tag = parse_statement_decoration_builtin; - return &builtins; + return builtins; } case parse_keyword_exec: { *out_tag = parse_statement_decoration_exec; - return &execs; + return execs; } default: { *out_tag = parse_statement_decoration_none; - return &plains; + return plains; } } } -RESOLVE_ONLY(plain_statement) = {parse_token_type_string, symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list); RESOLVE(arguments_or_redirections_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P value = {symbol_argument_or_redirection, symbol_arguments_or_redirections_list}; + P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list); + switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: { - return &value; + return value; } - default: { return &empty; } + default: { return empty; } } } RESOLVE(argument_or_redirection) { UNUSED(token2); UNUSED(out_tag); - P arg = {symbol_argument}; - P redir = {symbol_redirection}; + P(arg, symbol_argument); + P(redir, symbol_redirection); + switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } case parse_token_type_redirection: { - return &redir; + return redir; } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(argument) = {parse_token_type_string}; -RESOLVE_ONLY(redirection) = {parse_token_type_redirection, parse_token_type_string}; +RESOLVE_ONLY(argument, parse_token_type_string); +RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string); RESOLVE(optional_background) { UNUSED(token2); - P empty = {}; - P background = {parse_token_type_background}; + P(background, 
parse_token_type_background); + switch (token1.type) { case parse_token_type_background: { *out_tag = parse_background; - return &background; + return background; } default: { *out_tag = parse_no_background; - return &empty; + return empty; } } } -RESOLVE_ONLY(end_command) = {KEYWORD(parse_keyword_end)}; +RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end)); #define TEST(sym) \ case (symbol_##sym): \ resolver = resolve_##sym; \ break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, - const parse_token_t &input1, - const parse_token_t &input2, - parse_node_tag_t *out_tag) { +const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type, + const parse_token_t &input1, + const parse_token_t &input2, + parse_node_tag_t *out_tag) { debug(5, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type), input1.describe().c_str()); // Fetch the function to resolve the list of productions. - const production_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param) - const parse_token_t &input2, //!OCLINT(unused param) - parse_node_tag_t *out_tag) = NULL; //!OCLINT(unused param) + const production_element_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param) - const parse_token_t &input2, //!OCLINT(unused param) + const parse_token_t &input2, //!OCLINT(unused param) - parse_node_tag_t *out_tag) = NULL; //!OCLINT(unused param) + parse_node_tag_t *out_tag) = //!OCLINT(unused param) + NULL; switch (node_type) { TEST(job_list) TEST(job) @@ -498,7 +502,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n } PARSE_ASSERT(resolver != NULL); - const production_t *result = resolver(input1, input2, out_tag); + const production_element_t *result = resolver(input1, input2, out_tag); if (result == NULL) { debug(5, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type), input1.describe().c_str(), __FUNCTION__); diff --git a/src/parse_productions.h b/src/parse_productions.h index 3be0cbaf4..ce1589ebb 
100644 --- a/src/parse_productions.h +++ b/src/parse_productions.h @@ -10,13 +10,10 @@ struct parse_token_t; namespace parse_productions { -#define MAX_SYMBOLS_PER_PRODUCTION 6 - // A production is an array of unsigned char. Symbols are encoded directly as their symbol value. // Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together // keywords and symbols. typedef uint8_t production_element_t; -typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; /// Resolve the type from a production element. inline parse_token_type_t production_element_type(production_element_t elem) { @@ -44,8 +41,9 @@ inline bool production_element_is_valid(production_element_t elem) { /// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not /// be invalid; the second token may be invalid if there's no more tokens. We may also set flags. -const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1, - const parse_token_t &input2, uint8_t *out_tag); +const production_element_t *production_for_token(parse_token_type_t node_type, + const parse_token_t &input1, + const parse_token_t &input2, uint8_t *out_tag); } #endif diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 3a0ceb168..effe42091 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -20,53 +20,10 @@ #include "tokenizer.h" #include "wutil.h" // IWYU pragma: keep -// This array provides strings for each symbol in enum parse_token_type_t in parse_constants.h. 
-const wchar_t *const token_type_map[] = { - L"token_type_invalid", - L"symbol_job_list", - L"symbol_job", - L"symbol_job_continuation", - L"symbol_statement", - L"symbol_block_statement", - L"symbol_block_header", - L"symbol_for_header", - L"symbol_while_header", - L"symbol_begin_header", - L"symbol_function_header", - L"symbol_if_statement", - L"symbol_if_clause", - L"symbol_else_clause", - L"symbol_else_continuation", - L"symbol_switch_statement", - L"symbol_case_item_list", - L"symbol_case_item", - L"symbol_boolean_statement", - L"symbol_decorated_statement", - L"symbol_plain_statement", - L"symbol_arguments_or_redirections_list", - L"symbol_argument_or_redirection", - L"symbol_andor_job_list", - L"symbol_argument_list", - L"symbol_freestanding_argument_list", - L"symbol_argument", - L"symbol_redirection", - L"symbol_optional_background", - L"symbol_end_command", - L"parse_token_type_string", - L"parse_token_type_pipe", - L"parse_token_type_redirection", - L"parse_token_type_background", - L"parse_token_type_end", - L"parse_token_type_terminate", - L"parse_special_type_parse_error", - L"parse_special_type_tokenizer_error", - L"parse_special_type_comment", -}; - using namespace parse_productions; -static bool production_is_empty(const production_t *production) { - return (*production)[0] == token_type_invalid; +static bool production_is_empty(const production_element_t *production) { + return *production == token_type_invalid; } /// Returns a string description of this parse error. @@ -164,7 +121,8 @@ void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) { /// Returns a string description for the given token type. 
const wchar_t *token_type_description(parse_token_type_t type) { - if (type >= 0 && type <= LAST_TOKEN_TYPE) return token_type_map[type]; + const wchar_t *description = enum_to_str(type, token_enum_map); + if (description) return description; // This leaks memory but it should never be run unless we have a bug elsewhere in the code. const wcstring d = format_string(L"unknown_token_type_%ld", static_cast(type)); @@ -173,37 +131,9 @@ const wchar_t *token_type_description(parse_token_type_t type) { return std::wcscpy(d2, d.c_str()); } -#define LONGIFY(x) L##x -#define KEYWORD_MAP(x) \ - { parse_keyword_##x, LONGIFY(#x) } -static const struct { - const parse_keyword_t keyword; - const wchar_t *const name; -} -keyword_map[] = -{ - // Note that these must be sorted (except for the first), so that we can do binary search. - KEYWORD_MAP(none), - KEYWORD_MAP(and), - KEYWORD_MAP(begin), - KEYWORD_MAP(builtin), - KEYWORD_MAP(case), - KEYWORD_MAP(command), - KEYWORD_MAP(else), - KEYWORD_MAP(end), - KEYWORD_MAP(exec), - KEYWORD_MAP(for), - KEYWORD_MAP(function), - KEYWORD_MAP(if), - KEYWORD_MAP(in), - KEYWORD_MAP(not), - KEYWORD_MAP(or), - KEYWORD_MAP(switch), - KEYWORD_MAP(while) -}; - const wchar_t *keyword_description(parse_keyword_t type) { - if (type >= 0 && type <= LAST_KEYWORD) return keyword_map[type].name; + const wchar_t *keyword = enum_to_str(type, keyword_enum_map); + if (keyword) return keyword; // This leaks memory but it should never be run unless we have a bug elsewhere in the code. const wcstring d = format_string(L"unknown_keyword_%ld", static_cast(type)); @@ -487,21 +417,20 @@ class parse_ll_t { } /// Pop from the top of the symbol stack, then push the given production, updating node counts. - /// Note that production_t has type "pointer to array" so some care is required. - inline void symbol_stack_pop_push_production(const production_t *production) { + /// Note that production_element_t has type "pointer to array" so some care is required. 
+ inline void symbol_stack_pop_push_production(const production_element_t *production) { bool logit = false; if (logit) { - size_t count = 0; + int count = 0; fprintf(stderr, "Applying production:\n"); - for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { - production_element_t elem = (*production)[i]; - if (production_element_is_valid(elem)) { - parse_token_type_t type = production_element_type(elem); - parse_keyword_t keyword = production_element_keyword(elem); - fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type), - keyword_description(keyword)); - count++; - } + for (int i = 0;; i++) { + production_element_t elem = production[i]; + if (!production_element_is_valid(elem)) break; // all done, bail out + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type), + keyword_description(keyword)); + count++; } if (!count) fprintf(stderr, "\t\n"); } @@ -522,12 +451,9 @@ class parse_ll_t { representative_child.parent = parent_node_idx; node_offset_t child_count = 0; - for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { - production_element_t elem = (*production)[i]; - if (!production_element_is_valid(elem)) { - break; // all done, bail out - } - + for (int i = 0;; i++) { + production_element_t elem = production[i]; + if (!production_element_is_valid(elem)) break; // all done, bail out // Append the parse node. 
representative_child.type = production_element_type(elem); nodes.push_back(representative_child); @@ -550,7 +476,7 @@ class parse_ll_t { symbol_stack.reserve(symbol_stack.size() + child_count); node_offset_t idx = child_count; while (idx--) { - production_element_t elem = (*production)[idx]; + production_element_t elem = production[idx]; PARSE_ASSERT(production_element_is_valid(elem)); symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); } @@ -1053,7 +979,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); parse_node_tag_t tag = 0; - const production_t *production = + const production_element_t *production = production_for_token(stack_elem.type, token1, token2, &tag); node.tag = tag; if (production == NULL) { @@ -1088,23 +1014,8 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { } // Given an expanded string, returns any keyword it matches. -static parse_keyword_t keyword_with_name(const wchar_t *name) { - // Binary search on keyword_map. Start at 1 since 0 is keyword_none. - parse_keyword_t result = parse_keyword_none; - size_t left = 1, right = sizeof keyword_map / sizeof *keyword_map; - while (left < right) { - size_t mid = left + (right - left) / 2; - int cmp = wcscmp(name, keyword_map[mid].name); - if (cmp < 0) { - right = mid; // name was smaller than mid - } else if (cmp > 0) { - left = mid + 1; // name was larger than mid - } else { - result = keyword_map[mid].keyword; // found it - break; - } - } - return result; +static inline parse_keyword_t keyword_with_name(const wchar_t *name) { + return str_to_enum(name, keyword_enum_map, keyword_enum_map_len); } static bool is_keyword_char(wchar_t c) {