#include "parse_productions.h" using namespace parse_productions; #define NO_PRODUCTION ((production_option_idx_t)(-1)) static bool production_is_empty(const production_t production) { return production[0] == token_type_invalid; } /* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */ static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) { if (which < 0 || which >= MAX_PRODUCTIONS) return false; bool nonempty_found = false; for (int i=which; i < MAX_PRODUCTIONS; i++) { if (! production_is_empty(production_list[i])) { nonempty_found = true; break; } } return nonempty_found; } #define PRODUCTIONS(sym) static const production_options_t productions_##sym #define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2) #define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = { {}, {symbol_job, symbol_job_list}, {parse_token_type_end, symbol_job_list} }; RESOLVE(job_list) { switch (token1.type) { case parse_token_type_string: // some keywords are special switch (token1.keyword) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: // End this job list return 0; default: // Normal string return 1; } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: return 1; case parse_token_type_end: // Empty line return 2; case parse_token_type_terminate: // no more commands, just transition to empty return 0; default: return NO_PRODUCTION; } } /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ PRODUCTIONS(job) = { {symbol_statement, symbol_job_continuation} }; RESOLVE_ONLY(job) PRODUCTIONS(job_continuation) = { {}, {parse_token_type_pipe, symbol_statement, symbol_job_continuation} }; RESOLVE(job_continuation) { switch (token1.type) { case parse_token_type_pipe: // Pipe, continuation return 1; default: // Not a pipe, no job continuation return 0; } } /* A statement is a normal command, or an if / while / and etc */ PRODUCTIONS(statement) = { {symbol_boolean_statement}, {symbol_block_statement}, {symbol_if_statement}, {symbol_switch_statement}, {symbol_decorated_statement} }; RESOLVE(statement) { /* The only block-like builtin that takes any parameters is 'function' So go to decorated statements if the subsequent token looks like '--'. The logic here is subtle: If we are 'begin', then we expect to be invoked with no arguments. If we are 'function', then we are a non-block if we are invoked with -h or --help If we are anything else, we require an argument, so do the same thing if the subsequent token is a statement terminator. */ if (token1.type == parse_token_type_string) { // If we are a function, then look for help arguments // Othewrise, if the next token looks like an option (starts with a dash), then parse it as a decorated statement if (token1.keyword == parse_keyword_function && token2.is_help_argument) { return 4; } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { return 4; } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if". bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { return 4; } } switch (token1.type) { case parse_token_type_string: switch (token1.keyword) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: return 0; case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: return 1; case parse_keyword_if: return 2; case parse_keyword_else: return NO_PRODUCTION; case parse_keyword_switch: return 3; case parse_keyword_end: return NO_PRODUCTION; // 'in' is only special within a for_header case parse_keyword_in: case parse_keyword_none: case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: return 4; } break; case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: case parse_token_type_terminate: return NO_PRODUCTION; //parse_error(L"statement", token); default: return NO_PRODUCTION; } } PRODUCTIONS(if_statement) = { {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) PRODUCTIONS(if_clause) = { { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } }; RESOLVE_ONLY(if_clause) PRODUCTIONS(else_clause) = { { }, { KEYWORD(parse_keyword_else), symbol_else_continuation } }; RESOLVE(else_clause) { switch (token1.keyword) { case parse_keyword_else: return 1; default: return 0; } } PRODUCTIONS(else_continuation) = { {symbol_if_clause, symbol_else_clause}, {parse_token_type_end, symbol_job_list} }; RESOLVE(else_continuation) { switch (token1.keyword) { case parse_keyword_if: return 0; default: return 1; } } PRODUCTIONS(switch_statement) = { { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(switch_statement) PRODUCTIONS(case_item_list) = { {}, {symbol_case_item, symbol_case_item_list}, {parse_token_type_end, symbol_case_item_list} }; RESOLVE(case_item_list) { if (token1.keyword == parse_keyword_case) return 1; else if (token1.type == parse_token_type_end) return 2; //empty line else return 0; } PRODUCTIONS(case_item) = { {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} }; RESOLVE_ONLY(case_item) PRODUCTIONS(argument_list) = { {}, {symbol_argument, symbol_argument_list} }; RESOLVE(argument_list) { switch (token1.type) { case parse_token_type_string: return 1; default: return 0; } } PRODUCTIONS(block_statement) = { {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) PRODUCTIONS(block_header) = { {symbol_for_header}, {symbol_while_header}, {symbol_function_header}, {symbol_begin_header} }; RESOLVE(block_header) { switch (token1.keyword) { case parse_keyword_for: return 0; case parse_keyword_while: return 1; case parse_keyword_function: return 2; case parse_keyword_begin: return 3; default: return NO_PRODUCTION; } } PRODUCTIONS(for_header) = { {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list} }; RESOLVE_ONLY(for_header) PRODUCTIONS(while_header) = { {KEYWORD(parse_keyword_while), symbol_job} }; RESOLVE_ONLY(while_header) PRODUCTIONS(begin_header) = { {KEYWORD(parse_keyword_begin)} }; RESOLVE_ONLY(begin_header) PRODUCTIONS(function_header) = { {KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list} }; RESOLVE_ONLY(function_header) /* A boolean statement is AND or OR or NOT */ PRODUCTIONS(boolean_statement) = { {KEYWORD(parse_keyword_and), symbol_statement}, {KEYWORD(parse_keyword_or), symbol_statement}, {KEYWORD(parse_keyword_not), symbol_statement} }; RESOLVE(boolean_statement) { switch (token1.keyword) { case parse_keyword_and: return 0; case parse_keyword_or: return 1; case parse_keyword_not: return 2; default: return NO_PRODUCTION; } } PRODUCTIONS(decorated_statement) = { {symbol_plain_statement}, {KEYWORD(parse_keyword_command), symbol_plain_statement}, {KEYWORD(parse_keyword_builtin), symbol_plain_statement}, }; RESOLVE(decorated_statement) { /* If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the second token is not a string, then this is a naked 'command' and we should execute it as undecorated. */ if (token2.type != parse_token_type_string || token2.has_dash_prefix) { return 0; } switch (token1.keyword) { default: return 0; case parse_keyword_command: return 1; case parse_keyword_builtin: return 2; } } PRODUCTIONS(plain_statement) = { {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background} }; RESOLVE_ONLY(plain_statement) PRODUCTIONS(arguments_or_redirections_list) = { {}, {symbol_argument_or_redirection, symbol_arguments_or_redirections_list} }; RESOLVE(arguments_or_redirections_list) { switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: return 1; default: return 0; } } PRODUCTIONS(argument_or_redirection) = { {symbol_argument}, {symbol_redirection} }; RESOLVE(argument_or_redirection) { switch (token1.type) { case parse_token_type_string: return 0; case parse_token_type_redirection: return 1; default: return NO_PRODUCTION; } } PRODUCTIONS(argument) = { {parse_token_type_string} }; RESOLVE_ONLY(argument) PRODUCTIONS(redirection) = { {parse_token_type_redirection, parse_token_type_string} }; RESOLVE_ONLY(redirection) PRODUCTIONS(optional_background) = { {}, { parse_token_type_background } }; RESOLVE(optional_background) { switch (token1.type) { case parse_token_type_background: return 1; default: return 0; } } PRODUCTIONS(end_command) = { {KEYWORD(parse_keyword_end)} }; RESOLVE_ONLY(end_command) #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) { fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str()); } /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; production_option_idx_t (*resolver)(const parse_token_t &input1, const parse_token_t &input2) = NULL; switch (node_type) { TEST(job_list) TEST(job) TEST(statement) TEST(job_continuation) TEST(boolean_statement) TEST(block_statement) TEST(if_statement) TEST(if_clause) TEST(else_clause) TEST(else_continuation) TEST(switch_statement) TEST(decorated_statement) TEST(case_item_list) TEST(case_item) TEST(argument_list) TEST(block_header) TEST(for_header) TEST(while_header) TEST(begin_header) TEST(function_header) TEST(plain_statement) TEST(arguments_or_redirections_list) TEST(argument_or_redirection) TEST(argument) TEST(redirection) TEST(optional_background) TEST(end_command) case parse_token_type_string: case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: case parse_token_type_end: case parse_token_type_terminate: fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; case parse_special_type_parse_error: case parse_special_type_tokenizer_error: case parse_special_type_comment: fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; case token_type_invalid: fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); PARSER_DIE(); break; } PARSE_ASSERT(production_list != NULL); PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; production_option_idx_t which = resolver(input1, input2); if (log_it) { fprintf(stderr, "\tresolved to %u\n", (unsigned)which); } if (which == NO_PRODUCTION) { if (log_it) { fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__); } result = NULL; } else { PARSE_ASSERT(production_is_valid(*production_list, which)); result = &((*production_list)[which]); } *out_which_production = which; return result; }