mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-26 12:53:13 +00:00
use enum_map for parser enums
This simplifies the parsing code slightly and makes it more consistent with how we deal with enums in the *builtin.cpp* module.
This commit is contained in:
parent
5d6415b6bf
commit
5eb1ef4b4a
4 changed files with 237 additions and 280 deletions
|
@ -2,6 +2,7 @@
|
|||
#ifndef FISH_PARSE_CONSTANTS_H
|
||||
#define FISH_PARSE_CONSTANTS_H
|
||||
|
||||
#include "common.h"
|
||||
#include "config.h"
|
||||
|
||||
#define PARSE_ASSERT(a) assert(a)
|
||||
|
@ -11,11 +12,9 @@
|
|||
exit_without_destructors(-1); \
|
||||
} while (0)
|
||||
|
||||
// IMPORTANT: If the following enum is modified you must update the corresponding parser_token_types
|
||||
// array in parse_tree.cpp.
|
||||
// IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
|
||||
enum parse_token_type_t {
|
||||
token_type_invalid,
|
||||
|
||||
token_type_invalid = 1,
|
||||
// Non-terminal tokens
|
||||
symbol_job_list,
|
||||
symbol_job,
|
||||
|
@ -27,71 +26,97 @@ enum parse_token_type_t {
|
|||
symbol_while_header,
|
||||
symbol_begin_header,
|
||||
symbol_function_header,
|
||||
|
||||
symbol_if_statement,
|
||||
symbol_if_clause,
|
||||
symbol_else_clause,
|
||||
symbol_else_continuation,
|
||||
|
||||
symbol_switch_statement,
|
||||
symbol_case_item_list,
|
||||
symbol_case_item,
|
||||
|
||||
symbol_boolean_statement,
|
||||
symbol_decorated_statement,
|
||||
symbol_plain_statement,
|
||||
symbol_arguments_or_redirections_list,
|
||||
symbol_argument_or_redirection,
|
||||
|
||||
symbol_andor_job_list,
|
||||
|
||||
symbol_argument_list,
|
||||
|
||||
// Freestanding argument lists are parsed from the argument list supplied to 'complete -a'
|
||||
// Freestanding argument lists are parsed from the argument list supplied to 'complete -a'.
|
||||
// They are not generated by parse trees rooted in symbol_job_list.
|
||||
symbol_freestanding_argument_list,
|
||||
|
||||
symbol_argument,
|
||||
symbol_redirection,
|
||||
|
||||
symbol_optional_background,
|
||||
|
||||
symbol_end_command,
|
||||
|
||||
// Terminal types.
|
||||
parse_token_type_string,
|
||||
parse_token_type_pipe,
|
||||
parse_token_type_redirection,
|
||||
parse_token_type_background,
|
||||
parse_token_type_end,
|
||||
|
||||
// Special terminal type that means no more tokens forthcoming.
|
||||
parse_token_type_terminate,
|
||||
|
||||
// Very special terminal types that don't appear in the production list.
|
||||
parse_special_type_parse_error,
|
||||
parse_special_type_tokenizer_error,
|
||||
parse_special_type_comment,
|
||||
LAST_TOKEN_TYPE = parse_special_type_comment,
|
||||
|
||||
LAST_TOKEN_TYPE = parse_special_type_comment,
|
||||
FIRST_TERMINAL_TYPE = parse_token_type_string,
|
||||
LAST_TERMINAL_TYPE = parse_token_type_terminate,
|
||||
|
||||
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
|
||||
|
||||
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
|
||||
LAST_PARSE_TOKEN_TYPE = parse_token_type_end
|
||||
} __packed;
|
||||
// Array of strings corresponding to the enums above instantiated in parse_tree.cpp.
|
||||
extern const wchar_t *const parser_token_types[];
|
||||
|
||||
// These must be maintained in sorted order (except for the trailing NULL-named sentinel). This enables
|
||||
// us to do binary search.
|
||||
const enum_map<parse_token_type_t> token_enum_map[] = {
|
||||
{parse_special_type_comment, L"parse_special_type_comment"},
|
||||
{parse_special_type_parse_error, L"parse_special_type_parse_error"},
|
||||
{parse_special_type_tokenizer_error, L"parse_special_type_tokenizer_error"},
|
||||
{parse_token_type_background, L"parse_token_type_background"},
|
||||
{parse_token_type_end, L"parse_token_type_end"},
|
||||
{parse_token_type_pipe, L"parse_token_type_pipe"},
|
||||
{parse_token_type_redirection, L"parse_token_type_redirection"},
|
||||
{parse_token_type_string, L"parse_token_type_string"},
|
||||
{parse_token_type_terminate, L"parse_token_type_terminate"},
|
||||
{symbol_andor_job_list, L"symbol_andor_job_list"},
|
||||
{symbol_argument, L"symbol_argument"},
|
||||
{symbol_argument_list, L"symbol_argument_list"},
|
||||
{symbol_argument_or_redirection, L"symbol_argument_or_redirection"},
|
||||
{symbol_arguments_or_redirections_list, L"symbol_arguments_or_redirections_list"},
|
||||
{symbol_begin_header, L"symbol_begin_header"},
|
||||
{symbol_block_header, L"symbol_block_header"},
|
||||
{symbol_block_statement, L"symbol_block_statement"},
|
||||
{symbol_boolean_statement, L"symbol_boolean_statement"},
|
||||
{symbol_case_item, L"symbol_case_item"},
|
||||
{symbol_case_item_list, L"symbol_case_item_list"},
|
||||
{symbol_decorated_statement, L"symbol_decorated_statement"},
|
||||
{symbol_else_clause, L"symbol_else_clause"},
|
||||
{symbol_else_continuation, L"symbol_else_continuation"},
|
||||
{symbol_end_command, L"symbol_end_command"},
|
||||
{symbol_for_header, L"symbol_for_header"},
|
||||
{symbol_freestanding_argument_list, L"symbol_freestanding_argument_list"},
|
||||
{symbol_function_header, L"symbol_function_header"},
|
||||
{symbol_if_clause, L"symbol_if_clause"},
|
||||
{symbol_if_statement, L"symbol_if_statement"},
|
||||
{symbol_job, L"symbol_job"},
|
||||
{symbol_job_continuation, L"symbol_job_continuation"},
|
||||
{symbol_job_list, L"symbol_job_list"},
|
||||
{symbol_optional_background, L"symbol_optional_background"},
|
||||
{symbol_plain_statement, L"symbol_plain_statement"},
|
||||
{symbol_redirection, L"symbol_redirection"},
|
||||
{symbol_statement, L"symbol_statement"},
|
||||
{symbol_switch_statement, L"symbol_switch_statement"},
|
||||
{symbol_while_header, L"symbol_while_header"},
|
||||
{token_type_invalid, L"token_type_invalid"},
|
||||
{token_type_invalid, NULL}};
|
||||
#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map)
|
||||
|
||||
// IMPORTANT: If the following enum is modified you must update the corresponding keyword_enum_map
|
||||
// array below.
|
||||
//
|
||||
// IMPORTANT: If the following enum is modified you must update the corresponding keyword_map array
|
||||
// in parse_tree.cpp.
|
||||
// IMPORTANT: These enums must start at zero.
|
||||
enum parse_keyword_t {
|
||||
parse_keyword_none,
|
||||
parse_keyword_none = 0,
|
||||
parse_keyword_and,
|
||||
parse_keyword_begin,
|
||||
parse_keyword_builtin,
|
||||
|
@ -108,9 +133,28 @@ enum parse_keyword_t {
|
|||
parse_keyword_or,
|
||||
parse_keyword_switch,
|
||||
parse_keyword_while,
|
||||
LAST_KEYWORD = parse_keyword_while
|
||||
} __packed;
|
||||
|
||||
const enum_map<parse_keyword_t> keyword_enum_map[] = {
|
||||
{parse_keyword_and, L"and"},
|
||||
{parse_keyword_begin, L"begin"},
|
||||
{parse_keyword_builtin, L"builtin"},
|
||||
{parse_keyword_case, L"case"},
|
||||
{parse_keyword_command, L"command"},
|
||||
{parse_keyword_else, L"else"},
|
||||
{parse_keyword_end, L"end"},
|
||||
{parse_keyword_exec, L"exec"},
|
||||
{parse_keyword_for, L"for"},
|
||||
{parse_keyword_function, L"function"},
|
||||
{parse_keyword_if, L"if"},
|
||||
{parse_keyword_in, L"in"},
|
||||
{parse_keyword_not, L"not"},
|
||||
{parse_keyword_or, L"or"},
|
||||
{parse_keyword_switch, L"switch"},
|
||||
{parse_keyword_while, L"while"},
|
||||
{parse_keyword_none, NULL}};
|
||||
#define keyword_enum_map_len (sizeof keyword_enum_map / sizeof *keyword_enum_map)
|
||||
|
||||
// Node tag values.
|
||||
|
||||
// Statement decorations, stored in node tag.
|
||||
|
|
|
@ -21,34 +21,41 @@ using namespace parse_productions;
|
|||
// Productions are generally a static const array, and we return a pointer to the array (yes,
|
||||
// really).
|
||||
|
||||
#define RESOLVE(sym) \
|
||||
static const production_t *resolve_##sym( \
|
||||
#define RESOLVE(sym) \
|
||||
static const production_element_t *resolve_##sym( \
|
||||
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
|
||||
|
||||
// Hacktastic?
|
||||
#define RESOLVE_ONLY(sym) \
|
||||
extern const production_t sym##_only; \
|
||||
static const production_t *resolve_##sym( \
|
||||
// This is a shorthand for symbols which always resolve to the same production sequence. Using this
|
||||
// avoids repeating a lot of boilerplate code below.
|
||||
#define RESOLVE_ONLY(sym, tokens...) \
|
||||
extern const production_element_t sym##_only[]; \
|
||||
static const production_element_t *resolve_##sym( \
|
||||
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \
|
||||
UNUSED(token1); \
|
||||
UNUSED(token2); \
|
||||
UNUSED(out_tag); \
|
||||
return &sym##_only; \
|
||||
return sym##_only; \
|
||||
} \
|
||||
const production_t sym##_only
|
||||
const production_element_t sym##_only[] = {tokens, token_type_invalid}
|
||||
|
||||
#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1)
|
||||
// Convert a parse_keyword_t enum to a parse_token_type_t enum.
|
||||
#define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1)
|
||||
|
||||
/// Helper macro to define an array.
|
||||
#define P static const production_t
|
||||
/// Helper macro to define a production sequence. Note that such sequences must always end with
|
||||
/// enum `token_type_invalid`.
|
||||
#define P(production_name, tokens...) \
|
||||
static const production_element_t production_name[] = {tokens, token_type_invalid}
|
||||
|
||||
/// The empty production is used often enough it's worth defining once at module scope.
|
||||
static const production_element_t empty[] = {token_type_invalid};
|
||||
|
||||
/// A job_list is a list of jobs, separated by semicolons or newlines.
|
||||
RESOLVE(job_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P list_end = {};
|
||||
P normal = {symbol_job, symbol_job_list};
|
||||
P empty_line = {parse_token_type_end, symbol_job_list};
|
||||
P(normal, symbol_job, symbol_job_list);
|
||||
P(empty_line, parse_token_type_end, symbol_job_list);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
// Some keywords are special.
|
||||
|
@ -56,23 +63,23 @@ RESOLVE(job_list) {
|
|||
case parse_keyword_end:
|
||||
case parse_keyword_else:
|
||||
case parse_keyword_case: {
|
||||
return &list_end; // end this job list
|
||||
return empty; // end this job list
|
||||
}
|
||||
default: {
|
||||
return &normal; // normal string
|
||||
return normal; // normal string
|
||||
}
|
||||
}
|
||||
}
|
||||
case parse_token_type_pipe:
|
||||
case parse_token_type_redirection:
|
||||
case parse_token_type_background: {
|
||||
return &normal;
|
||||
return normal;
|
||||
}
|
||||
case parse_token_type_end: {
|
||||
return &empty_line;
|
||||
return empty_line;
|
||||
}
|
||||
case parse_token_type_terminate: {
|
||||
return &list_end; // no more commands, just transition to empty
|
||||
return empty; // no more commands, just transition to empty
|
||||
}
|
||||
default: { return NO_PRODUCTION; }
|
||||
}
|
||||
|
@ -81,20 +88,19 @@ RESOLVE(job_list) {
|
|||
// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like
|
||||
// if statements, where we require a command). To represent "non-empty", we require a statement,
|
||||
// followed by a possibly empty job_continuation.
|
||||
|
||||
RESOLVE_ONLY(job) = {symbol_statement, symbol_job_continuation, symbol_optional_background};
|
||||
RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background);
|
||||
|
||||
RESOLVE(job_continuation) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P piped = {parse_token_type_pipe, symbol_statement, symbol_job_continuation};
|
||||
P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_pipe: {
|
||||
return &piped; // pipe, continuation
|
||||
return piped; // pipe, continuation
|
||||
}
|
||||
default: {
|
||||
return &empty; // not a pipe, no job continuation
|
||||
return empty; // not a pipe, no job continuation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -102,11 +108,12 @@ RESOLVE(job_continuation) {
|
|||
// A statement is a normal command, or an if / while / and etc.
|
||||
RESOLVE(statement) {
|
||||
UNUSED(out_tag);
|
||||
P boolean = {symbol_boolean_statement};
|
||||
P block = {symbol_block_statement};
|
||||
P ifs = {symbol_if_statement};
|
||||
P switchs = {symbol_switch_statement};
|
||||
P decorated = {symbol_decorated_statement};
|
||||
P(boolean, symbol_boolean_statement);
|
||||
P(block, symbol_block_statement);
|
||||
P(ifs, symbol_if_statement);
|
||||
P(switchs, symbol_switch_statement);
|
||||
P(decorated, symbol_decorated_statement);
|
||||
|
||||
// The only block-like builtin that takes any parameters is 'function'. So go to decorated
|
||||
// statements if the subsequent token looks like '--'. The logic here is subtle:
|
||||
//
|
||||
|
@ -118,9 +125,9 @@ RESOLVE(statement) {
|
|||
// If we are a function, then look for help arguments. Otherwise, if the next token looks
|
||||
// like an option (starts with a dash), then parse it as a decorated statement.
|
||||
if (token1.keyword == parse_keyword_function && token2.is_help_argument) {
|
||||
return &decorated;
|
||||
return decorated;
|
||||
} else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) {
|
||||
return &decorated;
|
||||
return decorated;
|
||||
}
|
||||
|
||||
// Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
|
||||
|
@ -129,7 +136,7 @@ RESOLVE(statement) {
|
|||
(token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end);
|
||||
if (naked_invocation_invokes_help &&
|
||||
(token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) {
|
||||
return &decorated;
|
||||
return decorated;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -139,28 +146,28 @@ RESOLVE(statement) {
|
|||
case parse_keyword_and:
|
||||
case parse_keyword_or:
|
||||
case parse_keyword_not: {
|
||||
return &boolean;
|
||||
return boolean;
|
||||
}
|
||||
case parse_keyword_for:
|
||||
case parse_keyword_while:
|
||||
case parse_keyword_function:
|
||||
case parse_keyword_begin: {
|
||||
return &block;
|
||||
return block;
|
||||
}
|
||||
case parse_keyword_if: {
|
||||
return &ifs;
|
||||
return ifs;
|
||||
}
|
||||
case parse_keyword_else: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
case parse_keyword_switch: {
|
||||
return &switchs;
|
||||
return switchs;
|
||||
}
|
||||
case parse_keyword_end: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
// All other keywords fall through to decorated statement.
|
||||
default: { return &decorated; }
|
||||
default: { return decorated; }
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -169,277 +176,274 @@ RESOLVE(statement) {
|
|||
case parse_token_type_background:
|
||||
case parse_token_type_terminate: {
|
||||
return NO_PRODUCTION;
|
||||
// parse_error(L"statement", token);
|
||||
}
|
||||
default: { return NO_PRODUCTION; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(if_statement) = {symbol_if_clause, symbol_else_clause, symbol_end_command,
|
||||
symbol_arguments_or_redirections_list};
|
||||
RESOLVE_ONLY(if_clause) = {KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end,
|
||||
symbol_andor_job_list, symbol_job_list};
|
||||
RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command,
|
||||
symbol_arguments_or_redirections_list);
|
||||
RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end,
|
||||
symbol_andor_job_list, symbol_job_list);
|
||||
|
||||
RESOLVE(else_clause) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P else_cont = {KEYWORD(parse_keyword_else), symbol_else_continuation};
|
||||
P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation);
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_else: {
|
||||
return &else_cont;
|
||||
return else_cont;
|
||||
}
|
||||
default: { return &empty; }
|
||||
default: { return empty; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE(else_continuation) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P elseif = {symbol_if_clause, symbol_else_clause};
|
||||
P elseonly = {parse_token_type_end, symbol_job_list};
|
||||
P(elseif, symbol_if_clause, symbol_else_clause);
|
||||
P(elseonly, parse_token_type_end, symbol_job_list);
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_if: {
|
||||
return &elseif;
|
||||
return elseif;
|
||||
}
|
||||
default: { return &elseonly; }
|
||||
default: { return elseonly; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(switch_statement) = {
|
||||
KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end,
|
||||
symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list};
|
||||
RESOLVE_ONLY(switch_statement, KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end,
|
||||
symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list);
|
||||
|
||||
RESOLVE(case_item_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P case_item = {symbol_case_item, symbol_case_item_list};
|
||||
P blank_line = {parse_token_type_end, symbol_case_item_list};
|
||||
P(case_item, symbol_case_item, symbol_case_item_list);
|
||||
P(blank_line, parse_token_type_end, symbol_case_item_list);
|
||||
|
||||
if (token1.keyword == parse_keyword_case)
|
||||
return &case_item;
|
||||
return case_item;
|
||||
else if (token1.type == parse_token_type_end)
|
||||
return &blank_line;
|
||||
return blank_line;
|
||||
else
|
||||
return &empty;
|
||||
return empty;
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(case_item) = {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end,
|
||||
symbol_job_list};
|
||||
RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end,
|
||||
symbol_job_list);
|
||||
|
||||
RESOLVE(andor_job_list) {
|
||||
UNUSED(out_tag);
|
||||
P list_end = {};
|
||||
P andor_job = {symbol_job, symbol_andor_job_list};
|
||||
P empty_line = {parse_token_type_end, symbol_andor_job_list};
|
||||
P(andor_job, symbol_job, symbol_andor_job_list);
|
||||
P(empty_line, parse_token_type_end, symbol_andor_job_list);
|
||||
|
||||
if (token1.type == parse_token_type_end) {
|
||||
return &empty_line;
|
||||
return empty_line;
|
||||
} else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) {
|
||||
// Check that the argument to and/or is a string that's not help. Otherwise it's either 'and
|
||||
// --help' or a naked 'and', and not part of this list.
|
||||
if (token2.type == parse_token_type_string && !token2.is_help_argument) {
|
||||
return &andor_job;
|
||||
return andor_job;
|
||||
}
|
||||
}
|
||||
// All other cases end the list.
|
||||
return &list_end;
|
||||
return empty;
|
||||
}
|
||||
|
||||
RESOLVE(argument_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P arg = {symbol_argument, symbol_argument_list};
|
||||
P(arg, symbol_argument, symbol_argument_list);
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
return &arg;
|
||||
return arg;
|
||||
}
|
||||
default: { return &empty; }
|
||||
default: { return empty; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE(freestanding_argument_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P arg = {symbol_argument, symbol_freestanding_argument_list};
|
||||
P semicolon = {parse_token_type_end, symbol_freestanding_argument_list};
|
||||
P(arg, symbol_argument, symbol_freestanding_argument_list);
|
||||
P(semicolon, parse_token_type_end, symbol_freestanding_argument_list);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
return &arg;
|
||||
return arg;
|
||||
}
|
||||
case parse_token_type_end: {
|
||||
return &semicolon;
|
||||
return semicolon;
|
||||
}
|
||||
default: { return &empty; }
|
||||
default: { return empty; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(block_statement) = {symbol_block_header, symbol_job_list, symbol_end_command,
|
||||
symbol_arguments_or_redirections_list};
|
||||
RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command,
|
||||
symbol_arguments_or_redirections_list);
|
||||
|
||||
RESOLVE(block_header) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P forh = {symbol_for_header};
|
||||
P whileh = {symbol_while_header};
|
||||
P funch = {symbol_function_header};
|
||||
P beginh = {symbol_begin_header};
|
||||
P(forh, symbol_for_header);
|
||||
P(whileh, symbol_while_header);
|
||||
P(funch, symbol_function_header);
|
||||
P(beginh, symbol_begin_header);
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_for: {
|
||||
return &forh;
|
||||
return forh;
|
||||
}
|
||||
case parse_keyword_while: {
|
||||
return &whileh;
|
||||
return whileh;
|
||||
}
|
||||
case parse_keyword_function: {
|
||||
return &funch;
|
||||
return funch;
|
||||
}
|
||||
case parse_keyword_begin: {
|
||||
return &beginh;
|
||||
return beginh;
|
||||
}
|
||||
default: { return NO_PRODUCTION; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(for_header) = {KEYWORD(parse_keyword_for), parse_token_type_string,
|
||||
KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end};
|
||||
RESOLVE_ONLY(while_header) = {KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end,
|
||||
symbol_andor_job_list};
|
||||
RESOLVE_ONLY(begin_header) = {KEYWORD(parse_keyword_begin)};
|
||||
RESOLVE_ONLY(function_header) = {KEYWORD(parse_keyword_function), symbol_argument,
|
||||
symbol_argument_list, parse_token_type_end};
|
||||
RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string,
|
||||
KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end);
|
||||
RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end,
|
||||
symbol_andor_job_list);
|
||||
RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin));
|
||||
RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument,
|
||||
symbol_argument_list, parse_token_type_end);
|
||||
|
||||
// A boolean statement is AND or OR or NOT.
|
||||
RESOLVE(boolean_statement) {
|
||||
UNUSED(token2);
|
||||
P ands = {KEYWORD(parse_keyword_and), symbol_statement};
|
||||
P ors = {KEYWORD(parse_keyword_or), symbol_statement};
|
||||
P nots = {KEYWORD(parse_keyword_not), symbol_statement};
|
||||
P(ands, KEYWORD(parse_keyword_and), symbol_statement);
|
||||
P(ors, KEYWORD(parse_keyword_or), symbol_statement);
|
||||
P(nots, KEYWORD(parse_keyword_not), symbol_statement);
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_and: {
|
||||
*out_tag = parse_bool_and;
|
||||
return &ands;
|
||||
return ands;
|
||||
}
|
||||
case parse_keyword_or: {
|
||||
*out_tag = parse_bool_or;
|
||||
return &ors;
|
||||
return ors;
|
||||
}
|
||||
case parse_keyword_not: {
|
||||
*out_tag = parse_bool_not;
|
||||
return &nots;
|
||||
return nots;
|
||||
}
|
||||
default: { return NO_PRODUCTION; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE(decorated_statement) {
|
||||
P plains = {symbol_plain_statement};
|
||||
P cmds = {KEYWORD(parse_keyword_command), symbol_plain_statement};
|
||||
P builtins = {KEYWORD(parse_keyword_builtin), symbol_plain_statement};
|
||||
P execs = {KEYWORD(parse_keyword_exec), symbol_plain_statement};
|
||||
P(plains, symbol_plain_statement);
|
||||
P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement);
|
||||
P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement);
|
||||
P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement);
|
||||
|
||||
// If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the
|
||||
// second token is not a string, then this is a naked 'command' and we should execute it as
|
||||
// undecorated.
|
||||
if (token2.type != parse_token_type_string || token2.has_dash_prefix) {
|
||||
return &plains;
|
||||
return plains;
|
||||
}
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_command: {
|
||||
*out_tag = parse_statement_decoration_command;
|
||||
return &cmds;
|
||||
return cmds;
|
||||
}
|
||||
case parse_keyword_builtin: {
|
||||
*out_tag = parse_statement_decoration_builtin;
|
||||
return &builtins;
|
||||
return builtins;
|
||||
}
|
||||
case parse_keyword_exec: {
|
||||
*out_tag = parse_statement_decoration_exec;
|
||||
return &execs;
|
||||
return execs;
|
||||
}
|
||||
default: {
|
||||
*out_tag = parse_statement_decoration_none;
|
||||
return &plains;
|
||||
return plains;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(plain_statement) = {parse_token_type_string, symbol_arguments_or_redirections_list};
|
||||
RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list);
|
||||
|
||||
RESOLVE(arguments_or_redirections_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P empty = {};
|
||||
P value = {symbol_argument_or_redirection, symbol_arguments_or_redirections_list};
|
||||
P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string:
|
||||
case parse_token_type_redirection: {
|
||||
return &value;
|
||||
return value;
|
||||
}
|
||||
default: { return &empty; }
|
||||
default: { return empty; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE(argument_or_redirection) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
P arg = {symbol_argument};
|
||||
P redir = {symbol_redirection};
|
||||
P(arg, symbol_argument);
|
||||
P(redir, symbol_redirection);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
return &arg;
|
||||
return arg;
|
||||
}
|
||||
case parse_token_type_redirection: {
|
||||
return &redir;
|
||||
return redir;
|
||||
}
|
||||
default: { return NO_PRODUCTION; }
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(argument) = {parse_token_type_string};
|
||||
RESOLVE_ONLY(redirection) = {parse_token_type_redirection, parse_token_type_string};
|
||||
RESOLVE_ONLY(argument, parse_token_type_string);
|
||||
RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string);
|
||||
|
||||
RESOLVE(optional_background) {
|
||||
UNUSED(token2);
|
||||
P empty = {};
|
||||
P background = {parse_token_type_background};
|
||||
P(background, parse_token_type_background);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_background: {
|
||||
*out_tag = parse_background;
|
||||
return &background;
|
||||
return background;
|
||||
}
|
||||
default: {
|
||||
*out_tag = parse_no_background;
|
||||
return &empty;
|
||||
return empty;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RESOLVE_ONLY(end_command) = {KEYWORD(parse_keyword_end)};
|
||||
RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end));
|
||||
|
||||
#define TEST(sym) \
|
||||
case (symbol_##sym): \
|
||||
resolver = resolve_##sym; \
|
||||
break;
|
||||
|
||||
const production_t *parse_productions::production_for_token(parse_token_type_t node_type,
|
||||
const parse_token_t &input1,
|
||||
const parse_token_t &input2,
|
||||
parse_node_tag_t *out_tag) {
|
||||
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
|
||||
const parse_token_t &input1,
|
||||
const parse_token_t &input2,
|
||||
parse_node_tag_t *out_tag) {
|
||||
debug(5, "Resolving production for %ls with input token <%ls>\n",
|
||||
token_type_description(node_type), input1.describe().c_str());
|
||||
|
||||
// Fetch the function to resolve the list of productions.
|
||||
const production_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param)
|
||||
const parse_token_t &input2, //!OCLINT(unused param)
|
||||
parse_node_tag_t *out_tag) = NULL; //!OCLINT(unused param)
|
||||
const production_element_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param)
|
||||
const parse_token_t &input2, //!OCLINT(unused param)
|
||||
parse_node_tag_t *out_tag) = //!OCLINT(unused param)
|
||||
NULL;
|
||||
switch (node_type) {
|
||||
TEST(job_list)
|
||||
TEST(job)
|
||||
|
@ -498,7 +502,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
|
|||
}
|
||||
PARSE_ASSERT(resolver != NULL);
|
||||
|
||||
const production_t *result = resolver(input1, input2, out_tag);
|
||||
const production_element_t *result = resolver(input1, input2, out_tag);
|
||||
if (result == NULL) {
|
||||
debug(5, "Node type '%ls' has no production for input '%ls' (in %s)\n",
|
||||
token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
|
||||
|
|
|
@ -10,13 +10,10 @@ struct parse_token_t;
|
|||
|
||||
namespace parse_productions {
|
||||
|
||||
#define MAX_SYMBOLS_PER_PRODUCTION 6
|
||||
|
||||
// A production is an array of unsigned char. Symbols are encoded directly as their symbol value.
|
||||
// Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together
|
||||
// keywords and symbols.
|
||||
typedef uint8_t production_element_t;
|
||||
typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION];
|
||||
|
||||
/// Resolve the type from a production element.
|
||||
inline parse_token_type_t production_element_type(production_element_t elem) {
|
||||
|
@ -44,8 +41,9 @@ inline bool production_element_is_valid(production_element_t elem) {
|
|||
|
||||
/// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not
|
||||
/// be invalid; the second token may be invalid if there are no more tokens. We may also set flags.
|
||||
const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1,
|
||||
const parse_token_t &input2, uint8_t *out_tag);
|
||||
const production_element_t *production_for_token(parse_token_type_t node_type,
|
||||
const parse_token_t &input1,
|
||||
const parse_token_t &input2, uint8_t *out_tag);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,53 +20,10 @@
|
|||
#include "tokenizer.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
// This array provides strings for each symbol in enum parse_token_type_t in parse_constants.h.
|
||||
const wchar_t *const token_type_map[] = {
|
||||
L"token_type_invalid",
|
||||
L"symbol_job_list",
|
||||
L"symbol_job",
|
||||
L"symbol_job_continuation",
|
||||
L"symbol_statement",
|
||||
L"symbol_block_statement",
|
||||
L"symbol_block_header",
|
||||
L"symbol_for_header",
|
||||
L"symbol_while_header",
|
||||
L"symbol_begin_header",
|
||||
L"symbol_function_header",
|
||||
L"symbol_if_statement",
|
||||
L"symbol_if_clause",
|
||||
L"symbol_else_clause",
|
||||
L"symbol_else_continuation",
|
||||
L"symbol_switch_statement",
|
||||
L"symbol_case_item_list",
|
||||
L"symbol_case_item",
|
||||
L"symbol_boolean_statement",
|
||||
L"symbol_decorated_statement",
|
||||
L"symbol_plain_statement",
|
||||
L"symbol_arguments_or_redirections_list",
|
||||
L"symbol_argument_or_redirection",
|
||||
L"symbol_andor_job_list",
|
||||
L"symbol_argument_list",
|
||||
L"symbol_freestanding_argument_list",
|
||||
L"symbol_argument",
|
||||
L"symbol_redirection",
|
||||
L"symbol_optional_background",
|
||||
L"symbol_end_command",
|
||||
L"parse_token_type_string",
|
||||
L"parse_token_type_pipe",
|
||||
L"parse_token_type_redirection",
|
||||
L"parse_token_type_background",
|
||||
L"parse_token_type_end",
|
||||
L"parse_token_type_terminate",
|
||||
L"parse_special_type_parse_error",
|
||||
L"parse_special_type_tokenizer_error",
|
||||
L"parse_special_type_comment",
|
||||
};
|
||||
|
||||
using namespace parse_productions;
|
||||
|
||||
static bool production_is_empty(const production_t *production) {
|
||||
return (*production)[0] == token_type_invalid;
|
||||
static bool production_is_empty(const production_element_t *production) {
|
||||
return *production == token_type_invalid;
|
||||
}
|
||||
|
||||
/// Returns a string description of this parse error.
|
||||
|
@ -164,7 +121,8 @@ void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) {
|
|||
|
||||
/// Returns a string description for the given token type.
|
||||
const wchar_t *token_type_description(parse_token_type_t type) {
|
||||
if (type >= 0 && type <= LAST_TOKEN_TYPE) return token_type_map[type];
|
||||
const wchar_t *description = enum_to_str(type, token_enum_map);
|
||||
if (description) return description;
|
||||
|
||||
// This leaks memory but it should never be run unless we have a bug elsewhere in the code.
|
||||
const wcstring d = format_string(L"unknown_token_type_%ld", static_cast<long>(type));
|
||||
|
@ -173,37 +131,9 @@ const wchar_t *token_type_description(parse_token_type_t type) {
|
|||
return std::wcscpy(d2, d.c_str());
|
||||
}
|
||||
|
||||
#define LONGIFY(x) L##x
|
||||
#define KEYWORD_MAP(x) \
|
||||
{ parse_keyword_##x, LONGIFY(#x) }
|
||||
static const struct {
|
||||
const parse_keyword_t keyword;
|
||||
const wchar_t *const name;
|
||||
}
|
||||
keyword_map[] =
|
||||
{
|
||||
// Note that these must be sorted (except for the first), so that we can do binary search.
|
||||
KEYWORD_MAP(none),
|
||||
KEYWORD_MAP(and),
|
||||
KEYWORD_MAP(begin),
|
||||
KEYWORD_MAP(builtin),
|
||||
KEYWORD_MAP(case),
|
||||
KEYWORD_MAP(command),
|
||||
KEYWORD_MAP(else),
|
||||
KEYWORD_MAP(end),
|
||||
KEYWORD_MAP(exec),
|
||||
KEYWORD_MAP(for),
|
||||
KEYWORD_MAP(function),
|
||||
KEYWORD_MAP(if),
|
||||
KEYWORD_MAP(in),
|
||||
KEYWORD_MAP(not),
|
||||
KEYWORD_MAP(or),
|
||||
KEYWORD_MAP(switch),
|
||||
KEYWORD_MAP(while)
|
||||
};
|
||||
|
||||
const wchar_t *keyword_description(parse_keyword_t type) {
|
||||
if (type >= 0 && type <= LAST_KEYWORD) return keyword_map[type].name;
|
||||
const wchar_t *keyword = enum_to_str(type, keyword_enum_map);
|
||||
if (keyword) return keyword;
|
||||
|
||||
// This leaks memory but it should never be run unless we have a bug elsewhere in the code.
|
||||
const wcstring d = format_string(L"unknown_keyword_%ld", static_cast<long>(type));
|
||||
|
@ -487,21 +417,20 @@ class parse_ll_t {
|
|||
}
|
||||
|
||||
/// Pop from the top of the symbol stack, then push the given production, updating node counts.
|
||||
/// Note that production_t has type "pointer to array" so some care is required.
|
||||
inline void symbol_stack_pop_push_production(const production_t *production) {
|
||||
/// Note that production points at the first production_element_t; an invalid element ends the list.
|
||||
inline void symbol_stack_pop_push_production(const production_element_t *production) {
|
||||
bool logit = false;
|
||||
if (logit) {
|
||||
size_t count = 0;
|
||||
int count = 0;
|
||||
fprintf(stderr, "Applying production:\n");
|
||||
for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) {
|
||||
production_element_t elem = (*production)[i];
|
||||
if (production_element_is_valid(elem)) {
|
||||
parse_token_type_t type = production_element_type(elem);
|
||||
parse_keyword_t keyword = production_element_keyword(elem);
|
||||
fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type),
|
||||
keyword_description(keyword));
|
||||
count++;
|
||||
}
|
||||
for (int i = 0;; i++) {
|
||||
production_element_t elem = production[i];
|
||||
if (!production_element_is_valid(elem)) break; // all done, bail out
|
||||
parse_token_type_t type = production_element_type(elem);
|
||||
parse_keyword_t keyword = production_element_keyword(elem);
|
||||
fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type),
|
||||
keyword_description(keyword));
|
||||
count++;
|
||||
}
|
||||
if (!count) fprintf(stderr, "\t<empty>\n");
|
||||
}
|
||||
|
@ -522,12 +451,9 @@ class parse_ll_t {
|
|||
representative_child.parent = parent_node_idx;
|
||||
|
||||
node_offset_t child_count = 0;
|
||||
for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) {
|
||||
production_element_t elem = (*production)[i];
|
||||
if (!production_element_is_valid(elem)) {
|
||||
break; // all done, bail out
|
||||
}
|
||||
|
||||
for (int i = 0;; i++) {
|
||||
production_element_t elem = production[i];
|
||||
if (!production_element_is_valid(elem)) break; // all done, bail out
|
||||
// Append the parse node.
|
||||
representative_child.type = production_element_type(elem);
|
||||
nodes.push_back(representative_child);
|
||||
|
@ -550,7 +476,7 @@ class parse_ll_t {
|
|||
symbol_stack.reserve(symbol_stack.size() + child_count);
|
||||
node_offset_t idx = child_count;
|
||||
while (idx--) {
|
||||
production_element_t elem = (*production)[idx];
|
||||
production_element_t elem = production[idx];
|
||||
PARSE_ASSERT(production_element_is_valid(elem));
|
||||
symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx));
|
||||
}
|
||||
|
@ -1053,7 +979,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) {
|
|||
parse_stack_element_t &stack_elem = symbol_stack.back();
|
||||
parse_node_t &node = nodes.at(stack_elem.node_idx);
|
||||
parse_node_tag_t tag = 0;
|
||||
const production_t *production =
|
||||
const production_element_t *production =
|
||||
production_for_token(stack_elem.type, token1, token2, &tag);
|
||||
node.tag = tag;
|
||||
if (production == NULL) {
|
||||
|
@ -1088,23 +1014,8 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) {
|
|||
}
|
||||
|
||||
// Given an expanded string, returns any keyword it matches.
|
||||
static parse_keyword_t keyword_with_name(const wchar_t *name) {
|
||||
// Binary search on keyword_map. Start at index 1 since entry 0 is parse_keyword_none, which is exempt from the sort order.
|
||||
parse_keyword_t result = parse_keyword_none;
|
||||
size_t left = 1, right = sizeof keyword_map / sizeof *keyword_map;
|
||||
while (left < right) {
|
||||
size_t mid = left + (right - left) / 2;
|
||||
int cmp = wcscmp(name, keyword_map[mid].name);
|
||||
if (cmp < 0) {
|
||||
right = mid; // name was smaller than mid
|
||||
} else if (cmp > 0) {
|
||||
left = mid + 1; // name was larger than mid
|
||||
} else {
|
||||
result = keyword_map[mid].keyword; // found it
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
static inline parse_keyword_t keyword_with_name(const wchar_t *name) {
|
||||
return str_to_enum(name, keyword_enum_map, keyword_enum_map_len);
|
||||
}
|
||||
|
||||
static bool is_keyword_char(wchar_t c) {
|
||||
|
|
Loading…
Reference in a new issue