From 5eb1ef4b4ad41ce45fcb37b1d18ab9dfb41b9ae7 Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Wed, 9 Nov 2016 21:37:49 -0800 Subject: [PATCH] use enum_map for parser enums This simplifies the parsing code slightly and makes it more consistent with how we deal with enums in the *builtin.cpp* module. --- src/parse_constants.h | 100 ++++++++++---- src/parse_productions.cpp | 272 +++++++++++++++++++------------------- src/parse_productions.h | 8 +- src/parse_tree.cpp | 137 ++++--------------- 4 files changed, 237 insertions(+), 280 deletions(-) diff --git a/src/parse_constants.h b/src/parse_constants.h index 2ee6287b7..84a3f9828 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -2,6 +2,7 @@ #ifndef FISH_PARSE_CONSTANTS_H #define FISH_PARSE_CONSTANTS_H +#include "common.h" #include "config.h" #define PARSE_ASSERT(a) assert(a) @@ -11,11 +12,9 @@ exit_without_destructors(-1); \ } while (0) -// IMPORTANT: If the following enum is modified you must update the corresponding parser_token_types -// array in parse_tree.cpp. +// IMPORTANT: If the following enum table is modified you must also update token_enum_map below. enum parse_token_type_t { - token_type_invalid, - + token_type_invalid = 1, // Non-terminal tokens symbol_job_list, symbol_job, @@ -27,71 +26,97 @@ enum parse_token_type_t { symbol_while_header, symbol_begin_header, symbol_function_header, - symbol_if_statement, symbol_if_clause, symbol_else_clause, symbol_else_continuation, - symbol_switch_statement, symbol_case_item_list, symbol_case_item, - symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - symbol_andor_job_list, - symbol_argument_list, - - // Freestanding argument lists are parsed from the argument list supplied to 'complete -a' + // Freestanding argument lists are parsed from the argument list supplied to 'complete -a'. // They are not generated by parse trees rooted in symbol_job_list. 
symbol_freestanding_argument_list, - symbol_argument, symbol_redirection, - symbol_optional_background, - symbol_end_command, - // Terminal types. parse_token_type_string, parse_token_type_pipe, parse_token_type_redirection, parse_token_type_background, parse_token_type_end, - // Special terminal type that means no more tokens forthcoming. parse_token_type_terminate, - // Very special terminal types that don't appear in the production list. parse_special_type_parse_error, parse_special_type_tokenizer_error, parse_special_type_comment, - LAST_TOKEN_TYPE = parse_special_type_comment, + LAST_TOKEN_TYPE = parse_special_type_comment, FIRST_TERMINAL_TYPE = parse_token_type_string, LAST_TERMINAL_TYPE = parse_token_type_terminate, - LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string, LAST_PARSE_TOKEN_TYPE = parse_token_type_end } __packed; -// Array of strings corresponding to the enums above instantiated in parse_tree.cpp. -extern const wchar_t *const parser_token_types[]; -// These must be maintained in sorted order (except for none, which isn't a keyword). This enables -// us to do binary search. 
+const enum_map token_enum_map[] = { + {parse_special_type_comment, L"parse_special_type_comment"}, + {parse_special_type_parse_error, L"parse_special_type_parse_error"}, + {parse_special_type_tokenizer_error, L"parse_special_type_tokenizer_error"}, + {parse_token_type_background, L"parse_token_type_background"}, + {parse_token_type_end, L"parse_token_type_end"}, + {parse_token_type_pipe, L"parse_token_type_pipe"}, + {parse_token_type_redirection, L"parse_token_type_redirection"}, + {parse_token_type_string, L"parse_token_type_string"}, + {parse_token_type_terminate, L"parse_token_type_terminate"}, + {symbol_andor_job_list, L"symbol_andor_job_list"}, + {symbol_argument, L"symbol_argument"}, + {symbol_argument_list, L"symbol_argument_list"}, + {symbol_argument_or_redirection, L"symbol_argument_or_redirection"}, + {symbol_arguments_or_redirections_list, L"symbol_arguments_or_redirections_list"}, + {symbol_begin_header, L"symbol_begin_header"}, + {symbol_block_header, L"symbol_block_header"}, + {symbol_block_statement, L"symbol_block_statement"}, + {symbol_boolean_statement, L"symbol_boolean_statement"}, + {symbol_case_item, L"symbol_case_item"}, + {symbol_case_item_list, L"symbol_case_item_list"}, + {symbol_decorated_statement, L"symbol_decorated_statement"}, + {symbol_else_clause, L"symbol_else_clause"}, + {symbol_else_continuation, L"symbol_else_continuation"}, + {symbol_end_command, L"symbol_end_command"}, + {symbol_for_header, L"symbol_for_header"}, + {symbol_freestanding_argument_list, L"symbol_freestanding_argument_list"}, + {symbol_function_header, L"symbol_function_header"}, + {symbol_if_clause, L"symbol_if_clause"}, + {symbol_if_statement, L"symbol_if_statement"}, + {symbol_job, L"symbol_job"}, + {symbol_job_continuation, L"symbol_job_continuation"}, + {symbol_job_list, L"symbol_job_list"}, + {symbol_optional_background, L"symbol_optional_background"}, + {symbol_plain_statement, L"symbol_plain_statement"}, + {symbol_redirection, L"symbol_redirection"}, + 
{symbol_statement, L"symbol_statement"}, + {symbol_switch_statement, L"symbol_switch_statement"}, + {symbol_while_header, L"symbol_while_header"}, + {token_type_invalid, L"token_type_invalid"}, + {token_type_invalid, NULL}}; +#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map) + +// IMPORTANT: If the following enum is modified you must update the corresponding keyword_enum_map +// array below. // -// IMPORTANT: If the following enum is modified you must update the corresponding keyword_map array -// in parse_tree.cpp. +// IMPORTANT: These enums must start at zero. enum parse_keyword_t { - parse_keyword_none, + parse_keyword_none = 0, parse_keyword_and, parse_keyword_begin, parse_keyword_builtin, @@ -108,9 +133,28 @@ enum parse_keyword_t { parse_keyword_or, parse_keyword_switch, parse_keyword_while, - LAST_KEYWORD = parse_keyword_while } __packed; +const enum_map keyword_enum_map[] = { + {parse_keyword_and, L"and"}, + {parse_keyword_begin, L"begin"}, + {parse_keyword_builtin, L"builtin"}, + {parse_keyword_case, L"case"}, + {parse_keyword_command, L"command"}, + {parse_keyword_else, L"else"}, + {parse_keyword_end, L"end"}, + {parse_keyword_exec, L"exec"}, + {parse_keyword_for, L"for"}, + {parse_keyword_function, L"function"}, + {parse_keyword_if, L"if"}, + {parse_keyword_in, L"in"}, + {parse_keyword_not, L"not"}, + {parse_keyword_or, L"or"}, + {parse_keyword_switch, L"switch"}, + {parse_keyword_while, L"while"}, + {parse_keyword_none, NULL}}; +#define keyword_enum_map_len (sizeof keyword_enum_map / sizeof *keyword_enum_map) + // Node tag values. // Statement decorations, stored in node tag. diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp index b3d976905..dbbd84610 100644 --- a/src/parse_productions.cpp +++ b/src/parse_productions.cpp @@ -21,34 +21,41 @@ using namespace parse_productions; // Productions are generally a static const array, and we return a pointer to the array (yes, // really). 
-#define RESOLVE(sym) \ - static const production_t *resolve_##sym( \ +#define RESOLVE(sym) \ + static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) -// Hacktastic? -#define RESOLVE_ONLY(sym) \ - extern const production_t sym##_only; \ - static const production_t *resolve_##sym( \ +// This is a shorthand for symbols which always resolve to the same production sequence. Using this +// avoids repeating a lot of boilerplate code below. +#define RESOLVE_ONLY(sym, tokens...) \ + extern const production_element_t sym##_only[]; \ + static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \ UNUSED(token1); \ UNUSED(token2); \ UNUSED(out_tag); \ - return &sym##_only; \ + return sym##_only; \ } \ - const production_t sym##_only + const production_element_t sym##_only[] = {tokens, token_type_invalid} -#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) +// Convert a parse_keyword_t enum to a parse_token_type_t enum. +#define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1) -/// Helper macro to define an array. -#define P static const production_t +/// Helper macro to define a production sequence. Note that such sequences must always end with +/// enum `token_type_invalid`. +#define P(production_name, tokens...) \ + static const production_element_t production_name[] = {tokens, token_type_invalid} + +/// The empty production is used often enough it's worth definining once at module scope. +static const production_element_t empty[] = {token_type_invalid}; /// A job_list is a list of jobs, separated by semicolons or newlines. 
RESOLVE(job_list) { UNUSED(token2); UNUSED(out_tag); - P list_end = {}; - P normal = {symbol_job, symbol_job_list}; - P empty_line = {parse_token_type_end, symbol_job_list}; + P(normal, symbol_job, symbol_job_list); + P(empty_line, parse_token_type_end, symbol_job_list); + switch (token1.type) { case parse_token_type_string: { // Some keywords are special. @@ -56,23 +63,23 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: { - return &list_end; // end this job list + return empty; // end this job list } default: { - return &normal; // normal string + return normal; // normal string } } } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: { - return &normal; + return normal; } case parse_token_type_end: { - return &empty_line; + return empty_line; } case parse_token_type_terminate: { - return &list_end; // no more commands, just transition to empty + return empty; // no more commands, just transition to empty } default: { return NO_PRODUCTION; } } @@ -81,20 +88,19 @@ RESOLVE(job_list) { // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like // if statements, where we require a command). To represent "non-empty", we require a statement, // followed by a possibly empty job_continuation. 
- -RESOLVE_ONLY(job) = {symbol_statement, symbol_job_continuation, symbol_optional_background}; +RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background); RESOLVE(job_continuation) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P piped = {parse_token_type_pipe, symbol_statement, symbol_job_continuation}; + P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation); + switch (token1.type) { case parse_token_type_pipe: { - return &piped; // pipe, continuation + return piped; // pipe, continuation } default: { - return &empty; // not a pipe, no job continuation + return empty; // not a pipe, no job continuation } } } @@ -102,11 +108,12 @@ RESOLVE(job_continuation) { // A statement is a normal command, or an if / while / and etc. RESOLVE(statement) { UNUSED(out_tag); - P boolean = {symbol_boolean_statement}; - P block = {symbol_block_statement}; - P ifs = {symbol_if_statement}; - P switchs = {symbol_switch_statement}; - P decorated = {symbol_decorated_statement}; + P(boolean, symbol_boolean_statement); + P(block, symbol_block_statement); + P(ifs, symbol_if_statement); + P(switchs, symbol_switch_statement); + P(decorated, symbol_decorated_statement); + // The only block-like builtin that takes any parameters is 'function' So go to decorated // statements if the subsequent token looks like '--'. The logic here is subtle: // // If we are a function, then look for help arguments. Otherwise, if the next token looks // like an option (starts with a dash), then parse it as a decorated statement. if (token1.keyword == parse_keyword_function && token2.is_help_argument) { - return &decorated; + return decorated; } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { - return &decorated; + return decorated; } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
@@ -129,7 +136,7 @@ RESOLVE(statement) { (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { - return &decorated; + return decorated; } } @@ -139,28 +146,28 @@ RESOLVE(statement) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: { - return &boolean; + return boolean; } case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: { - return &block; + return block; } case parse_keyword_if: { - return &ifs; + return ifs; } case parse_keyword_else: { return NO_PRODUCTION; } case parse_keyword_switch: { - return &switchs; + return switchs; } case parse_keyword_end: { return NO_PRODUCTION; } // All other keywords fall through to decorated statement. - default: { return &decorated; } + default: { return decorated; } } break; } @@ -169,277 +176,274 @@ RESOLVE(statement) { case parse_token_type_background: case parse_token_type_terminate: { return NO_PRODUCTION; - // parse_error(L"statement", token); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(if_statement) = {symbol_if_clause, symbol_else_clause, symbol_end_command, - symbol_arguments_or_redirections_list}; -RESOLVE_ONLY(if_clause) = {KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, - symbol_andor_job_list, symbol_job_list}; +RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command, + symbol_arguments_or_redirections_list); +RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, + symbol_andor_job_list, symbol_job_list); RESOLVE(else_clause) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P else_cont = {KEYWORD(parse_keyword_else), symbol_else_continuation}; + P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation); + switch (token1.keyword) { case parse_keyword_else: { - return &else_cont; + return else_cont; } - 
default: { return &empty; } + default: { return empty; } } } RESOLVE(else_continuation) { UNUSED(token2); UNUSED(out_tag); - P elseif = {symbol_if_clause, symbol_else_clause}; - P elseonly = {parse_token_type_end, symbol_job_list}; + P(elseif, symbol_if_clause, symbol_else_clause); + P(elseonly, parse_token_type_end, symbol_job_list); switch (token1.keyword) { case parse_keyword_if: { - return &elseif; + return elseif; } - default: { return &elseonly; } + default: { return elseonly; } } } -RESOLVE_ONLY(switch_statement) = { - KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, - symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(switch_statement, KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, + symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list); RESOLVE(case_item_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P case_item = {symbol_case_item, symbol_case_item_list}; - P blank_line = {parse_token_type_end, symbol_case_item_list}; + P(case_item, symbol_case_item, symbol_case_item_list); + P(blank_line, parse_token_type_end, symbol_case_item_list); + if (token1.keyword == parse_keyword_case) - return &case_item; + return case_item; else if (token1.type == parse_token_type_end) - return &blank_line; + return blank_line; else - return &empty; + return empty; } -RESOLVE_ONLY(case_item) = {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, - symbol_job_list}; +RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, + symbol_job_list); RESOLVE(andor_job_list) { UNUSED(out_tag); - P list_end = {}; - P andor_job = {symbol_job, symbol_andor_job_list}; - P empty_line = {parse_token_type_end, symbol_andor_job_list}; + P(andor_job, symbol_job, symbol_andor_job_list); + P(empty_line, parse_token_type_end, symbol_andor_job_list); if (token1.type == parse_token_type_end) { - return &empty_line; + return 
empty_line; } else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) { // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and // --help' or a naked 'and', and not part of this list. if (token2.type == parse_token_type_string && !token2.is_help_argument) { - return &andor_job; + return andor_job; } } // All other cases end the list. - return &list_end; + return empty; } RESOLVE(argument_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P arg = {symbol_argument, symbol_argument_list}; + P(arg, symbol_argument, symbol_argument_list); switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } - default: { return &empty; } + default: { return empty; } } } RESOLVE(freestanding_argument_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P arg = {symbol_argument, symbol_freestanding_argument_list}; - P semicolon = {parse_token_type_end, symbol_freestanding_argument_list}; + P(arg, symbol_argument, symbol_freestanding_argument_list); + P(semicolon, parse_token_type_end, symbol_freestanding_argument_list); switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } case parse_token_type_end: { - return &semicolon; + return semicolon; } - default: { return &empty; } + default: { return empty; } } } -RESOLVE_ONLY(block_statement) = {symbol_block_header, symbol_job_list, symbol_end_command, - symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command, + symbol_arguments_or_redirections_list); RESOLVE(block_header) { UNUSED(token2); UNUSED(out_tag); - P forh = {symbol_for_header}; - P whileh = {symbol_while_header}; - P funch = {symbol_function_header}; - P beginh = {symbol_begin_header}; + P(forh, symbol_for_header); + P(whileh, symbol_while_header); + P(funch, symbol_function_header); + P(beginh, symbol_begin_header); switch (token1.keyword) { case parse_keyword_for: { - return &forh; + 
return forh; } case parse_keyword_while: { - return &whileh; + return whileh; } case parse_keyword_function: { - return &funch; + return funch; } case parse_keyword_begin: { - return &beginh; + return beginh; } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(for_header) = {KEYWORD(parse_keyword_for), parse_token_type_string, - KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end}; -RESOLVE_ONLY(while_header) = {KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, - symbol_andor_job_list}; -RESOLVE_ONLY(begin_header) = {KEYWORD(parse_keyword_begin)}; -RESOLVE_ONLY(function_header) = {KEYWORD(parse_keyword_function), symbol_argument, - symbol_argument_list, parse_token_type_end}; +RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string, + KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end); +RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, + symbol_andor_job_list); +RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin)); +RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument, + symbol_argument_list, parse_token_type_end); // A boolean statement is AND or OR or NOT. 
RESOLVE(boolean_statement) { UNUSED(token2); - P ands = {KEYWORD(parse_keyword_and), symbol_statement}; - P ors = {KEYWORD(parse_keyword_or), symbol_statement}; - P nots = {KEYWORD(parse_keyword_not), symbol_statement}; + P(ands, KEYWORD(parse_keyword_and), symbol_statement); + P(ors, KEYWORD(parse_keyword_or), symbol_statement); + P(nots, KEYWORD(parse_keyword_not), symbol_statement); switch (token1.keyword) { case parse_keyword_and: { *out_tag = parse_bool_and; - return &ands; + return ands; } case parse_keyword_or: { *out_tag = parse_bool_or; - return &ors; + return ors; } case parse_keyword_not: { *out_tag = parse_bool_not; - return &nots; + return nots; } default: { return NO_PRODUCTION; } } } RESOLVE(decorated_statement) { - P plains = {symbol_plain_statement}; - P cmds = {KEYWORD(parse_keyword_command), symbol_plain_statement}; - P builtins = {KEYWORD(parse_keyword_builtin), symbol_plain_statement}; - P execs = {KEYWORD(parse_keyword_exec), symbol_plain_statement}; + P(plains, symbol_plain_statement); + P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement); + P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement); + P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement); // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the // second token is not a string, then this is a naked 'command' and we should execute it as // undecorated. 
if (token2.type != parse_token_type_string || token2.has_dash_prefix) { - return &plains; + return plains; } switch (token1.keyword) { case parse_keyword_command: { *out_tag = parse_statement_decoration_command; - return &cmds; + return cmds; } case parse_keyword_builtin: { *out_tag = parse_statement_decoration_builtin; - return &builtins; + return builtins; } case parse_keyword_exec: { *out_tag = parse_statement_decoration_exec; - return &execs; + return execs; } default: { *out_tag = parse_statement_decoration_none; - return &plains; + return plains; } } } -RESOLVE_ONLY(plain_statement) = {parse_token_type_string, symbol_arguments_or_redirections_list}; +RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list); RESOLVE(arguments_or_redirections_list) { UNUSED(token2); UNUSED(out_tag); - P empty = {}; - P value = {symbol_argument_or_redirection, symbol_arguments_or_redirections_list}; + P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list); + switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: { - return &value; + return value; } - default: { return &empty; } + default: { return empty; } } } RESOLVE(argument_or_redirection) { UNUSED(token2); UNUSED(out_tag); - P arg = {symbol_argument}; - P redir = {symbol_redirection}; + P(arg, symbol_argument); + P(redir, symbol_redirection); + switch (token1.type) { case parse_token_type_string: { - return &arg; + return arg; } case parse_token_type_redirection: { - return &redir; + return redir; } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(argument) = {parse_token_type_string}; -RESOLVE_ONLY(redirection) = {parse_token_type_redirection, parse_token_type_string}; +RESOLVE_ONLY(argument, parse_token_type_string); +RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string); RESOLVE(optional_background) { UNUSED(token2); - P empty = {}; - P background = {parse_token_type_background}; + P(background, 
parse_token_type_background); + switch (token1.type) { case parse_token_type_background: { *out_tag = parse_background; - return &background; + return background; } default: { *out_tag = parse_no_background; - return &empty; + return empty; } } } -RESOLVE_ONLY(end_command) = {KEYWORD(parse_keyword_end)}; +RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end)); #define TEST(sym) \ case (symbol_##sym): \ resolver = resolve_##sym; \ break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, - const parse_token_t &input1, - const parse_token_t &input2, - parse_node_tag_t *out_tag) { +const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type, + const parse_token_t &input1, + const parse_token_t &input2, + parse_node_tag_t *out_tag) { debug(5, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type), input1.describe().c_str()); // Fetch the function to resolve the list of productions. - const production_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param) - const parse_token_t &input2, //!OCLINT(unused param) - parse_node_tag_t *out_tag) = NULL; //!OCLINT(unused param) + const production_element_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param) - const parse_token_t &input2, //!OCLINT(unused param) + const parse_token_t &input2, //!OCLINT(unused param) - parse_node_tag_t *out_tag) = NULL; //!OCLINT(unused param) + parse_node_tag_t *out_tag) = //!OCLINT(unused param) + NULL; switch (node_type) { TEST(job_list) TEST(job) @@ -498,7 +502,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n } PARSE_ASSERT(resolver != NULL); - const production_t *result = resolver(input1, input2, out_tag); + const production_element_t *result = resolver(input1, input2, out_tag); if (result == NULL) { debug(5, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type), input1.describe().c_str(), __FUNCTION__); diff --git a/src/parse_productions.h b/src/parse_productions.h index 3be0cbaf4..ce1589ebb 
100644 --- a/src/parse_productions.h +++ b/src/parse_productions.h @@ -10,13 +10,10 @@ struct parse_token_t; namespace parse_productions { -#define MAX_SYMBOLS_PER_PRODUCTION 6 - // A production is an array of unsigned char. Symbols are encoded directly as their symbol value. // Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together // keywords and symbols. typedef uint8_t production_element_t; -typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; /// Resolve the type from a production element. inline parse_token_type_t production_element_type(production_element_t elem) { @@ -44,8 +41,9 @@ inline bool production_element_is_valid(production_element_t elem) { /// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not /// be invalid; the second token may be invalid if there's no more tokens. We may also set flags. -const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1, - const parse_token_t &input2, uint8_t *out_tag); +const production_element_t *production_for_token(parse_token_type_t node_type, + const parse_token_t &input1, + const parse_token_t &input2, uint8_t *out_tag); } #endif diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 3a0ceb168..effe42091 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -20,53 +20,10 @@ #include "tokenizer.h" #include "wutil.h" // IWYU pragma: keep -// This array provides strings for each symbol in enum parse_token_type_t in parse_constants.h. 
-const wchar_t *const token_type_map[] = { - L"token_type_invalid", - L"symbol_job_list", - L"symbol_job", - L"symbol_job_continuation", - L"symbol_statement", - L"symbol_block_statement", - L"symbol_block_header", - L"symbol_for_header", - L"symbol_while_header", - L"symbol_begin_header", - L"symbol_function_header", - L"symbol_if_statement", - L"symbol_if_clause", - L"symbol_else_clause", - L"symbol_else_continuation", - L"symbol_switch_statement", - L"symbol_case_item_list", - L"symbol_case_item", - L"symbol_boolean_statement", - L"symbol_decorated_statement", - L"symbol_plain_statement", - L"symbol_arguments_or_redirections_list", - L"symbol_argument_or_redirection", - L"symbol_andor_job_list", - L"symbol_argument_list", - L"symbol_freestanding_argument_list", - L"symbol_argument", - L"symbol_redirection", - L"symbol_optional_background", - L"symbol_end_command", - L"parse_token_type_string", - L"parse_token_type_pipe", - L"parse_token_type_redirection", - L"parse_token_type_background", - L"parse_token_type_end", - L"parse_token_type_terminate", - L"parse_special_type_parse_error", - L"parse_special_type_tokenizer_error", - L"parse_special_type_comment", -}; - using namespace parse_productions; -static bool production_is_empty(const production_t *production) { - return (*production)[0] == token_type_invalid; +static bool production_is_empty(const production_element_t *production) { + return *production == token_type_invalid; } /// Returns a string description of this parse error. @@ -164,7 +121,8 @@ void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) { /// Returns a string description for the given token type. 
const wchar_t *token_type_description(parse_token_type_t type) { - if (type >= 0 && type <= LAST_TOKEN_TYPE) return token_type_map[type]; + const wchar_t *description = enum_to_str(type, token_enum_map); + if (description) return description; // This leaks memory but it should never be run unless we have a bug elsewhere in the code. const wcstring d = format_string(L"unknown_token_type_%ld", static_cast(type)); @@ -173,37 +131,9 @@ const wchar_t *token_type_description(parse_token_type_t type) { return std::wcscpy(d2, d.c_str()); } -#define LONGIFY(x) L##x -#define KEYWORD_MAP(x) \ - { parse_keyword_##x, LONGIFY(#x) } -static const struct { - const parse_keyword_t keyword; - const wchar_t *const name; -} -keyword_map[] = -{ - // Note that these must be sorted (except for the first), so that we can do binary search. - KEYWORD_MAP(none), - KEYWORD_MAP(and), - KEYWORD_MAP(begin), - KEYWORD_MAP(builtin), - KEYWORD_MAP(case), - KEYWORD_MAP(command), - KEYWORD_MAP(else), - KEYWORD_MAP(end), - KEYWORD_MAP(exec), - KEYWORD_MAP(for), - KEYWORD_MAP(function), - KEYWORD_MAP(if), - KEYWORD_MAP(in), - KEYWORD_MAP(not), - KEYWORD_MAP(or), - KEYWORD_MAP(switch), - KEYWORD_MAP(while) -}; - const wchar_t *keyword_description(parse_keyword_t type) { - if (type >= 0 && type <= LAST_KEYWORD) return keyword_map[type].name; + const wchar_t *keyword = enum_to_str(type, keyword_enum_map); + if (keyword) return keyword; // This leaks memory but it should never be run unless we have a bug elsewhere in the code. const wcstring d = format_string(L"unknown_keyword_%ld", static_cast(type)); @@ -487,21 +417,20 @@ class parse_ll_t { } /// Pop from the top of the symbol stack, then push the given production, updating node counts. - /// Note that production_t has type "pointer to array" so some care is required. - inline void symbol_stack_pop_push_production(const production_t *production) { + /// Note that production_element_t has type "pointer to array" so some care is required. 
+ inline void symbol_stack_pop_push_production(const production_element_t *production) { bool logit = false; if (logit) { - size_t count = 0; + int count = 0; fprintf(stderr, "Applying production:\n"); - for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { - production_element_t elem = (*production)[i]; - if (production_element_is_valid(elem)) { - parse_token_type_t type = production_element_type(elem); - parse_keyword_t keyword = production_element_keyword(elem); - fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type), - keyword_description(keyword)); - count++; - } + for (int i = 0;; i++) { + production_element_t elem = production[i]; + if (!production_element_is_valid(elem)) break; // all done, bail out + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type), + keyword_description(keyword)); + count++; } if (!count) fprintf(stderr, "\t\n"); } @@ -522,12 +451,9 @@ class parse_ll_t { representative_child.parent = parent_node_idx; node_offset_t child_count = 0; - for (size_t i = 0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { - production_element_t elem = (*production)[i]; - if (!production_element_is_valid(elem)) { - break; // all done, bail out - } - + for (int i = 0;; i++) { + production_element_t elem = production[i]; + if (!production_element_is_valid(elem)) break; // all done, bail out // Append the parse node. 
representative_child.type = production_element_type(elem); nodes.push_back(representative_child); @@ -550,7 +476,7 @@ class parse_ll_t { symbol_stack.reserve(symbol_stack.size() + child_count); node_offset_t idx = child_count; while (idx--) { - production_element_t elem = (*production)[idx]; + production_element_t elem = production[idx]; PARSE_ASSERT(production_element_is_valid(elem)); symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); } @@ -1053,7 +979,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); parse_node_tag_t tag = 0; - const production_t *production = + const production_element_t *production = production_for_token(stack_elem.type, token1, token2, &tag); node.tag = tag; if (production == NULL) { @@ -1088,23 +1014,8 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { } // Given an expanded string, returns any keyword it matches. -static parse_keyword_t keyword_with_name(const wchar_t *name) { - // Binary search on keyword_map. Start at 1 since 0 is keyword_none. - parse_keyword_t result = parse_keyword_none; - size_t left = 1, right = sizeof keyword_map / sizeof *keyword_map; - while (left < right) { - size_t mid = left + (right - left) / 2; - int cmp = wcscmp(name, keyword_map[mid].name); - if (cmp < 0) { - right = mid; // name was smaller than mid - } else if (cmp > 0) { - left = mid + 1; // name was larger than mid - } else { - result = keyword_map[mid].keyword; // found it - break; - } - } - return result; +static inline parse_keyword_t keyword_with_name(const wchar_t *name) { + return str_to_enum(name, keyword_enum_map, keyword_enum_map_len); } static bool is_keyword_char(wchar_t c) {