More work on new parser

2024-12-27 05:13:10 +00:00 · 2013-08-11 00:35:00 -07:00 · 2013-08-11 00:35:00 -07:00 · e58b73179f
commit e58b73179f
parent 8e07e55c1f
6 changed files with 532 additions and 215 deletions
--- a/builtin.cpp
+++ b/builtin.cpp
@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
        }
        else
        {
-            if (0) {
+            const wcstring dump = parse_dump_tree(parse_tree, src);
            fprintf(stderr, "%ls", dump.c_str());
            if (0)
            {
                parse_execution_context_t ctx(parse_tree, src);
                parse_execution_simulator_t sim;
                sim.context = &ctx;
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -1816,7 +1816,10 @@ static void test_new_parser_correctness(void)
        {L"if true ; end", true},
        {L"if true; end ; end", false},
        {L"if end; end ; end", false},
-        {L"end", false}
+        {L"if end", false},
        {L"end", false},
        {L"for i i", false},
        {L"for i in a b c ; end", true}
    };
    for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void)
        }
    }
    say(L"Parse tests complete");
 }
 /* One token of fuzzer input: a token type paired with a keyword. */
 struct parser_fuzz_token_t
 {
    parse_token_type_t token_type;
    parse_keyword_t keyword;

    /* Begin at the first terminal type, carrying no keyword */
    parser_fuzz_token_t()
        : token_type(FIRST_TERMINAL_TYPE),
          keyword(parse_keyword_none)
    {
    }
 };
 static bool increment(std::vector<parser_fuzz_token_t> &tokens)
 {
    size_t i, end = tokens.size();
    for (i=0; i < end; i++)
    {
        bool wrapped = false;
        struct parser_fuzz_token_t &token = tokens[i];
        bool incremented_in_keyword = false;
        if (token.token_type == parse_token_type_string)
        {
            // try incrementing the keyword
            token.keyword++;
            if (token.keyword <= LAST_KEYWORD)
            {
                incremented_in_keyword = true;
            }
            else
            {
                token.keyword = parse_keyword_none;
                incremented_in_keyword = false;
            }
        }
        if (! incremented_in_keyword)
        {
            token.token_type++;
            if (token.token_type > LAST_TERMINAL_TYPE)
            {
                token.token_type = FIRST_TERMINAL_TYPE;
                wrapped = true;
            }
        }
        if (! wrapped)
        {
            break;
        }
    }
    return i == end;
 }
 /* Fuzz the new parser to make sure nothing crashes. For each length from 1 to max, start from a
    default-constructed token sequence and feed it token by token to a fresh parse_t, stepping to
    the next sequence with increment() until it reports that the sequence has wrapped around.
    Progress goes to stderr and the elapsed time is reported via say(). */
 static void test_new_parser_fuzzing(void)
 {
    // size_t is not necessarily unsigned long, so cast explicitly to match the %lu specifiers
    say(L"Fuzzing parser (node size: %lu)", static_cast<unsigned long>(sizeof(parse_node_t)));
    double start = timef();
    // ensure nothing crashes
    size_t max = 5;
    for (size_t len=1; len <= max; len++)
    {
        fprintf(stderr, "%lu / %lu\n", static_cast<unsigned long>(len), static_cast<unsigned long>(max));
        std::vector<parser_fuzz_token_t> tokens(len);
        do
        {
            // A fresh parser for every sequence, so state never leaks between sequences
            parse_t parser;
            parse_node_tree_t parse_tree;
            parse_error_list_t errors;
            for (size_t i=0; i < len; i++)
            {
                const parser_fuzz_token_t &token = tokens[i];
                parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors);
            }
            // keep going until we wrap
        }
        while (! increment(tokens));
    }
    double end = timef();
    say(L"All fuzzed in %f seconds!", end - start);
 }
 __attribute__((unused))
@ -1863,6 +1946,104 @@ static void test_new_parser(void)
    }
 }
 /* Test syntax highlighting. Each test case is a list of (text, expected color) components ending
    with a NULL text. The components are joined with single spaces into one command line, which is
    passed to highlight_shell(); the resulting per-character colors are compared with the expected
    ones. Spaces, both the joining ones and any inside a component, are expected to have color 0.
    Scratch files are created under /tmp/fish_highlight_test and removed at the end. */
 static void test_highlighting(void)
 {
    say(L"Testing syntax highlighting");
    if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed");
    if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed");
    if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed");
    // Here are the components of our source and the colors we expect those to be
    struct highlight_component_t {
        const wchar_t *txt;
        int color;
    };
    const highlight_component_t components1[] =
    {
        {L"echo", HIGHLIGHT_COMMAND},
        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
        {L"&", HIGHLIGHT_END},
        {NULL, -1}
    };
    const highlight_component_t components2[] =
    {
        {L"command", HIGHLIGHT_COMMAND},
        {L"echo", HIGHLIGHT_COMMAND},
        {L"abc", HIGHLIGHT_PARAM},
        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
        {L"&", HIGHLIGHT_END},
        {NULL, -1}
    };
    const highlight_component_t components3[] =
    {
        {L"if command ls", HIGHLIGHT_COMMAND},
        {L"; ", HIGHLIGHT_END},
        {L"echo", HIGHLIGHT_COMMAND},
        {L"abc", HIGHLIGHT_PARAM},
        {L"; ", HIGHLIGHT_END},
        {L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR},
        {L"; ", HIGHLIGHT_END},
        {L"end", HIGHLIGHT_COMMAND},
        {NULL, -1}
    };
    const highlight_component_t *tests[] = {components1, components2, components3};
    for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
    {
        const highlight_component_t *components = tests[which];
        // Count how many we have
        size_t component_count = 0;
        while (components[component_count].txt != NULL)
        {
            component_count++;
        }
        // Generate the text
        wcstring text;
        std::vector<int> expected_colors;
        for (size_t i=0; i < component_count; i++)
        {
            if (i > 0)
            {
                // Components are joined by a single space, which we expect to be uncolored
                text.push_back(L' ');
                expected_colors.push_back(0);
            }
            text.append(components[i].txt);
            // hackish space handling
            const size_t text_len = wcslen(components[i].txt);
            for (size_t j=0; j < text_len; j++)
            {
                bool is_space = (components[i].txt[j] == L' ');
                expected_colors.push_back(is_space ? 0 : components[i].color);
            }
        }
        assert(expected_colors.size() == text.size());
        std::vector<int> colors(text.size());
        highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t());
        if (expected_colors.size() != colors.size())
        {
            // size_t is not necessarily unsigned long, so cast explicitly to match %lu
            err(L"Color vector has wrong size! Expected %lu, actual %lu", static_cast<unsigned long>(expected_colors.size()), static_cast<unsigned long>(colors.size()));
        }
        assert(expected_colors.size() == colors.size());
        for (size_t i=0; i < text.size(); i++)
        {
            if (expected_colors.at(i) != colors.at(i))
            {
                // Pad with i spaces so the trailing caret lands under the offending character
                const wcstring spaces(i, L' ');
                err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", static_cast<unsigned long>(i), expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str());
            }
        }
    }
    // Clean up the scratch directory, reporting failure the same way as the setup commands
    if (system("rm -Rf /tmp/fish_highlight_test")) err(L"rm failed");
 }
 /**
   Main test
 */
@ -1884,9 +2065,10 @@ int main(int argc, char **argv)
    reader_init();
    env_init();
-    test_new_parser_correctness();
+    //test_new_parser_fuzzing();
    //test_new_parser_correctness();
    //test_highlighting();
    //test_new_parser();
    return 0;
    test_format();
    test_escape();
--- a/highlight.cpp
+++ b/highlight.cpp
@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
 void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
 {
    ASSERT_IS_BACKGROUND_THREAD();
-    if (1) {
+    if (0)
    {
        highlight_shell_magic(buff, color, pos, error, vars);
        return;
    }
@ -1810,6 +1811,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
                break;
            case symbol_if_statement:
            {
                // Color the 'end'
                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
                // Color arguments and redirections
                const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list);
                if (arguments != NULL)
                {
                    color_arguments(buff, parse_tree, *arguments, color);
                }
            }
            break;
            case symbol_redirection:
                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
                break;
--- a/parse_productions.cpp
+++ b/parse_productions.cpp
@ -249,8 +249,10 @@ RESOLVE(argument_list)
 {
    switch (token_type)
    {
-        case parse_token_type_string: return 1;
+        case parse_token_type_string:
-        default: return 0;
+            return 1;
        default:
            return 0;
    }
 }
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -4,6 +4,7 @@
 using namespace parse_productions;
 /** Returns a string description of this parse error */
 wcstring parse_error_t::describe(const wcstring &src) const
 {
    wcstring result = text;
@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
    return result;
 }
 /** Returns a string description of the given token type */
 wcstring token_type_description(parse_token_type_t type)
 {
    switch (type)
@ -172,54 +174,52 @@ wcstring keyword_description(parse_keyword_t k)
    }
 }
 /** Returns a string description of the given parse node */
 wcstring parse_node_t::describe(void) const
 {
    // A node is currently described solely by the description of its type
    return token_type_description(type);
 }
 /** A struct representing a token passed to the parser */
 struct parse_token_t
 {
    enum parse_token_type_t type; // The type of the token as represented by the parser
    enum token_type tokenizer_type; // The type of the token as represented by the tokenizer
    enum parse_keyword_t keyword; // Any keyword represented by this parser
    size_t source_start;
    size_t source_length;
-    wcstring describe() const;
+    wcstring describe() const
-};
+    {
 wcstring parse_token_t::describe(void) const
 {
        wcstring result = token_type_description(type);
        if (keyword != parse_keyword_none)
        {
            append_format(result, L" <%ls>", keyword_description(keyword).c_str());
        }
        return result;
-}
+    }
 };
-// Convert from tokenizer_t's token type to our token
+/* Convert from tokenizer_t's token type to a parse_token_t type */
-static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
+static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type)
 {
-    parse_token_t result = {};
+    parse_token_type_t result = token_type_invalid;
    result.tokenizer_type = tokenizer_token_type;
    switch (tokenizer_token_type)
    {
        case TOK_STRING:
-            result.type = parse_token_type_string;
+            result = parse_token_type_string;
            break;
        case TOK_PIPE:
-            result.type = parse_token_type_pipe;
+            result = parse_token_type_pipe;
            break;
        case TOK_END:
-            result.type = parse_token_type_end;
+            result = parse_token_type_end;
            break;
        case TOK_BACKGROUND:
-            result.type = parse_token_type_background;
+            result = parse_token_type_background;
            break;
        case TOK_REDIRECT_OUT:
@ -227,15 +227,15 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
        case TOK_REDIRECT_IN:
        case TOK_REDIRECT_FD:
        case TOK_REDIRECT_NOCLOB:
-            result.type = parse_token_type_redirection;
+            result = parse_token_type_redirection;
            break;
        case TOK_ERROR:
-            result.type = parse_special_type_tokenizer_error;
+            result = parse_special_type_tokenizer_error;
            break;
        case TOK_COMMENT:
-            result.type = parse_special_type_comment;
+            result = parse_special_type_comment;
            break;
@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
    return result;
 }
 /* Helper function for dump_tree */
 static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
 {
    assert(start < nodes.size());
@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
    }
 }
-__attribute__((unused))
+/* Gives a debugging textual description of a parse tree */
-static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
+wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
 {
    if (nodes.empty())
        return L"(empty!)";
@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
    return result;
 }
 /* Struct representing elements of the symbol stack, used in the internal state of the LL parser */
 struct parse_stack_element_t
 {
    enum parse_token_type_t type;
@ -323,31 +325,25 @@ struct parse_stack_element_t
        }
        return result;
    }
 };
 /* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */
 class parse_ll_t
 {
-    friend class parse_t;
+    /* Traditional symbol stack of the LL parser */
    std::vector<parse_stack_element_t> symbol_stack;
-    std::vector<parse_stack_element_t> symbol_stack; // LL parser stack
+    /* Parser output. This is a parse tree, but stored in an array. */
    parse_node_tree_t nodes;
    /* Whether we ran into a fatal error, including parse errors or tokenizer errors */
    bool fatal_errored;
    /* List of errors we have encountered */
    parse_error_list_t errors;
-    // Constructor
+    /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */
-    parse_ll_t() : fatal_errored(false)
+    bool top_node_handle_terminal_types(parse_token_t token);
    {
        this->reset();
    }
    bool top_node_match_token(parse_token_t token);
    void accept_token(parse_token_t token, const wcstring &src);
    // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
    void reset(void);
    void parse_error(const wchar_t *expected, parse_token_t token);
    void parse_error(parse_token_t token, const wchar_t *format, ...);
@ -355,9 +351,6 @@ class parse_ll_t
    void dump_stack(void) const;
    // Figure out the ranges of intermediate nodes
    void determine_node_ranges();
    // Get the node corresponding to the top element of the stack
    parse_node_t &node_for_top_symbol()
    {
@ -420,7 +413,12 @@ class parse_ll_t
        for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
        {
            production_element_t elem = (*production)[i];
-            if (production_element_is_valid(elem))
+            if (!production_element_is_valid(elem))
            {
                // All done, bail out
                break;
            }
            else
            {
                // Generate the parse node. Note that this push_back may invalidate node.
                parse_token_type_t child_type = production_element_type(elem);
@ -452,6 +450,36 @@ class parse_ll_t
        }
    }
    public:
    /* Constructor */
    parse_ll_t() : fatal_errored(false)
    {
        this->symbol_stack.reserve(16);
        this->nodes.reserve(64);
        this->reset_symbols_and_nodes();
    }
    /* Input */
    void accept_token(parse_token_t token);
    /* Indicate if we hit a fatal error */
    bool has_fatal_error(void) const
    {
        return this->fatal_errored;
    }
    /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */
    void reset_symbols(void);
    /* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */
    void reset_symbols_and_nodes(void);
    /* Once parsing is complete, determine the ranges of intermediate nodes */
    void determine_node_ranges();
    /* Acquire output after parsing. This transfers directly from within self */
    void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
 };
 void parse_ll_t::dump_stack(void) const
@ -509,7 +537,8 @@ void parse_ll_t::determine_node_ranges(void)
            max_end = std::max(max_end, child.source_start + child.source_length);
        }
-        if (min_start != source_start_invalid) {
+        if (min_start != source_start_invalid)
        {
            assert(max_end >= min_start);
            parent->source_start = min_start;
            parent->source_length = max_end - min_start;
@ -517,6 +546,22 @@ void parse_ll_t::determine_node_ranges(void)
    }
 }
 /* Hand the parse results over to the caller. If output is non-NULL it receives the node tree; if
    errors is non-NULL it receives the error list. Either way, this parser's nodes, errors and symbol
    stack are empty afterwards. Contents are exchanged with the member swap(): parse_node_tree_t
    derives from std::vector, so std::swap on it would select the generic template, which is
    copy-based before C++11. */
 void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
 {
    if (output != NULL)
    {
        output->swap(this->nodes);
    }
    // Whatever we hold now (possibly the caller's previous contents) is dropped
    this->nodes.clear();

    if (errors != NULL)
    {
        errors->swap(this->errors);
    }
    this->errors.clear();

    this->symbol_stack.clear();
 }
 void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
 {
    //this->dump_stack();
@ -545,9 +590,9 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
    fatal_errored = true;
 }
-void parse_ll_t::reset(void)
+void parse_ll_t::reset_symbols(void)
 {
-    // add a new job_list node and then reset our symbol list to point at it
+    /* Add a new job_list node, and then reset our symbol list to point at it */
    node_offset_t where = nodes.size();
    nodes.push_back(parse_node_t(symbol_job_list));
@ -556,8 +601,31 @@ void parse_ll_t::reset(void)
    this->fatal_errored = false;
 }
 /* Reset both symbols and nodes */
 void parse_ll_t::reset_symbols_and_nodes(void)
 {
    nodes.clear();
    this->reset_symbols();
 }
-bool parse_ll_t::top_node_match_token(parse_token_t token)
+static bool type_is_terminal_type(parse_token_type_t type)
 {
    switch (type)
    {
        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_end:
        case parse_token_type_terminate:
            return true;
        default:
            return false;
    }
 }
 bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
 {
    if (symbol_stack.empty())
    {
@ -568,38 +636,55 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
    PARSE_ASSERT(! symbol_stack.empty());
    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
-    bool result = false;
+    bool handled = false;
    parse_stack_element_t &stack_top = symbol_stack.back();
    if (type_is_terminal_type(stack_top.type))
    {
        // The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type)
        handled = true;
        // Now see if we actually matched
        bool matched = false;
        if (stack_top.type == token.type)
        {
-        // So far so good. See if we need a particular keyword.
+            switch (stack_top.type)
-        if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword)
+            {
                case parse_token_type_string:
                    // We matched if the keywords match, or no keyword was required
                    matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword);
                    break;
                default:
                    // For other types, we only require that the types match
                    matched = true;
                    break;
            }
        }
        if (matched)
        {
            // Success. Tell the node that it matched this token
            parse_node_t &node = node_for_top_symbol();
            node.source_start = token.source_start;
            node.source_length = token.source_length;
            // We consumed this symbol
            symbol_stack.pop_back();
            result = true;
        }
-        else if (token.type == parse_token_type_pipe)
+        else
        {
-            // Pipes are primitive
+            // Failure
            this->fatal_errored = true;
        }
        // We handled the token, so pop the symbol stack
        symbol_stack.pop_back();
            result = true;
    }
-    }
+    return handled;
    return result;
 }
-void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
+void parse_ll_t::accept_token(parse_token_t token)
 {
    bool logit = false;
    if (logit)
    {
        const wcstring txt = wcstring(src, token.source_start, token.source_length);
        fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
    }
    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
@ -620,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
    {
        PARSE_ASSERT(! symbol_stack.empty());
-        if (top_node_match_token(token))
+        if (top_node_handle_terminal_types(token))
        {
            if (logit)
            {
@ -715,7 +800,7 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
        tok_options |= TOK_SHOW_COMMENTS;
    tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
-    for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok))
+    for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok))
    {
        token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
        const wchar_t *tok_txt = tok_last(&tok);
@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
        size_t tok_extent = tok_get_extent(&tok);
        assert(tok_extent < 10000000); //paranoia
-        parse_token_t token = parse_token_from_tokenizer_token(tok_type);
+        parse_token_t token;
-        token.tokenizer_type = tok_type;
+        token.type = parse_token_type_from_tokenizer_token(tok_type);
        token.source_start = (size_t)tok_start;
        token.source_length = tok_extent;
        token.keyword = keyword_for_token(tok_type, tok_txt);
-        this->parser->accept_token(token, str);
+        this->parser->accept_token(token);
-        if (this->parser->fatal_errored)
+        if (this->parser->has_fatal_error())
        {
            if (parse_flags & parse_flag_continue_after_error)
            {
                /* Mark an error and then keep going */
                token.type = parse_special_type_parse_error;
                token.keyword = parse_keyword_none;
-                this->parser->accept_token(token, str);
+                this->parser->accept_token(token);
-                this->parser->reset();
+                this->parser->reset_symbols();
            }
            else
            {
@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
    fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
 #endif
-    if (output != NULL)
+    // Acquire the output from the parser
-    {
+    this->parser->acquire_output(output, errors);
        output->swap(this->parser->nodes);
        this->parser->nodes.clear();
    }
-    if (errors != NULL)
+    // Indicate if we had a fatal error
-    {
+    return ! this->parser->has_fatal_error();
-        errors->swap(this->parser->errors);
+}
        this->parser->errors.clear();
    }
-    return ! this->parser->fatal_errored;
+bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
 {
    // Only strings can have keywords. So if we have a keyword, the type must be a string
    assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
    parse_token_t token;
    token.type = token_type;
    token.keyword = keyword;
    token.source_start = -1;
    token.source_length = 0;
    this->parser->accept_token(token);
    return ! this->parser->has_fatal_error();
 }
 void parse_t::clear()
 {
    this->parser->reset_symbols_and_nodes();
 }
 const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
--- a/parse_tree.h
+++ b/parse_tree.h
@ -36,29 +36,6 @@ struct parse_error_t
 };
 typedef std::vector<parse_error_t> parse_error_list_t;
 enum
 {
    parse_flag_none = 0,
    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
    parse_flag_continue_after_error = 1 << 0,
    /* Include comment tokens */
    parse_flag_include_comments = 1 << 1
 };
 typedef unsigned int parse_tree_flags_t;
 class parse_ll_t;
 class parse_t
 {
    parse_ll_t * const parser;
 public:
    parse_t();
    ~parse_t();
    bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
 };
 enum parse_token_type_t
 {
    token_type_invalid,
@ -111,6 +88,9 @@ enum parse_token_type_t
    parse_special_type_tokenizer_error,
    parse_special_type_comment,
    FIRST_TERMINAL_TYPE = parse_token_type_string,
    LAST_TERMINAL_TYPE = parse_token_type_terminate,
    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
 };
@ -132,9 +112,46 @@ enum parse_keyword_t
    parse_keyword_or,
    parse_keyword_not,
    parse_keyword_command,
-    parse_keyword_builtin
+    parse_keyword_builtin,
    LAST_KEYWORD = parse_keyword_builtin
 };
 /* Flags accepted by parse_t::parse(). Each value is a distinct bit, so flags may be OR'd together
    into a parse_tree_flags_t. */
 enum
 {
    parse_flag_none = 0,
    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
    parse_flag_continue_after_error = 1 << 0,
    /* Include comment tokens */
    parse_flag_include_comments = 1 << 1
 };
 /* Bitmask type holding a combination of the parse_flag_* values above */
 typedef unsigned int parse_tree_flags_t;
 class parse_ll_t;
 /* Public interface to the parser. The actual parsing is done by a parse_ll_t, which is only
    forward-declared in this header. */
 class parse_t
 {
    /* The underlying parser implementation. NOTE(review): presumably allocated in the constructor and freed in the destructor - confirm in parse_tree.cpp */
    parse_ll_t * const parser;
 public:
    parse_t();
    ~parse_t();
    /* Parse the string str according to flags (a combination of parse_flag_* values). The resulting node tree and error list are transferred into *output and *errors; either pointer may be NULL. Returns true if the parser did not hit a fatal error. */
    bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
    /* Feed a single token to the parser. Only string tokens may carry a keyword: for any other token type, keyword must be parse_keyword_none (this is asserted). Returns true if the parser is not in a fatal error state afterwards. NOTE(review): the implementation in this change does not appear to write to output or errors - confirm. */
    bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
    /* Reset the parser's symbols and nodes, ready to parse something else */
    void clear();
 };
 wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
 wcstring token_type_description(parse_token_type_t type);
 wcstring keyword_description(parse_keyword_t type);
@ -184,7 +201,7 @@ public:
 class parse_node_tree_t : public std::vector<parse_node_t>
 {
-    public:
+public:
    /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
 # A job_list is a list of jobs, separated by semicolons or newlines
    job_list = <empty> |
                <TOK_END> job_list |
                job job_list
                <TOK_END> job_list
 # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation