More work on new parser

2024-12-26 12:53:13 +00:00 · 2013-08-11 00:35:00 -07:00 · 2013-08-11 00:35:00 -07:00 · e58b73179f
commit e58b73179f
parent 8e07e55c1f
6 changed files with 532 additions and 215 deletions
--- a/builtin.cpp
+++ b/builtin.cpp
@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
        }
        else
        {
-            if (0) {
+            const wcstring dump = parse_dump_tree(parse_tree, src);
+            fprintf(stderr, "%ls", dump.c_str());
+            if (0)
+            {
                parse_execution_context_t ctx(parse_tree, src);
                parse_execution_simulator_t sim;
                sim.context = &ctx;
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -1816,7 +1816,10 @@ static void test_new_parser_correctness(void)
        {L"if true ; end", true},
        {L"if true; end ; end", false},
        {L"if end; end ; end", false},
-        {L"end", false}
+        {L"if end", false},
+        {L"end", false},
+        {L"for i i", false},
+        {L"for i in a b c ; end", true}
    };

    for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void)
        }
    }
    say(L"Parse tests complete");
+}

+/* One synthetic input token for the parser fuzzer: a terminal token type plus the keyword passed along with it. */
+struct parser_fuzz_token_t
+{
+    /* Token type handed to the parser */
+    parse_token_type_t token_type;
+
+    /* Keyword handed to the parser alongside the type */
+    parse_keyword_t keyword;
+
+    /* Start at the first terminal type, with no keyword */
+    parser_fuzz_token_t()
+        : token_type(FIRST_TERMINAL_TYPE),
+          keyword(parse_keyword_none)
+    {}
+};
+
+/* Advances the token sequence to its next combination, odometer-style, with position 0 as the fastest-moving digit. A string token first steps through every keyword before its type advances. Returns true once every position has wrapped around (which is also the result for an empty sequence), meaning all combinations have been visited. */
+static bool increment(std::vector<parser_fuzz_token_t> &tokens)
+{
+    const size_t count = tokens.size();
+    size_t idx = 0;
+    for (; idx < count; idx++)
+    {
+        parser_fuzz_token_t &token = tokens[idx];
+
+        // A string token cycles through its keywords before its type moves on
+        if (token.token_type == parse_token_type_string)
+        {
+            if (token.keyword < LAST_KEYWORD)
+            {
+                // Enums have no built-in ++, so step through the integer value. Checking the bound first means we never hold a value past the last keyword.
+                token.keyword = static_cast<parse_keyword_t>(token.keyword + 1);
+                break;
+            }
+
+            // Keywords exhausted: start them over and go on to advance the type
+            token.keyword = parse_keyword_none;
+        }
+
+        if (token.token_type < LAST_TERMINAL_TYPE)
+        {
+            token.token_type = static_cast<parse_token_type_t>(token.token_type + 1);
+            break;
+        }
+
+        // This position wrapped; reset it and carry into the next position
+        token.token_type = FIRST_TERMINAL_TYPE;
+    }
+    return idx == count;
+}
+
+/* Feeds every sequence of 1 to max tokens produced by increment() into a parser, one token at a time. The return value of parse_1_token is ignored; the only goal is that nothing crashes. */
+static void test_new_parser_fuzzing(void)
+{
+    // size_t is not necessarily unsigned long, so cast explicitly to match %lu
+    say(L"Fuzzing parser (node size: %lu)", static_cast<unsigned long>(sizeof(parse_node_t)));
+    double start = timef();
+    // ensure nothing crashes
+    const size_t max = 5;
+    for (size_t len=1; len <= max; len++)
+    {
+        fprintf(stderr, "%lu / %lu\n", static_cast<unsigned long>(len), static_cast<unsigned long>(max));
+        std::vector<parser_fuzz_token_t> tokens(len);
+        do
+        {
+            // A new parser, tree and error list are created for each sequence
+            parse_t parser;
+            parse_node_tree_t parse_tree;
+            parse_error_list_t errors;
+            for (size_t i=0; i < len; i++)
+            {
+                const parser_fuzz_token_t &token = tokens[i];
+                parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors);
+            }
+
+            // keep going until we wrap
+        }
+        while (! increment(tokens));
+    }
+    double end = timef();
+    say(L"All fuzzed in %f seconds!", end - start);
 }

 __attribute__((unused))
@ -1863,6 +1946,104 @@ static void test_new_parser(void)
    }
 }

+/* Checks highlight_shell against hand-built command lines. Each test is a NULL-terminated list of (text, expected color) components. Components are joined with single spaces, and every space, whether a joiner or inside a component, is expected to have color 0. Scratch files are created under /tmp/fish_highlight_test and removed at the end. */
+static void test_highlighting(void)
+{
+    // NOTE(review): highlight_shell starts with ASSERT_IS_BACKGROUND_THREAD(); confirm that holds for however this test is invoked
+    say(L"Testing syntax highlighting");
+    if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed");
+    if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed");
+    if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed");
+
+    // Here are the components of our source and the colors we expect those to be
+    struct highlight_component_t {
+        const wchar_t *txt;
+        int color;
+    };
+
+    const highlight_component_t components1[] =
+    {
+        {L"echo", HIGHLIGHT_COMMAND},
+        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
+        {L"&", HIGHLIGHT_END},
+        {NULL, -1}
+    };
+
+    const highlight_component_t components2[] =
+    {
+        {L"command", HIGHLIGHT_COMMAND},
+        {L"echo", HIGHLIGHT_COMMAND},
+        {L"abc", HIGHLIGHT_PARAM},
+        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
+        {L"&", HIGHLIGHT_END},
+        {NULL, -1}
+    };
+
+    const highlight_component_t components3[] =
+    {
+        {L"if command ls", HIGHLIGHT_COMMAND},
+        {L"; ", HIGHLIGHT_END},
+        {L"echo", HIGHLIGHT_COMMAND},
+        {L"abc", HIGHLIGHT_PARAM},
+        {L"; ", HIGHLIGHT_END},
+        {L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR},
+        {L"; ", HIGHLIGHT_END},
+        {L"end", HIGHLIGHT_COMMAND},
+        {NULL, -1}
+    };
+
+    const highlight_component_t *tests[] = {components1, components2, components3};
+    for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
+    {
+        const highlight_component_t *components = tests[which];
+        // Count how many we have (the list ends at the entry whose txt is NULL)
+        size_t component_count = 0;
+        while (components[component_count].txt != NULL)
+        {
+            component_count++;
+        }
+
+        // Generate the text and, in lockstep, the color expected for each character
+        wcstring text;
+        std::vector<int> expected_colors;
+        for (size_t i=0; i < component_count; i++)
+        {
+            if (i > 0)
+            {
+                text.push_back(L' ');
+                expected_colors.push_back(0);
+            }
+            text.append(components[i].txt);
+
+            // hackish space handling: spaces inside a component get color 0 rather than the component's color
+            const size_t text_len = wcslen(components[i].txt);
+            for (size_t j=0; j < text_len; j++)
+            {
+                bool is_space = (components[i].txt[j] == L' ');
+                expected_colors.push_back(is_space ? 0 : components[i].color);
+            }
+        }
+        assert(expected_colors.size() == text.size());
+
+        std::vector<int> colors(text.size());
+        highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t());
+
+        if (expected_colors.size() != colors.size())
+        {
+            // size_t is not necessarily unsigned long, so cast explicitly to match %lu
+            err(L"Color vector has wrong size! Expected %lu, actual %lu", static_cast<unsigned long>(expected_colors.size()), static_cast<unsigned long>(colors.size()));
+        }
+        assert(expected_colors.size() == colors.size());
+        for (size_t i=0; i < text.size(); i++)
+        {
+            if (expected_colors.at(i) != colors.at(i))
+            {
+                // Pad with i spaces so the caret lands under the offending character
+                const wcstring spaces(i, L' ');
+                err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", static_cast<unsigned long>(i), expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str());
+            }
+        }
+    }
+
+    // Report cleanup failures the same way as the setup failures above
+    if (system("rm -Rf /tmp/fish_highlight_test")) err(L"rm failed");
+}
+
 /**
   Main test
 */
@ -1884,9 +2065,10 @@ int main(int argc, char **argv)
    reader_init();
    env_init();

-    test_new_parser_correctness();
+    //test_new_parser_fuzzing();
+    //test_new_parser_correctness();
+    //test_highlighting();
    //test_new_parser();
-    return 0;

    test_format();
    test_escape();
--- a/highlight.cpp
+++ b/highlight.cpp
@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
 void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
 {
    ASSERT_IS_BACKGROUND_THREAD();
-    if (1) {
+    if (0)
+    {
        highlight_shell_magic(buff, color, pos, error, vars);
        return;
    }
@ -1810,6 +1811,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
                break;

+            case symbol_if_statement:
+            {
+                // Color the 'end'
+                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
+
+                // Color arguments and redirections
+                const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list);
+                if (arguments != NULL)
+                {
+                    color_arguments(buff, parse_tree, *arguments, color);
+                }
+            }
+            break;
+
            case symbol_redirection:
                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
                break;
--- a/parse_productions.cpp
+++ b/parse_productions.cpp
@ -249,8 +249,10 @@ RESOLVE(argument_list)
 {
    switch (token_type)
    {
-        case parse_token_type_string: return 1;
-        default: return 0;
+        case parse_token_type_string:
+            return 1;
+        default:
+            return 0;
    }
 }

--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -4,6 +4,7 @@

 using namespace parse_productions;

+/** Returns a string description of this parse error */
 wcstring parse_error_t::describe(const wcstring &src) const
 {
    wcstring result = text;
@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
    return result;
 }

+/** Returns a string description of the given token type */
 wcstring token_type_description(parse_token_type_t type)
 {
    switch (type)
@ -172,54 +174,52 @@ wcstring keyword_description(parse_keyword_t k)
    }
 }

+/** Returns a string description of the given parse node */
 wcstring parse_node_t::describe(void) const
 {
    // A node's description is just the description of its type
    return token_type_description(type);
 }

+/** A struct representing a token passed to the parser (see parse_ll_t::accept_token) */
 struct parse_token_t
 {
    enum parse_token_type_t type; // The type of the token as represented by the parser
-    enum token_type tokenizer_type; // The type of the token as represented by the tokenizer
    enum parse_keyword_t keyword; // Any keyword represented by this parser
    size_t source_start;
    size_t source_length;

-    wcstring describe() const;
-};
-
-wcstring parse_token_t::describe(void) const
-{
+    wcstring describe() const
+    {
        wcstring result = token_type_description(type);
        if (keyword != parse_keyword_none)
        {
            append_format(result, L" <%ls>", keyword_description(keyword).c_str());
        }
        return result;
-}
+    }
+};

-// Convert from tokenizer_t's token type to our token
-static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
+/* Convert from tokenizer_t's token type to a parse_token_t type */
+static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type)
 {
-    parse_token_t result = {};
-    result.tokenizer_type = tokenizer_token_type;
+    parse_token_type_t result = token_type_invalid;
    switch (tokenizer_token_type)
    {
        case TOK_STRING:
-            result.type = parse_token_type_string;
+            result = parse_token_type_string;
            break;

        case TOK_PIPE:
-            result.type = parse_token_type_pipe;
+            result = parse_token_type_pipe;
            break;

        case TOK_END:
-            result.type = parse_token_type_end;
+            result = parse_token_type_end;
            break;

        case TOK_BACKGROUND:
-            result.type = parse_token_type_background;
+            result = parse_token_type_background;
            break;

        case TOK_REDIRECT_OUT:
@ -227,15 +227,15 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
        case TOK_REDIRECT_IN:
        case TOK_REDIRECT_FD:
        case TOK_REDIRECT_NOCLOB:
-            result.type = parse_token_type_redirection;
+            result = parse_token_type_redirection;
            break;

        case TOK_ERROR:
-            result.type = parse_special_type_tokenizer_error;
+            result = parse_special_type_tokenizer_error;
            break;

        case TOK_COMMENT:
-            result.type = parse_special_type_comment;
+            result = parse_special_type_comment;
            break;


@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
    return result;
 }

+/* Helper function for dump_tree */
 static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
 {
    assert(start < nodes.size());
@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
    }
 }

-__attribute__((unused))
-static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
+/* Gives a debugging textual description of a parse tree */
+wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
 {
    if (nodes.empty())
        return L"(empty!)";
@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
    return result;
 }

+/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */
 struct parse_stack_element_t
 {
    enum parse_token_type_t type;
@ -323,31 +325,25 @@ struct parse_stack_element_t
        }
        return result;
    }
-
 };

+/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */
 class parse_ll_t
 {
-    friend class parse_t;
+    /* Traditional symbol stack of the LL parser */
+    std::vector<parse_stack_element_t> symbol_stack;
    
-    std::vector<parse_stack_element_t> symbol_stack; // LL parser stack
+    /* Parser output. This is a parse tree, but stored in an array. */
    parse_node_tree_t nodes;

+    /* Whether we ran into a fatal error, including parse errors or tokenizer errors */
    bool fatal_errored;
+    
+    /* List of errors we have encountered */
    parse_error_list_t errors;

-    // Constructor
-    parse_ll_t() : fatal_errored(false)
-    {
-        this->reset();
-    }
-
-    bool top_node_match_token(parse_token_t token);
-
-    void accept_token(parse_token_t token, const wcstring &src);
-    
-    // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
-    void reset(void);
+    /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */
+    bool top_node_handle_terminal_types(parse_token_t token);

    void parse_error(const wchar_t *expected, parse_token_t token);
    void parse_error(parse_token_t token, const wchar_t *format, ...);
@ -355,9 +351,6 @@ class parse_ll_t

    void dump_stack(void) const;

-    // Figure out the ranges of intermediate nodes
-    void determine_node_ranges();
-
    // Get the node corresponding to the top element of the stack
    parse_node_t &node_for_top_symbol()
    {
@ -420,7 +413,12 @@ class parse_ll_t
        for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
        {
            production_element_t elem = (*production)[i];
-            if (production_element_is_valid(elem))
+            if (!production_element_is_valid(elem))
+            {
+                // All done, bail out
+                break;
+            }
+            else
            {
                // Generate the parse node. Note that this push_back may invalidate node.
                parse_token_type_t child_type = production_element_type(elem);
@ -452,6 +450,36 @@ class parse_ll_t
        }
    }

+    public:
+    
+    /* Constructor */
+    parse_ll_t() : fatal_errored(false)
+    {
+        this->symbol_stack.reserve(16);
+        this->nodes.reserve(64);
+        this->reset_symbols_and_nodes();
+    }
+
+    /* Input */
+    void accept_token(parse_token_t token);
+    
+    /* Indicate if we hit a fatal error */
+    bool has_fatal_error(void) const
+    {
+        return this->fatal_errored;
+    }
+    
+    /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor by way of reset_symbols_and_nodes. */
+    void reset_symbols(void);
+
+    /* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */
+    void reset_symbols_and_nodes(void);
+    
+    /* Once parsing is complete, determine the ranges of intermediate nodes */
+    void determine_node_ranges();
+    
+    /* Acquire output after parsing. This transfers directly from within self */
+    void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
 };

 void parse_ll_t::dump_stack(void) const
@ -509,7 +537,8 @@ void parse_ll_t::determine_node_ranges(void)
            max_end = std::max(max_end, child.source_start + child.source_length);
        }

-        if (min_start != source_start_invalid) {
+        if (min_start != source_start_invalid)
+        {
            assert(max_end >= min_start);
            parent->source_start = min_start;
            parent->source_length = max_end - min_start;
@ -517,6 +546,22 @@ void parse_ll_t::determine_node_ranges(void)
    }
 }

+void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
+{
+    if (output != NULL)
+    {
+        std::swap(*output, this->nodes);
+    }
+    this->nodes.clear();
+    
+    if (errors != NULL)
+    {
+        std::swap(*errors, this->errors);
+    }
+    this->errors.clear();
+    this->symbol_stack.clear();
+}
+
 void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
 {
    //this->dump_stack();
@ -545,9 +590,9 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
    fatal_errored = true;
 }

-void parse_ll_t::reset(void)
+void parse_ll_t::reset_symbols(void)
 {
-    // add a new job_list node and then reset our symbol list to point at it
+    /* Add a new job_list node, and then reset our symbol list to point at it */
    node_offset_t where = nodes.size();
    nodes.push_back(parse_node_t(symbol_job_list));

@ -556,8 +601,31 @@ void parse_ll_t::reset(void)
    this->fatal_errored = false;
 }

+/* Discard every node produced so far, then reset the symbols via reset_symbols() */
+void parse_ll_t::reset_symbols_and_nodes(void)
+{
+    this->nodes.clear();
+    reset_symbols();
+}

-bool parse_ll_t::top_node_match_token(parse_token_t token)
+/* Returns whether the given type is one of the terminal token types: string, pipe, redirection, background, end, or terminate. Every other type yields false. */
+static bool type_is_terminal_type(parse_token_type_t type)
+{
+    return type == parse_token_type_string
+           || type == parse_token_type_pipe
+           || type == parse_token_type_redirection
+           || type == parse_token_type_background
+           || type == parse_token_type_end
+           || type == parse_token_type_terminate;
+}
+
+bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
 {
    if (symbol_stack.empty())
    {
@ -568,38 +636,55 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)

    PARSE_ASSERT(! symbol_stack.empty());
    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
-    bool result = false;
+    bool handled = false;
    parse_stack_element_t &stack_top = symbol_stack.back();
+    if (type_is_terminal_type(stack_top.type))
+    {
+        // The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type)
+        handled = true;
+
+        // Now see if we actually matched
+        bool matched = false;
        if (stack_top.type == token.type)
        {
-        // So far so good. See if we need a particular keyword.
-        if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword)
+            switch (stack_top.type)
+            {
+                case parse_token_type_string:
+                    // We matched if the keywords match, or no keyword was required
+                    matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword);
+                    break;
+
+                default:
+                    // For other types, we only require that the types match
+                    matched = true;
+                    break;
+            }
+        }
+
+        if (matched)
        {
            // Success. Tell the node that it matched this token
            parse_node_t &node = node_for_top_symbol();
            node.source_start = token.source_start;
            node.source_length = token.source_length;
-
-            // We consumed this symbol
-            symbol_stack.pop_back();
-            result = true;
        }
-        else if (token.type == parse_token_type_pipe)
+        else
        {
-            // Pipes are primitive
+            // Failure
+            this->fatal_errored = true;
+        }
+
+        // We handled the token, so pop the symbol stack
        symbol_stack.pop_back();
-            result = true;
    }
-    }
-    return result;
+    return handled;
 }

-void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
+void parse_ll_t::accept_token(parse_token_t token)
 {
    bool logit = false;
    if (logit)
    {
-        const wcstring txt = wcstring(src, token.source_start, token.source_length);
        fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
    }
    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
@ -620,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
    {
        PARSE_ASSERT(! symbol_stack.empty());

-        if (top_node_match_token(token))
+        if (top_node_handle_terminal_types(token))
        {
            if (logit)
            {
@ -715,7 +800,7 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
        tok_options |= TOK_SHOW_COMMENTS;

    tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
-    for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok))
+    for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok))
    {
        token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
        const wchar_t *tok_txt = tok_last(&tok);
@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
        size_t tok_extent = tok_get_extent(&tok);
        assert(tok_extent < 10000000); //paranoia

-        parse_token_t token = parse_token_from_tokenizer_token(tok_type);
-        token.tokenizer_type = tok_type;
+        parse_token_t token;
+        token.type = parse_token_type_from_tokenizer_token(tok_type);
        token.source_start = (size_t)tok_start;
        token.source_length = tok_extent;
        token.keyword = keyword_for_token(tok_type, tok_txt);
-        this->parser->accept_token(token, str);
+        this->parser->accept_token(token);

-        if (this->parser->fatal_errored)
+        if (this->parser->has_fatal_error())
        {
            if (parse_flags & parse_flag_continue_after_error)
            {
                /* Mark an error and then keep going */
                token.type = parse_special_type_parse_error;
                token.keyword = parse_keyword_none;
-                this->parser->accept_token(token, str);
-                this->parser->reset();
+                this->parser->accept_token(token);
+                this->parser->reset_symbols();
            }
            else
            {
@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
    fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
 #endif

-    if (output != NULL)
-    {
-        output->swap(this->parser->nodes);
-        this->parser->nodes.clear();
-    }
+    // Acquire the output from the parser
+    this->parser->acquire_output(output, errors);
    
-    if (errors != NULL)
-    {
-        errors->swap(this->parser->errors);
-        this->parser->errors.clear();
-    }
+    // Indicate if we had a fatal error
+    return ! this->parser->has_fatal_error();
+}

-    return ! this->parser->fatal_errored;
+bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
+{
+    // Only strings can have keywords. So if we have a keyword, the type must be a string
+    assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
+
+    parse_token_t token;
+    token.type = token_type;
+    token.keyword = keyword;
+    token.source_start = -1;
+    token.source_length = 0;
+
+    this->parser->accept_token(token);
+
+    return ! this->parser->has_fatal_error();
+}
+
+/* Resets the underlying parser's symbols and nodes so that something else can be parsed */
+void parse_t::clear()
+{
+    parse_ll_t * const ll = this->parser;
+    ll->reset_symbols_and_nodes();
 }

 const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
--- a/parse_tree.h
+++ b/parse_tree.h
@ -36,29 +36,6 @@ struct parse_error_t
 };
 typedef std::vector<parse_error_t> parse_error_list_t;

-enum
-{
-    parse_flag_none = 0,
-    
-    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
-    parse_flag_continue_after_error = 1 << 0,
-    
-    /* Include comment tokens */
-    parse_flag_include_comments = 1 << 1
-};
-typedef unsigned int parse_tree_flags_t;
-
-class parse_ll_t;
-class parse_t
-{
-    parse_ll_t * const parser;
-
-public:
-    parse_t();
-    ~parse_t();
-    bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-};
-
 enum parse_token_type_t
 {
    token_type_invalid,
@ -111,6 +88,9 @@ enum parse_token_type_t
    parse_special_type_tokenizer_error,
    parse_special_type_comment,

+    FIRST_TERMINAL_TYPE = parse_token_type_string,
+    LAST_TERMINAL_TYPE = parse_token_type_terminate,
+
    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
 };
@ -132,9 +112,46 @@ enum parse_keyword_t
    parse_keyword_or,
    parse_keyword_not,
    parse_keyword_command,
-    parse_keyword_builtin
+    parse_keyword_builtin,
+
+    LAST_KEYWORD = parse_keyword_builtin
 };

+
+/* Option bits for parse_t::parse, carried in a parse_tree_flags_t */
+enum
+{
+    /* No options */
+    parse_flag_none = 0,
+
+    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
+    parse_flag_continue_after_error = 1 << 0,
+
+    /* Include comment tokens */
+    parse_flag_include_comments = 1 << 1
+};
+typedef unsigned int parse_tree_flags_t;
+
+class parse_ll_t;
+/* Public interface to the parser. The work is done by a parse_ll_t, reached through the parser pointer. */
+class parse_t
+{
+    /* The private implementation */
+    parse_ll_t * const parser;
+
+public:
+    parse_t();
+    ~parse_t();
+
+    /* Parse a string. Returns false if the parser hit a fatal error. */
+    bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
+
+    /* Parse a single token, given as a token type plus keyword. Returns false if the parser hit a fatal error. */
+    bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
+
+    /* Reset, ready to parse something else */
+    void clear();
+};
+
+wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
+
 wcstring token_type_description(parse_token_type_t type);
 wcstring keyword_description(parse_keyword_t type);

@ -184,7 +201,7 @@ public:

 class parse_node_tree_t : public std::vector<parse_node_t>
 {
-    public:
+public:

    /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
 # A job_list is a list of jobs, separated by semicolons or newlines

    job_list = <empty> |
-                <TOK_END> job_list |
-                job job_list
+                job job_list |
+                <TOK_END> job_list

 # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation