Initial work towards improved error reporting. Tests currently fail.

2024-12-25 20:33:08 +00:00 · 2013-12-08 21:54:06 -08:00 · 2013-12-08 21:54:06 -08:00 · 7a3f5afee7
commit 7a3f5afee7
parent 5769fa6aed
9 changed files with 483 additions and 144 deletions
--- a/fish.xcodeproj/project.pbxproj
+++ b/fish.xcodeproj/project.pbxproj
@ -513,6 +513,7 @@
 		D0D02AE415986537008E62BD /* fish_pager */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_pager; sourceTree = BUILT_PRODUCTS_DIR; };
 		D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = osx_fish_launcher.m; path = osx/osx_fish_launcher.m; sourceTree = "<group>"; };
 		D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; };
+		D0D9B2B318555D92001AE279 /* parse_constants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parse_constants.h; sourceTree = "<group>"; };
 		D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = "<group>"; };
 		D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
 		D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_productions.h; sourceTree = "<group>"; };
@ -659,6 +660,7 @@
 				D0A0853D13B3ACEE0099B651 /* expand.cpp */,
 				D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */,
 				D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */,
+				D0D9B2B318555D92001AE279 /* parse_constants.h */,
 				D0C52F361765284C00BFAB82 /* parse_tree.h */,
 				D0C52F351765284C00BFAB82 /* parse_tree.cpp */,
 				D0A0850D13B3ACEE0099B651 /* fallback.h */,
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -2333,7 +2333,7 @@ static void test_new_parser_ll2(void)
    }
 }

-static void test_new_parser_ad_hoc(void)
+static void test_new_parser_ad_hoc()
 {
    /* Very ad-hoc tests for issues encountered */
    say(L"Testing new parser ad hoc tests");
@ -2356,6 +2356,58 @@ static void test_new_parser_ad_hoc(void)
    }
 }

+static void test_new_parser_errors(void)
+{
+    say(L"Testing new parser error reporting");
+    const struct
+    {
+        const wchar_t *src;
+        parse_error_code_t code;
+    }
+    tests[] =
+    {
+        {L"echo (abc", parse_error_tokenizer},
+        
+        {L"end", parse_error_unbalancing_end},
+        {L"echo hi ; end", parse_error_unbalancing_end},
+        
+        {L"else", parse_error_unbalancing_else},
+        {L"if true ; end ; else", parse_error_unbalancing_else},
+        
+        {L"case", parse_error_unbalancing_case},
+        {L"if true ; case ; end", parse_error_unbalancing_case}
+    };
+    
+    for (size_t i = 0; i < sizeof tests / sizeof *tests; i++)
+    {
+        const wcstring src = tests[i].src;
+        parse_error_code_t expected_code = tests[i].code;
+        
+        parse_error_list_t errors;
+        parse_node_tree_t parse_tree;
+        bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors);
+        if (success)
+        {
+            err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
+        }
+        
+        if (errors.size() != 1)
+        {
+            err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", src.c_str(), errors.size());
+        }
+        else if (errors.at(0).code != expected_code)
+        {
+            err(L"Source '%ls' was expected to produce error code %lu, but instead produced error code %lu", src.c_str(), expected_code, (unsigned long)errors.at(0).code);
+            for (size_t i=0; i < errors.size(); i++)
+            {
+                err(L"\t\t%ls", errors.at(i).describe(src).c_str());
+            }
+        }
+        
+    }
+    
+}
+
 static void test_highlighting(void)
 {
    say(L"Testing syntax highlighting");
@ -2574,6 +2626,7 @@ int main(int argc, char **argv)
    if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive
    if (should_test_function("new_parser_correctness")) test_new_parser_correctness();
    if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc();
+    if (should_test_function("new_parser_errors")) test_new_parser_errors();
    if (should_test_function("escape")) test_unescape_sane();
    if (should_test_function("escape")) test_escape_crazy();
    if (should_test_function("format")) test_format();
--- a/parse_constants.h
+++ b/parse_constants.h
@ -0,0 +1,320 @@
+/**\file parse_constants.h
+
+    Constants used in the programmatic representation of fish code.
+*/
+
+#ifndef fish_parse_constants_h
+#define fish_parse_constants_h
+
+/* Assertion macro used by the parser; forwards to assert() */
+#define PARSE_ASSERT(a) assert(a)
+/* Prints "Parser dying!" to stderr and then calls exit_without_destructors(-1) */
+#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0)
+
+
+/* Types of symbols and tokens. The enumerators are grouped as: the invalid type, the non-terminal
+   symbols, the terminal token types, and finally the special types. The aliases at the end name
+   boundaries of those groups, so the relative order of the enumerators matters. */
+enum parse_token_type_t
+{
+    token_type_invalid,
+
+    // Non-terminal tokens
+    symbol_job_list,
+    symbol_job,
+    symbol_job_continuation,
+    symbol_statement,
+    symbol_block_statement,
+    symbol_block_header,
+    symbol_for_header,
+    symbol_while_header,
+    symbol_begin_header,
+    symbol_function_header,
+
+    symbol_if_statement,
+    symbol_if_clause,
+    symbol_else_clause,
+    symbol_else_continuation,
+
+    symbol_switch_statement,
+    symbol_case_item_list,
+    symbol_case_item,
+
+    symbol_boolean_statement,
+    symbol_decorated_statement,
+    symbol_plain_statement,
+    symbol_arguments_or_redirections_list,
+    symbol_argument_or_redirection,
+
+    symbol_argument_list,
+
+    symbol_argument,
+    symbol_redirection,
+
+    symbol_optional_background,
+
+    // Terminal types
+    parse_token_type_string,
+    parse_token_type_pipe,
+    parse_token_type_redirection,
+    parse_token_type_background,
+    parse_token_type_end,
+    parse_token_type_terminate,
+
+    // Very special terminal types that don't appear in the production list
+    parse_special_type_parse_error,
+    parse_special_type_tokenizer_error,
+    parse_special_type_comment,
+
+    // Aliases for the first and last of the terminal types listed above
+    FIRST_TERMINAL_TYPE = parse_token_type_string,
+    LAST_TERMINAL_TYPE = parse_token_type_terminate,
+
+    // Alias for the last regular type; the special types are declared after it
+    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
+    // Alias for the first terminal token type
+    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
+};
+
+/* Keywords that a string token may carry. parse_keyword_none is used for a string token that is
+   not a keyword. */
+enum parse_keyword_t
+{
+    parse_keyword_none,
+    parse_keyword_if,
+    parse_keyword_else,
+    parse_keyword_for,
+    parse_keyword_in,
+    parse_keyword_while,
+    parse_keyword_begin,
+    parse_keyword_function,
+    parse_keyword_switch,
+    parse_keyword_case,
+    parse_keyword_end,
+    parse_keyword_and,
+    parse_keyword_or,
+    parse_keyword_not,
+    parse_keyword_command,
+    parse_keyword_builtin,
+
+    // Alias for the last keyword enumerator
+    LAST_KEYWORD = parse_keyword_builtin
+};
+
+/* Statement decorations. This matches the order of productions in decorated_statement,
+   so the enumerators must not be reordered independently of those productions. */
+enum parse_statement_decoration_t
+{
+    parse_statement_decoration_none,
+    parse_statement_decoration_command, // presumably a statement prefixed with 'command' - confirm against decorated_statement
+    parse_statement_decoration_builtin  // presumably a statement prefixed with 'builtin' - confirm against decorated_statement
+};
+
+/* Parse error code list. A code is stored in each parse_error_t alongside the error text. */
+enum parse_error_code_t
+{
+    parse_error_none,
+    parse_error_generic, //unknown type
+
+    parse_error_tokenizer, //tokenizer error
+
+    parse_error_unbalancing_end, //end outside of block
+    parse_error_unbalancing_else, //else outside of if
+    parse_error_unbalancing_case, //case outside of switch
+};
+
+
+/**
+   Error message for tokenizer error. The tokenizer message is
+   appended to this message.
+*/
+#define TOK_ERR_MSG _( L"Tokenizer error: '%ls'")
+
+/**
+   Error message for short circuit command error.
+*/
+#define COND_ERR_MSG _( L"An additional command is required" )
+
+/**
+   Error message on a function that calls itself immediately
+*/
+#define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")
+
+/**
+   Error message on reaching maximum recursion depth
+*/
+#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
+
+/**
+   Error message used when the end of a block can't be located
+*/
+#define BLOCK_END_ERR_MSG _( L"Could not locate end of block. The 'end' command is missing, misspelled or a ';' is missing.")
+
+/**
+   Error message when a non-string token is found when expecting a command name
+*/
+#define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'")
+
+/**
+   Error message when a non-string token is found when expecting a command name; additionally suggests the 'COMMAND; or COMMAND' form
+*/
+#define CMD_OR_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.")
+
+/**
+   Error message when a non-string token is found when expecting a command name; additionally suggests the 'COMMAND; and COMMAND' form
+*/
+#define CMD_AND_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; and COMMAND'? See the help section for the 'and' builtin command by typing 'help and'.")
+
+/**
+   Error message when encountering an illegal command name
+*/
+#define ILLEGAL_CMD_ERR_MSG _( L"Illegal command name '%ls'")
+
+/**
+   Error message when encountering an illegal file descriptor
+*/
+#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor '%ls'")
+
+/**
+   Error message for wildcards with no matches
+*/
+#define WILDCARD_ERR_MSG _( L"No matches for wildcard '%ls'.")
+
+/**
+   Error when using case builtin outside of switch block
+*/
+#define INVALID_CASE_ERR_MSG _( L"'case' builtin not inside of switch block")
+
+/**
+   Error when using loop control builtins (break or continue) outside of loop
+*/
+#define INVALID_LOOP_ERR_MSG _( L"Loop control command while not inside of loop" )
+
+/**
+   Error when using return builtin outside of function definition
+*/
+#define INVALID_RETURN_ERR_MSG _( L"'return' builtin command outside of function definition" )
+
+/**
+   Error when using else builtin outside of if block
+*/
+#define INVALID_ELSE_ERR_MSG _( L"'%ls' builtin not inside of if block" )
+
+/**
+   Error when using 'else if' past a naked 'else'
+*/
+#define INVALID_ELSEIF_PAST_ELSE_ERR_MSG _( L"'%ls' used past terminating 'else'" )
+
+/**
+   Error when using end builtin outside of block
+*/
+#define INVALID_END_ERR_MSG _( L"'end' command outside of block")
+
+/**
+   Error message for Posix-style assignment: foo=bar
+*/
+#define COMMAND_ASSIGN_ERR_MSG _( L"Unknown command '%ls'. Did you mean 'set %ls %ls'? See the help section on the set command by typing 'help set'.")
+
+/**
+   Error for invalid redirection token
+*/
+#define REDIRECT_TOKEN_ERR_MSG _( L"Expected redirection specification, got token of type '%ls'")
+
+/**
+   Error when encountering redirection without a command
+*/
+#define INVALID_REDIRECTION_ERR_MSG _( L"Encountered redirection when expecting a command name. Fish does not allow a redirection operation before a command.")
+
+/**
+   Error for evaluating null pointer
+*/
+#define EVAL_NULL_ERR_MSG _( L"Tried to evaluate null pointer." )
+
+/**
+   Error for evaluating in illegal scope
+*/
+#define INVALID_SCOPE_ERR_MSG _( L"Tried to evaluate commands using invalid block type '%ls'" )
+
+
+/**
+   Error for wrong token type
+*/
+#define UNEXPECTED_TOKEN_ERR_MSG _( L"Unexpected token of type '%ls'")
+
+/**
+   While block description
+*/
+#define WHILE_BLOCK N_( L"'while' block" )
+
+/**
+   For block description
+*/
+#define FOR_BLOCK N_( L"'for' block" )
+
+/**
+   Breakpoint block
+*/
+#define BREAKPOINT_BLOCK N_( L"Block created by breakpoint" )
+
+
+
+/**
+   If block description
+*/
+#define IF_BLOCK N_( L"'if' conditional block" )
+
+
+/**
+   Function definition block description
+*/
+#define FUNCTION_DEF_BLOCK N_( L"function definition block" )
+
+
+/**
+   Function invocation block description
+*/
+#define FUNCTION_CALL_BLOCK N_( L"function invocation block" )
+
+/**
+   Description of a function invocation block with no variable shadowing
+*/
+#define FUNCTION_CALL_NO_SHADOW_BLOCK N_( L"function invocation block with no variable shadowing" )
+
+
+/**
+   Switch block description
+*/
+#define SWITCH_BLOCK N_( L"'switch' block" )
+
+
+/**
+   Fake block description
+*/
+#define FAKE_BLOCK N_( L"unexecutable block" )
+
+
+/**
+   Top block description
+*/
+#define TOP_BLOCK N_( L"global root block" )
+
+
+/**
+   Command substitution block description
+*/
+#define SUBST_BLOCK N_( L"command substitution block" )
+
+
+/**
+   Begin block description
+*/
+#define BEGIN_BLOCK N_( L"'begin' unconditional block" )
+
+
+/**
+   Source block description
+*/
+#define SOURCE_BLOCK N_( L"Block created by the . builtin" )
+
+/**
+   Event handler block description
+*/
+#define EVENT_BLOCK N_( L"event handler block" )
+
+
+/**
+   Unknown block description
+*/
+#define UNKNOWN_BLOCK N_( L"unknown/invalid block" )
+
+
+
+#endif
--- a/parse_productions.cpp
+++ b/parse_productions.cpp
@ -46,7 +46,7 @@ RESOLVE(job_list)
    switch (token1.type)
    {
        case parse_token_type_string:
-            // 'end' is special
+            // some keywords are special
            switch (token1.keyword)
            {
                case parse_keyword_end:
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -4,6 +4,11 @@

 using namespace parse_productions;

+/* Returns true if the given production is empty, i.e. its first element is token_type_invalid */
+static bool production_is_empty(const production_t *production)
+{
+    const bool first_is_invalid = ((*production)[0] == token_type_invalid);
+    return first_is_invalid;
+}
+
 /** Returns a string description of this parse error */
 wcstring parse_error_t::describe(const wcstring &src) const
 {
@ -18,7 +23,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
        //fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length);
        if (newline != wcstring::npos)
        {
-            line_start = newline;// + 1;
+            line_start = newline + 1;
        }

        size_t line_end = src.find(L'\n', source_start + source_length);
@ -155,6 +160,8 @@ wcstring keyword_description(parse_keyword_t k)
            return L"function";
        case parse_keyword_switch:
            return L"switch";
+        case parse_keyword_case:
+            return L"case";
        case parse_keyword_end:
            return L"end";
        case parse_keyword_and:
@ -167,9 +174,8 @@ wcstring keyword_description(parse_keyword_t k)
            return L"command";
        case parse_keyword_builtin:
            return L"builtin";
-        default:
-            return format_string(L"Unknown keyword type %ld", static_cast<long>(k));
    }
+    return format_string(L"Unknown keyword type %ld", static_cast<long>(k));
 }

 /** Returns a string description of the given parse node */
@ -348,7 +354,8 @@ class parse_ll_t
    bool top_node_handle_terminal_types(parse_token_t token);

    void parse_error(const wchar_t *expected, parse_token_t token);
-    void parse_error(parse_token_t token, const wchar_t *format, ...);
+    void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...);
+    void parse_error_unbalancing_token(parse_token_t token);
    void append_error_callout(wcstring &error_message, parse_token_t token);

    void dump_stack(void) const;
@ -450,6 +457,9 @@ class parse_ll_t
    /* Input */
    void accept_tokens(parse_token_t token1, parse_token_t token2);
    
+    /* Report tokenizer errors */
+    void report_tokenizer_error(parse_token_t token, const wchar_t *tok_error);
+    
    /* Indicate if we hit a fatal error */
    bool has_fatal_error(void) const
    {
@ -558,7 +568,7 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e
    this->symbol_stack.clear();
 }

-void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
+void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *fmt, ...)
 {
    this->fatal_errored = true;
    if (this->should_generate_error_messages)
@ -569,6 +579,7 @@ void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
        va_list va;
        va_start(va, fmt);
        err.text = vformat_string(fmt, va);
+        err.code = code;
        va_end(va);

        err.source_start = token.source_start;
@ -577,6 +588,42 @@ void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
    }
 }

+// Unbalancing token. This includes 'else' or 'case' or 'end' outside of the appropriate block.
+// Marks the parse as fatally errored and, if error messages are being generated, records an
+// error whose code identifies which keyword was unbalanced.
+// This essentially duplicates some logic from resolving the production for symbol_job_list - yuck
+void parse_ll_t::parse_error_unbalancing_token(parse_token_t token)
+{
+    this->fatal_errored = true;
+    if (this->should_generate_error_messages)
+    {
+        assert(token.type == parse_token_type_string);
+        assert(token.keyword == parse_keyword_end || token.keyword == parse_keyword_else || token.keyword == parse_keyword_case);
+        switch (token.keyword)
+        {
+            case parse_keyword_end:
+                this->parse_error(token, parse_error_unbalancing_end, L"'end' outside of a block");
+                break;
+
+            case parse_keyword_else:
+                this->parse_error(token, parse_error_unbalancing_else, L"'else' builtin not inside of if block");
+                break;
+
+            case parse_keyword_case:
+                // 'case' belongs to a switch block, not an if block
+                this->parse_error(token, parse_error_unbalancing_case, L"'case' builtin not inside of switch block");
+                break;
+
+            default:
+                // Only reachable when asserts are compiled out and a non-unbalancing keyword is passed in
+                fprintf(stderr, "Unexpected token %ls passed to %s\n", token.describe().c_str(), __FUNCTION__);
+                PARSER_DIE();
+                break;
+        }
+    }
+}
+
+void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error)
+{
+    assert(tok_error != NULL);
+    this->parse_error(token, parse_error_tokenizer, L"%ls", tok_error);
+}

 void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
 {
@ -584,11 +631,7 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
    if (this->should_generate_error_messages)
    {
        wcstring desc = token_type_description(token.type);
-        parse_error_t error;
-        error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str());
-        error.source_start = token.source_start;
-        error.source_start = token.source_length;
-        errors.push_back(error);
+        this->parse_error(token, parse_error_generic, L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str());
    }
 }

@ -629,13 +672,6 @@ static bool type_is_terminal_type(parse_token_type_t type)

 bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
 {
-    if (symbol_stack.empty())
-    {
-        // This can come about with an unbalanced 'end' or 'else', which causes us to terminate the outermost job list.
-        this->fatal_errored = true;
-        return false;
-    }
-
    PARSE_ASSERT(! symbol_stack.empty());
    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
    bool handled = false;
@ -674,7 +710,30 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
        else
        {
            // Failure
-            this->fatal_errored = true;
+            if (stack_top.type == parse_token_type_string && token.type == parse_token_type_string)
+            {
+                // Must be different keywords. We should unify this with the 'matched' computation above.
+                assert(stack_top.keyword != parse_keyword_none && stack_top.keyword != token.keyword);
+                const wcstring expected = keyword_description(stack_top.keyword);
+                wcstring actual;
+                if (token.keyword == parse_keyword_none)
+                {
+                    // This is a random other string (not a keyword)
+                    this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'", expected.c_str());
+                }
+                else
+                {
+                    // Got a real keyword we can report
+                    const wcstring actual = (token.keyword == parse_keyword_none ? token.describe() : keyword_description(token.keyword));
+                    this->parse_error(token, parse_error_generic, L"Expected keyword '%ls', instead got keyword '%ls'", expected.c_str(), actual.c_str());
+                }
+            }
+            else
+            {
+                const wcstring expected = token_type_description(stack_top.type);
+                const wcstring actual = token_type_description(token.type);
+                this->parse_error(expected.c_str(), token);
+            }
        }

        // We handled the token, so pop the symbol stack
@ -734,25 +793,29 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
        {
            if (should_generate_error_messages)
            {
-                this->parse_error(token1, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token1.describe().c_str());
+                this->parse_error(token1, parse_error_generic, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token1.describe().c_str());
            }
            else
            {
-                this->parse_error(token1, NULL);
+                this->parse_error(token1, parse_error_generic, NULL);
            }
            // parse_error sets fatal_errored, which ends the loop
        }
        else
        {
+            // When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end.
+            if (symbol_stack.size() == 1 && production_is_empty(production))
+            {
+                this->parse_error_unbalancing_token(token1);
+                break;
+            }
+            
            // Manipulate the symbol stack.
            // Note that stack_elem is invalidated by popping the stack.
            symbol_stack_pop_push_production(production);
-
-            // If we end up with an empty stack, something bad happened, like an unbalanced end
-            if (symbol_stack.empty())
-            {
-                this->parse_error(token1, L"All symbols removed from symbol stack. Likely unbalanced else or end?");
-            }
+            
+            // Expect to not have an empty stack
+            assert(! symbol_stack.empty());
        }
    }
 }
@ -842,13 +905,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
    this->parser->set_should_generate_error_messages(errors != NULL);

    /* Construct the tokenizer */
-    tok_flags_t tok_options = TOK_SQUASH_ERRORS;
+    tok_flags_t tok_options = 0;
    if (parse_flags & parse_flag_include_comments)
        tok_options |= TOK_SHOW_COMMENTS;
    
    if (parse_flags & parse_flag_accept_incomplete_tokens)
        tok_options |= TOK_ACCEPT_UNFINISHED;
    
+    if (errors == NULL)
+        tok_options |= TOK_SQUASH_ERRORS;
+    
    tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
    
    /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */
@ -864,6 +930,12 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
        /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */
        this->parser->accept_tokens(queue[0], queue[1]);
        
+        /* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
+        if (queue[1].type == parse_special_type_tokenizer_error)
+        {
+            this->parser->report_tokenizer_error(queue[1], tok_last(&tok));
+        }
+        
        /* Handle errors */
        if (this->parser->has_fatal_error())
        {
--- a/parse_tree.h
+++ b/parse_tree.h
@ -12,12 +12,10 @@
 #include "util.h"
 #include "common.h"
 #include "tokenizer.h"
+#include "parse_constants.h"
 #include <vector>
 #include <inttypes.h>

-#define PARSE_ASSERT(a) assert(a)
-#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0)
-
 class parse_node_t;
 class parse_node_tree_t;
 typedef size_t node_offset_t;
@ -27,6 +25,9 @@ struct parse_error_t
 {
    /** Text of the error */
    wcstring text;
+    
+    /** Code for the error */
+    enum parse_error_code_t code;

    /** Offset and length of the token in the source code that triggered this error */
    size_t source_start;
@ -37,87 +38,6 @@ struct parse_error_t
 };
 typedef std::vector<parse_error_t> parse_error_list_t;

-enum parse_token_type_t
-{
-    token_type_invalid,
-
-    // Non-terminal tokens
-    symbol_job_list,
-    symbol_job,
-    symbol_job_continuation,
-    symbol_statement,
-    symbol_block_statement,
-    symbol_block_header,
-    symbol_for_header,
-    symbol_while_header,
-    symbol_begin_header,
-    symbol_function_header,
-
-    symbol_if_statement,
-    symbol_if_clause,
-    symbol_else_clause,
-    symbol_else_continuation,
-
-    symbol_switch_statement,
-    symbol_case_item_list,
-    symbol_case_item,
-
-    symbol_boolean_statement,
-    symbol_decorated_statement,
-    symbol_plain_statement,
-    symbol_arguments_or_redirections_list,
-    symbol_argument_or_redirection,
-
-    symbol_argument_list,
-
-    symbol_argument,
-    symbol_redirection,
-
-    symbol_optional_background,
-
-    // Terminal types
-    parse_token_type_string,
-    parse_token_type_pipe,
-    parse_token_type_redirection,
-    parse_token_type_background,
-    parse_token_type_end,
-    parse_token_type_terminate,
-
-    // Very special terminal types that don't appear in the production list
-    parse_special_type_parse_error,
-    parse_special_type_tokenizer_error,
-    parse_special_type_comment,
-
-    FIRST_TERMINAL_TYPE = parse_token_type_string,
-    LAST_TERMINAL_TYPE = parse_token_type_terminate,
-
-    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
-    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
-};
-
-enum parse_keyword_t
-{
-    parse_keyword_none,
-    parse_keyword_if,
-    parse_keyword_else,
-    parse_keyword_for,
-    parse_keyword_in,
-    parse_keyword_while,
-    parse_keyword_begin,
-    parse_keyword_function,
-    parse_keyword_switch,
-    parse_keyword_case,
-    parse_keyword_end,
-    parse_keyword_and,
-    parse_keyword_or,
-    parse_keyword_not,
-    parse_keyword_command,
-    parse_keyword_builtin,
-    
-    LAST_KEYWORD = parse_keyword_builtin
-};
-
-
 /** A struct representing the token type that we use internally */
 struct parse_token_t
 {
@ -233,14 +153,6 @@ public:
    }
 };

-/* Statement decorations. This matches the order of productions in decorated_statement */
-enum parse_statement_decoration_t
-{
-    parse_statement_decoration_none,
-    parse_statement_decoration_command,
-    parse_statement_decoration_builtin
-};
-

 /* The parse tree itself */
 class parse_node_tree_t : public std::vector<parse_node_t>
--- a/parser.cpp
+++ b/parser.cpp
@ -86,11 +86,6 @@ The fish parser. Contains functions for parsing and evaluating code.
 */
 #define BLOCK_END_ERR_MSG _( L"Could not locate end of block. The 'end' command is missing, misspelled or a ';' is missing.")

-/**
-   Error message on reaching maximum number of block calls
-*/
-#define BLOCK_ERR_MSG _( L"Maximum number of nested blocks reached.")
-
 /**
   Error message when a non-string token is found when expecting a command name
 */
@ -2572,9 +2567,9 @@ void parser_t::eval_job(tokenizer_t *tok)

 }

-int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type)
+int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type)
 {
-    const wchar_t * const cmd = cmdStr.c_str();
+    const wchar_t * const cmd = cmd_str.c_str();
    size_t forbid_count;
    int code;
    block_t *start_current_block = current_block;
@ -2597,13 +2592,6 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type

    debug(4, L"eval: %ls", cmd);

-    if (!cmd)
-    {
-        debug(1,
-              EVAL_NULL_ERR_MSG);
-        bugreport();
-        return 1;
-    }

    if ((block_type != TOP) &&
            (block_type != SUBST))
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@ -96,16 +96,8 @@ int tok_get_error(tokenizer_t *tok)

 tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
 {
-
-    /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
-    if (!(flags & TOK_SQUASH_ERRORS))
-    {
-        ASSERT_IS_MAIN_THREAD();
-    }
-
    CHECK(b,);

-
    this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
    this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
    this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
--- a/wutil.cpp
+++ b/wutil.cpp
@ -476,7 +476,7 @@ const wchar_t *wgettext(const wchar_t *in)
    {
        cstring mbs_in = wcs2string(key);
        char *out = fish_gettext(mbs_in.c_str());
-        val = new wcstring(format_string(L"%s", out));
+        val = new wcstring(format_string(L"%s", out)); //note that this writes into the map!
    }
    errno = err;
    return val->c_str();