Clean up tokenizer error handling.

2025-01-14 14:03:58 +00:00 · 2018-02-19 16:31:39 -08:00 · 2018-02-19 16:31:39 -08:00 · 0f62161b2b
commit 0f62161b2b
parent f30bf40300
3 changed files with 41 additions and 26 deletions
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@ -730,7 +730,8 @@ void parse_ll_t::report_tokenizer_error(const tok_t &tok) {
            parse_error_code = parse_error_tokenizer_unterminated_escape;
            break;
        }
-        case TOK_OTHER:
+        case TOK_INVALID_REDIRECT:
+        case TOK_INVALID_PIPE:
        default: {
            parse_error_code = parse_error_tokenizer_other;
            break;
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@ -16,12 +16,6 @@
 #include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
-// Wow what a hack.
-#define TOK_CALL_ERROR(t, e, x, where)                               \
-    do {                                                             \
-        (t)->call_error((e), where, (t)->squash_errors ? L"" : (x)); \
-    } while (0)
 /// Error string for unexpected end of string.
 #define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
@ -41,13 +35,38 @@
 #define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
 /// Set the latest tokens string to be the specified error message.
-void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where,
+void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where) {
-                             const wchar_t *error_message) {
+    assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
    this->last_type = TOK_ERROR;
    this->error = error_type;
-    this->global_error_offset = where ? where - this->start : 0;
-    this->last_token = error_message;
    this->has_next = false;
    this->global_error_offset = where ? where - this->start : 0;
    if (this->squash_errors) {
        this->last_token.clear();
    } else {
        switch (error_type) {
            case TOK_UNTERMINATED_QUOTE:
                this->last_token = QUOTE_ERROR;
                break;
            case TOK_UNTERMINATED_SUBSHELL:
                this->last_token = PARAN_ERROR;
                break;
            case TOK_UNTERMINATED_SLICE:
                this->last_token = SQUARE_BRACKET_ERROR;
                break;
            case TOK_UNTERMINATED_ESCAPE:
                this->last_token = UNTERMINATED_ESCAPE_ERROR;
                break;
            case TOK_INVALID_REDIRECT:
                this->last_token = REDIRECT_ERROR;
                break;
            case TOK_INVALID_PIPE:
                this->last_token = PIPE_ERROR;
                break;
            default:
                assert(0 && "Unknown error type");
        }
    }
 }
 tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start), start(start) {
@ -151,8 +170,7 @@ void tokenizer_t::read_string() {
                this->buff++;
                if (*this->buff == L'\0') {
                    if ((!this->accept_unfinished)) {
-                        TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, UNTERMINATED_ESCAPE_ERROR,
+                        this->call_error(TOK_UNTERMINATED_ESCAPE, error_location);
-                                       error_location);
                        return;
                    }
                    // Since we are about to increment tok->buff, decrement it first so the
@ -191,8 +209,7 @@ void tokenizer_t::read_string() {
                                this->buff += wcslen(this->buff);
                                if (!this->accept_unfinished) {
-                                    TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR,
+                                    this->call_error(TOK_UNTERMINATED_QUOTE, error_loc);
-                                                   error_loc);
                                    return;
                                }
                                do_loop = 0;
@ -221,8 +238,7 @@ void tokenizer_t::read_string() {
                                const wchar_t *error_loc = this->buff;
                                this->buff += wcslen(this->buff);
                                if ((!this->accept_unfinished)) {
-                                    TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR,
+                                    this->call_error(TOK_UNTERMINATED_QUOTE, error_loc);
-                                                   error_loc);
                                    return;
                                }
                                do_loop = 0;
@ -298,14 +314,12 @@ void tokenizer_t::read_string() {
                    offset_of_open_paran = paran_offsets[paran_count - 1];
                }
-                TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR,
+                this->call_error(TOK_UNTERMINATED_SUBSHELL, this->start + offset_of_open_paran);
-                               this->start + offset_of_open_paran);
                break;
            }
            case mode_array_brackets:
            case mode_array_brackets_and_subshell: {
-                TOK_CALL_ERROR(this, TOK_UNTERMINATED_SLICE, SQUARE_BRACKET_ERROR,
+                this->call_error(TOK_UNTERMINATED_SLICE, this->start + offset_of_bracket);
-                               this->start + offset_of_bracket);
                break;
            }
            default: {
@ -551,7 +565,7 @@ bool tokenizer_t::tok_next() {
            int fd = -1;
            size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd);
            if (consumed == 0 || fd < 0) {
-                TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
+                this->call_error(TOK_INVALID_REDIRECT, this->buff);
            } else {
                this->buff += consumed;
                this->last_type = mode;
@ -574,7 +588,7 @@ bool tokenizer_t::tok_next() {
                // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer
                // error.
                if (mode == TOK_PIPE && fd == 0) {
-                    TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
+                    this->call_error(TOK_INVALID_PIPE, error_location);
                } else {
                    this->buff += consumed;
                    this->last_type = mode;
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@ -30,7 +30,8 @@ enum tokenizer_error {
    TOK_UNTERMINATED_SUBSHELL,
    TOK_UNTERMINATED_SLICE,
    TOK_UNTERMINATED_ESCAPE,
-    TOK_OTHER
+    TOK_INVALID_REDIRECT,
+    TOK_INVALID_PIPE
 };
 /// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
@ -101,8 +102,7 @@ class tokenizer_t {
    /// Whether to continue the previous line after the comment.
    bool continue_line_after_comment{false};
-    void call_error(enum tokenizer_error error_type, const wchar_t *where,
+    void call_error(enum tokenizer_error error_type, const wchar_t *where);
-                    const wchar_t *error_message);
    void read_string();
    bool tok_next();