Merge branch 'fix_brace_parsing'

Closes #3802 and improves tokenizer handling of invalid expressions involving braces, parentheses, and brackets.
2024-12-27 05:13:10 +00:00 · 2018-03-12 07:05:27 -05:00 · 2018-03-12 07:05:27 -05:00 · d385248cc8
commit d385248cc8
parent d367d57ae9 1e5d7d98a8
16 changed files with 360 additions and 315 deletions
--- a/src/common.cpp
+++ b/src/common.cpp
@ -1288,10 +1288,11 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
    const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
    const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
-    int bracket_count = 0;
+    bool brace_text_start = false;
    int brace_count = 0;
    bool errored = false;
-    enum { mode_unquoted, mode_single_quotes, mode_double_quotes } mode = mode_unquoted;
+    enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted;
    for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
        const wchar_t c = input[input_position];
@ -1352,21 +1353,32 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                }
                case L'{': {
                    if (unescape_special) {
-                        bracket_count++;
+                        brace_count++;
-                        to_append_or_none = BRACKET_BEGIN;
+                        to_append_or_none = BRACE_BEGIN;
                    }
                    break;
                }
                case L'}': {
                    if (unescape_special) {
-                        bracket_count--;
+                        assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
-                        to_append_or_none = BRACKET_END;
+                        brace_count--;
                        brace_text_start = brace_text_start && brace_count > 0;
                        to_append_or_none = BRACE_END;
                    }
                    break;
                }
                case L',': {
-                    if (unescape_special && bracket_count > 0) {
+                    if (unescape_special && brace_count > 0) {
-                        to_append_or_none = BRACKET_SEP;
+                        to_append_or_none = BRACE_SEP;
                        brace_text_start = false;
                    }
                    break;
                }
                case L'\n':
                case L'\t':
                case L' ': {
                    if (unescape_special && brace_count > 0) {
                        to_append_or_none = brace_text_start ? BRACE_SPACE : NOT_A_WCHAR;
                    }
                    break;
                }
@ -1380,7 +1392,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                    to_append_or_none = unescape_special ? wint_t(INTERNAL_SEPARATOR) : NOT_A_WCHAR;
                    break;
                }
-                default: { break; }
+                default: {
                    if (unescape_special && brace_count > 0) {
                        brace_text_start = true;
                    }
                    break;
                }
            }
        } else if (mode == mode_single_quotes) {
            if (c == L'\\') {
--- a/src/common.h
+++ b/src/common.h
@ -807,6 +807,19 @@ struct enum_map {
    const wchar_t *const str;
 };
 /// Use for scoped enums (i.e. `enum class`) with bitwise operations.
 ///
 /// ENUM_FLAG_OPERATOR(T,X,Y) defines two overloads for the enum type T: the binary operator X
 /// (e.g. `|`) and its compound-assignment form Y (e.g. `|=`). Both convert their operands to
 /// std::underlying_type<T>::type, apply X, and cast the result back to T. Note that the
 /// compound-assignment form returns T by value rather than T&.
 #define ENUM_FLAG_OPERATOR(T,X,Y) \
 inline T operator X (T lhs, T rhs) { return (T) (static_cast<std::underlying_type<T>::type>(lhs) X static_cast<std::underlying_type<T>::type>(rhs)); } \
 inline T operator Y (T &lhs, T rhs) { return lhs = (T) (static_cast<std::underlying_type<T>::type>(lhs) X static_cast<std::underlying_type<T>::type>(rhs)); }
 /// ENUM_FLAGS(T) forward-declares `enum class T`, defines `~`, `|`, `^`, `&` and the matching
 /// compound-assignment operators for it, and ends with `enum class T` so that the invocation is
 /// followed directly by the enumerator list, e.g. `ENUM_FLAGS(my_flags) { a = 1 << 0 };`.
 #define ENUM_FLAGS(T) \
 enum class T; \
 inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type<T>::type>(t)); } \
 ENUM_FLAG_OPERATOR(T,|,|=) \
 ENUM_FLAG_OPERATOR(T,^,^=) \
 ENUM_FLAG_OPERATOR(T,&,&=) \
 enum class T
 /// Given a string return the matching enum. Return the sentinel enum if no match is made. The map
 /// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16
 /// elements in the map.
--- a/src/expand.cpp
+++ b/src/expand.cpp
@ -47,6 +47,7 @@
 #include "proc.h"
 #include "reader.h"
 #include "wildcard.h"
 #include "wcstringutil.h"
 #include "wutil.h"  // IWYU pragma: keep
 #ifdef KERN_PROCARGS2
 #else
@ -570,7 +571,7 @@ static void find_process(const wchar_t *proc, expand_flags_t flags,
 static size_t parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector<long> &idx,
                          std::vector<size_t> &source_positions, size_t array_size) {
    const long size = (long)array_size;
-    size_t pos = 1;  // skip past the opening square bracket
+    size_t pos = 1;  // skip past the opening square bracket
    while (1) {
        while (iswspace(in[pos]) || (in[pos] == INTERNAL_SEPARATOR)) pos++;
@ -846,39 +847,39 @@ static bool expand_variables(const wcstring &instr, std::vector<completion_t> *o
    return true;
 }
-/// Perform bracket expansion.
+/// Perform brace expansion.
-static expand_error_t expand_brackets(const wcstring &instr, expand_flags_t flags,
+static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
                                      std::vector<completion_t> *out, parse_error_list_t *errors) {
    bool syntax_error = false;
-    int bracket_count = 0;
+    int brace_count = 0;
-    const wchar_t *bracket_begin = NULL, *bracket_end = NULL;
+    const wchar_t *brace_begin = NULL, *brace_end = NULL;
    const wchar_t *last_sep = NULL;
    const wchar_t *item_begin;
-    size_t length_preceding_brackets, length_following_brackets, tot_len;
+    size_t length_preceding_braces, length_following_braces, tot_len;
    const wchar_t *const in = instr.c_str();
-    // Locate the first non-nested bracket pair.
+    // Locate the first non-nested brace pair.
    for (const wchar_t *pos = in; (*pos) && !syntax_error; pos++) {
        switch (*pos) {
-            case BRACKET_BEGIN: {
+            case BRACE_BEGIN: {
-                if (bracket_count == 0) bracket_begin = pos;
+                if (brace_count == 0) brace_begin = pos;
-                bracket_count++;
+                brace_count++;
                break;
            }
-            case BRACKET_END: {
+            case BRACE_END: {
-                bracket_count--;
+                brace_count--;
-                if (bracket_count < 0) {
+                if (brace_count < 0) {
                    syntax_error = true;
-                } else if (bracket_count == 0) {
+                } else if (brace_count == 0) {
-                    bracket_end = pos;
+                    brace_end = pos;
                }
                break;
            }
-            case BRACKET_SEP: {
+            case BRACE_SEP: {
-                if (bracket_count == 1) last_sep = pos;
+                if (brace_count == 1) last_sep = pos;
                break;
            }
            default: {
@ -887,72 +888,80 @@ static expand_error_t expand_brackets(const wcstring &instr, expand_flags_t flag
        }
    }
-    if (bracket_count > 0) {
+    if (brace_count > 0) {
        if (!(flags & EXPAND_FOR_COMPLETIONS)) {
            syntax_error = true;
        } else {
-            // The user hasn't typed an end bracket yet; make one up and append it, then expand
+            // The user hasn't typed an end brace yet; make one up and append it, then expand
            // that.
            wcstring mod;
            if (last_sep) {
-                mod.append(in, bracket_begin - in + 1);
+                mod.append(in, brace_begin - in + 1);
                mod.append(last_sep + 1);
-                mod.push_back(BRACKET_END);
+                mod.push_back(BRACE_END);
            } else {
                mod.append(in);
-                mod.push_back(BRACKET_END);
+                mod.push_back(BRACE_END);
            }
            // Note: this code looks very fishy, apparently it has never worked.
-            return expand_brackets(mod, 1, out, errors);
+            return expand_braces(mod, 1, out, errors);
        }
    }
    // Expand a literal "{}" to itself because it is useless otherwise,
    // and this eases e.g. `find -exec {}`. See #1109.
-    if (bracket_begin + 1 == bracket_end) {
+    if (brace_begin + 1 == brace_end) {
        wcstring newstr = instr;
-        newstr.at(bracket_begin - in) = L'{';
+        newstr.at(brace_begin - in) = L'{';
-        newstr.at(bracket_end - in) = L'}';
+        newstr.at(brace_end - in) = L'}';
-        return expand_brackets(newstr, flags, out, errors);
+        return expand_braces(newstr, flags, out, errors);
    }
    if (syntax_error) {
-        append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, _(L"Mismatched brackets"));
+        append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, _(L"Mismatched braces"));
        return EXPAND_ERROR;
    }
-    if (bracket_begin == NULL) {
+    if (brace_begin == NULL) {
        append_completion(out, instr);
        return EXPAND_OK;
    }
-    length_preceding_brackets = (bracket_begin - in);
+    length_preceding_braces = (brace_begin - in);
-    length_following_brackets = wcslen(bracket_end) - 1;
+    length_following_braces = wcslen(brace_end) - 1;
-    tot_len = length_preceding_brackets + length_following_brackets;
+    tot_len = length_preceding_braces + length_following_braces;
-    item_begin = bracket_begin + 1;
+    item_begin = brace_begin + 1;
-    for (const wchar_t *pos = (bracket_begin + 1); true; pos++) {
+    for (const wchar_t *pos = (brace_begin + 1); true; pos++) {
-        if (bracket_count == 0 && ((*pos == BRACKET_SEP) || (pos == bracket_end))) {
+        if (brace_count == 0 && ((*pos == BRACE_SEP) || (pos == brace_end))) {
            assert(pos >= item_begin);
            size_t item_len = pos - item_begin;
            wcstring item = wcstring(item_begin, item_len);
            item = trim(item, (const wchar_t[]) { BRACE_SPACE });
            for (auto &c : item) {
                if (c == BRACE_SPACE) {
                    c = ' ';
                }
            }
            wcstring whole_item;
            whole_item.reserve(tot_len + item_len + 2);
-            whole_item.append(in, length_preceding_brackets);
+            whole_item.append(in, length_preceding_braces);
-            whole_item.append(item_begin, item_len);
+            whole_item.append(item.begin(), item.end());
-            whole_item.append(bracket_end + 1);
+            whole_item.append(brace_end + 1);
-            expand_brackets(whole_item, flags, out, errors);
+            whole_item = trim(whole_item, (const wchar_t[]) { BRACE_SPACE });
            expand_braces(whole_item, flags, out, errors);
            item_begin = pos + 1;
-            if (pos == bracket_end) break;
+            if (pos == brace_end) break;
        }
-        if (*pos == BRACKET_BEGIN) {
+        if (*pos == BRACE_BEGIN) {
-            bracket_count++;
+            brace_count++;
        }
-        if (*pos == BRACKET_END) {
+        if (*pos == BRACE_END) {
-            bracket_count--;
+            brace_count--;
        }
    }
    return EXPAND_OK;
@ -1274,9 +1283,9 @@ static expand_error_t expand_stage_variables(const wcstring &input, std::vector<
    return EXPAND_OK;
 }
-static expand_error_t expand_stage_brackets(const wcstring &input, std::vector<completion_t> *out,
+static expand_error_t expand_stage_braces(const wcstring &input, std::vector<completion_t> *out,
                                            expand_flags_t flags, parse_error_list_t *errors) {
-    return expand_brackets(input, flags, out, errors);
+    return expand_braces(input, flags, out, errors);
 }
 static expand_error_t expand_stage_home(const wcstring &input,
@ -1393,7 +1402,7 @@ expand_error_t expand_string(const wcstring &input, std::vector<completion_t> *o
    // Our expansion stages.
    const expand_stage_t stages[] = {expand_stage_cmdsubst, expand_stage_variables,
-                                     expand_stage_brackets, expand_stage_home,
+                                     expand_stage_braces, expand_stage_home,
                                     expand_stage_wildcards};
    // Load up our single initial completion.
--- a/src/expand.h
+++ b/src/expand.h
@ -65,11 +65,13 @@ enum {
    /// Character representing variable expansion into a single element.
    VARIABLE_EXPAND_SINGLE,
    /// Character representing the start of a brace expansion.
-    BRACKET_BEGIN,
+    BRACE_BEGIN,
    /// Character representing the end of a brace expansion.
-    BRACKET_END,
+    BRACE_END,
    /// Character representing separation between two brace elements.
-    BRACKET_SEP,
+    BRACE_SEP,
    /// Character that takes the place of any whitespace within non-quoted text in braces
    BRACE_SPACE,
    /// Separate subtokens in a token with this character.
    INTERNAL_SEPARATOR,
    /// Character representing an empty variable expansion. Only used transitively while expanding
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -578,6 +578,15 @@ static void test_tokenizer() {
        do_test(token.error_offset == 3);
    }
    {
        tokenizer_t t(L"abc )defg(hij", 0);
        do_test(t.next(&token));
        do_test(t.next(&token));
        do_test(token.type == TOK_ERROR);
        do_test(token.error == TOK_CLOSING_UNOPENED_SUBSHELL);
        do_test(token.error_offset == 4);
    }
    {
        tokenizer_t t(L"abc defg(hij (klm)", 0);
        do_test(t.next(&token));
@ -4420,10 +4429,11 @@ static void test_illegal_command_exit_code() {
    const command_result_tuple_t tests[] = {
        {L"echo -n", STATUS_CMD_OK}, {L"pwd", STATUS_CMD_OK},
-        {L")", STATUS_ILLEGAL_CMD},  {L") ", STATUS_ILLEGAL_CMD},
+        // a `)` without a matching `(` is now a tokenizer error, and cannot be executed even as an illegal command
        // {L")", STATUS_ILLEGAL_CMD},  {L") ", STATUS_ILLEGAL_CMD}, {L") ", STATUS_ILLEGAL_CMD}
        {L"*", STATUS_ILLEGAL_CMD},  {L"**", STATUS_ILLEGAL_CMD},
        {L"?", STATUS_ILLEGAL_CMD},  {L"abc?def", STATUS_ILLEGAL_CMD},
-        {L") ", STATUS_ILLEGAL_CMD}};
+    };
    int res = 0;
    const io_chain_t empty_ios;
--- a/src/highlight.cpp
+++ b/src/highlight.cpp
@ -122,9 +122,9 @@ bool is_potential_path(const wcstring &potential_path_fragment, const wcstring_l
        switch (c) {
            case VARIABLE_EXPAND:
            case VARIABLE_EXPAND_SINGLE:
-            case BRACKET_BEGIN:
+            case BRACE_BEGIN:
-            case BRACKET_END:
+            case BRACE_END:
-            case BRACKET_SEP:
+            case BRACE_SEP:
            case ANY_CHAR:
            case ANY_STRING:
            case ANY_STRING_RECURSIVE: {
--- a/src/parse_constants.h
+++ b/src/parse_constants.h
@ -169,6 +169,7 @@ enum parse_error_code_t {
    parse_error_tokenizer_unterminated_subshell,
    parse_error_tokenizer_unterminated_slice,
    parse_error_tokenizer_unterminated_escape,
    parse_error_tokenizer_nested_slice,
    parse_error_tokenizer_other,
    parse_error_unbalancing_end,   // end outside of block
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@ -668,35 +668,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
 }
 void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_t &tok) {
-    parse_error_code_t parse_error_code;
+    parse_error_code_t parse_error_code = tok.error->parser_error;
    switch (tok.error) {
        case TOK_UNTERMINATED_QUOTE: {
            parse_error_code = parse_error_tokenizer_unterminated_quote;
            break;
        }
        case TOK_UNTERMINATED_SUBSHELL: {
            parse_error_code = parse_error_tokenizer_unterminated_subshell;
            break;
        }
        case TOK_UNTERMINATED_SLICE: {
            parse_error_code = parse_error_tokenizer_unterminated_slice;
            break;
        }
        case TOK_UNTERMINATED_ESCAPE: {
            parse_error_code = parse_error_tokenizer_unterminated_escape;
            break;
        }
        case TOK_INVALID_REDIRECT:
        case TOK_INVALID_PIPE:
        default: {
            parse_error_code = parse_error_tokenizer_other;
            break;
        }
    }
    this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
                                  parse_error_code, L"%ls",
-                                  error_message_for_code(tok.error).c_str());
+                                  tok.error->Message);
 }
 void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@ -834,14 +834,14 @@ void parse_util_expand_variable_error(const wcstring &token, size_t global_token
    wchar_t char_after_dollar = dollar_pos + 1 >= token.size() ? 0 : token.at(dollar_pos + 1);
    switch (char_after_dollar) {
-        case BRACKET_BEGIN:
+        case BRACE_BEGIN:
        case L'{': {
-            // The BRACKET_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
+            // The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possibly
            // quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
            // report a bracket error. Otherwise just complain about the ${.
            bool looks_like_variable = false;
            size_t closing_bracket =
-                token.find(char_after_dollar == L'{' ? L'}' : wchar_t(BRACKET_END), dollar_pos + 2);
+                token.find(char_after_dollar == L'{' ? L'}' : wchar_t(BRACE_END), dollar_pos + 2);
            wcstring var_name;
            if (closing_bracket != wcstring::npos) {
                size_t var_start = dollar_pos + 2, var_end = closing_bracket;
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@ -16,46 +16,22 @@
 #include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
-/// Error string for unexpected end of string.
+tokenizer_error *TOK_ERROR_NONE = new tokenizer_error(L"");
-#define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
+tokenizer_error *TOK_UNTERMINATED_QUOTE = new tokenizer_error((L"Unexpected end of string, quotes are not balanced"), parse_error_tokenizer_unterminated_quote);
-
+tokenizer_error *TOK_UNTERMINATED_SUBSHELL = new tokenizer_error((L"Unexpected end of string, expecting ')'"), parse_error_tokenizer_unterminated_subshell);
-/// Error string for mismatched parenthesis.
+tokenizer_error *TOK_UNTERMINATED_SLICE = new tokenizer_error((L"Unexpected end of string, square brackets do not match"), parse_error_tokenizer_unterminated_slice);
-#define PARAN_ERROR _(L"Unexpected end of string, parenthesis do not match")
+tokenizer_error *TOK_UNTERMINATED_ESCAPE = new tokenizer_error((L"Unexpected end of string, incomplete escape sequence"), parse_error_tokenizer_unterminated_escape);
-
+tokenizer_error *TOK_INVALID_REDIRECT = new tokenizer_error((L"Invalid input/output redirection"));
-/// Error string for mismatched square brackets.
+tokenizer_error *TOK_INVALID_PIPE = new tokenizer_error((L"Cannot use stdin (fd 0) as pipe output"));
-#define SQUARE_BRACKET_ERROR _(L"Unexpected end of string, square brackets do not match")
+tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL = new tokenizer_error((L"Unexpected ')' for unopened parenthesis"));
-
+tokenizer_error *TOK_ILLEGAL_SLICE = new tokenizer_error((L"Unexpected '[' at this location"));
-/// Error string for unterminated escape (backslash without continuation).
+tokenizer_error *TOK_CLOSING_UNOPENED_BRACE = new tokenizer_error((L"Unexpected '}' for unopened brace expansion"));
-#define UNTERMINATED_ESCAPE_ERROR _(L"Unexpected end of string, incomplete escape sequence")
+tokenizer_error *TOK_UNTERMINATED_BRACE = new tokenizer_error((L"Unexpected end of string, incomplete parameter expansion"));
-
+tokenizer_error *TOK_EXPECTED_PCLOSE_FOUND_BCLOSE = new tokenizer_error((L"Unexpected '}' found, expecting ')'"));
-/// Error string for invalid redirections.
+tokenizer_error *TOK_EXPECTED_BCLOSE_FOUND_PCLOSE = new tokenizer_error((L"Unexpected ')' found, expecting '}'"));
 #define REDIRECT_ERROR _(L"Invalid input/output redirection")
 /// Error string for when trying to pipe from fd 0.
 #define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
 wcstring error_message_for_code(tokenizer_error err) {
    switch (err) {
        case TOK_UNTERMINATED_QUOTE:
            return QUOTE_ERROR;
        case TOK_UNTERMINATED_SUBSHELL:
            return PARAN_ERROR;
        case TOK_UNTERMINATED_SLICE:
            return SQUARE_BRACKET_ERROR;
        case TOK_UNTERMINATED_ESCAPE:
            return UNTERMINATED_ESCAPE_ERROR;
        case TOK_INVALID_REDIRECT:
            return REDIRECT_ERROR;
        case TOK_INVALID_PIPE:
            return PIPE_ERROR;
        default:
            assert(0 && "Unknown error type");
            return {};
    }
 }
 /// Return an error token and mark that we no longer have a next token.
-tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
+tok_t tokenizer_t::call_error(tokenizer_error *error_type, const wchar_t *token_start,
                              const wchar_t *error_loc) {
    assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
    assert(error_loc >= token_start && "Invalid error location");
@ -119,194 +95,166 @@ static bool tok_is_string_character(wchar_t c, bool is_first) {
 /// Quick test to catch the most common 'non-magical' characters, makes read_string slightly faster
 /// by adding a fast path for the most common characters. This is obviously not a suitable
 /// replacement for iswalpha.
-static int myal(wchar_t c) { return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'); }
+static inline int myal(wchar_t c) { return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'); }
 /// Bit flags tracking what kind of construct tokenizer_t::read_string() is currently inside.
 /// Flags are combined with `|=` and cleared with `&= ~flag`, so several may be set at once
 /// (e.g. a subshell opened inside array brackets).
 ENUM_FLAGS(tok_mode) {
    regular_text = 0,    // regular text
    subshell = 1 << 0,        // inside of subshell parentheses
    array_brackets = 1 << 1,  // inside of array brackets
    curly_braces = 1 << 2,    // inside of curly braces; set on `{`, cleared when the last open `{` is closed
    char_escape = 1 << 3,     // previous character was a backslash; cleared when the next character is read
 };
 /// Read the next token as a string.
 tok_t tokenizer_t::read_string() {
-    bool do_loop = true;
+    tok_mode mode { tok_mode::regular_text };
-    size_t paran_count = 0;
+    std::vector<int> paran_offsets;
-    // Up to 96 open parens, before we give up on good error reporting.
+    std::vector<int> brace_offsets;
-    const size_t paran_offsets_max = 96;
+    std::vector<char> expecting;
-    size_t paran_offsets[paran_offsets_max];
+    int slice_offset = 0;
    // Where the open bracket is.
    size_t offset_of_bracket = 0;
    const wchar_t *const buff_start = this->buff;
    bool is_first = true;
-    enum tok_mode_t {
+    while (true) {
-        mode_regular_text = 0,    // regular text
+        wchar_t c = *this->buff;
-        mode_subshell = 1,        // inside of subshell
+#if false
-        mode_array_brackets = 2,  // inside of array brackets
+        wcstring msg = L"Handling 0x%x (%lc)";
-        mode_array_brackets_and_subshell =
+        tok_mode mode_begin = mode;
-            3  // inside of array brackets and subshell, like in '$foo[(ech'
+#endif
    } mode = mode_regular_text;
-    while (1) {
+        if (c == L'\0') {
-        if (!myal(*this->buff)) {
+            break;
            if (*this->buff == L'\\') {
                const wchar_t *error_location = this->buff;
                this->buff++;
                if (*this->buff == L'\0') {
                    if ((!this->accept_unfinished)) {
                        return this->call_error(TOK_UNTERMINATED_ESCAPE, buff_start,
                                                error_location);
                    }
                    // Since we are about to increment tok->buff, decrement it first so the
                    // increment doesn't go past the end of the buffer. See issue #389.
                    this->buff--;
                    do_loop = 0;
                }
                this->buff++;
                continue;
            }
            switch (mode) {
                case mode_regular_text: {
                    switch (*this->buff) {
                        case L'(': {
                            paran_count = 1;
                            paran_offsets[0] = this->buff - this->start;
                            mode = mode_subshell;
                            break;
                        }
                        case L'[': {
                            if (this->buff != buff_start) {
                                mode = mode_array_brackets;
                                offset_of_bracket = this->buff - this->start;
                            }
                            break;
                        }
                        case L'\'':
                        case L'"': {
                            const wchar_t *end = quote_end(this->buff);
                            if (end) {
                                this->buff = end;
                            } else {
                                const wchar_t *error_loc = this->buff;
                                this->buff += wcslen(this->buff);
                                if (!this->accept_unfinished) {
                                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start,
                                                            error_loc);
                                }
                                do_loop = 0;
                            }
                            break;
                        }
                        default: {
                            if (!tok_is_string_character(*(this->buff), is_first)) {
                                do_loop = 0;
                            }
                            break;
                        }
                    }
                    break;
                }
                case mode_array_brackets_and_subshell:
                case mode_subshell: {
                    switch (*this->buff) {
                        case L'\'':
                        case L'\"': {
                            const wchar_t *end = quote_end(this->buff);
                            if (end) {
                                this->buff = end;
                            } else {
                                const wchar_t *error_loc = this->buff;
                                this->buff += wcslen(this->buff);
                                if ((!this->accept_unfinished)) {
                                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start,
                                                            error_loc);
                                }
                                do_loop = 0;
                            }
                            break;
                        }
                        case L'(': {
                            if (paran_count < paran_offsets_max) {
                                paran_offsets[paran_count] = this->buff - this->start;
                            }
                            paran_count++;
                            break;
                        }
                        case L')': {
                            assert(paran_count > 0);
                            paran_count--;
                            if (paran_count == 0) {
                                mode =
                                    (mode == mode_array_brackets_and_subshell ? mode_array_brackets
                                                                              : mode_regular_text);
                            }
                            break;
                        }
                        case L'\0': {
                            do_loop = 0;
                            break;
                        }
                        default: {
                            break;  // ignore other chars
                        }
                    }
                    break;
                }
                case mode_array_brackets: {
                    switch (*this->buff) {
                        case L'(': {
                            paran_count = 1;
                            paran_offsets[0] = this->buff - this->start;
                            mode = mode_array_brackets_and_subshell;
                            break;
                        }
                        case L']': {
                            mode = mode_regular_text;
                            break;
                        }
                        case L'\0': {
                            do_loop = 0;
                            break;
                        }
                        default: {
                            break;  // ignore other chars
                        }
                    }
                    break;
                }
            }
        }
-        if (!do_loop) break;
+        // Make sure this character isn't being escaped before anything else
        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
            mode &= ~(tok_mode::char_escape);
            // and do nothing more
        }
        else if (myal(c)) {
            // Early exit optimization in case the character is just a letter,
            // which has no special meaning to the tokenizer, i.e. the same mode continues.
        }
        // Now proceed with the evaluation of the token, first checking to see if the token
        // has been explicitly ignored (escaped).
        else if (c == L'\\') {
            mode |= tok_mode::char_escape;
        }
        else if (c == L'(') {
            paran_offsets.push_back(this->buff - this->start);
            expecting.push_back(L')');
            mode |= tok_mode::subshell;
        }
        else if (c == L'{') {
            brace_offsets.push_back(this->buff - this->start);
            expecting.push_back(L'}');
            mode |= tok_mode::curly_braces;
        }
        else if (c == L')') {
            if (expecting.size() > 0 && expecting.back() == L'}') {
                return this->call_error(TOK_EXPECTED_BCLOSE_FOUND_PCLOSE, this->start, this->buff);
            }
            switch (paran_offsets.size()) {
                case 0:
                    return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, this->start, this->buff);
                case 1:
                    mode &= ~(tok_mode::subshell);
                default:
                    paran_offsets.pop_back();
            }
            expecting.pop_back();
        }
        else if (c == L'}') {
            if (expecting.size() > 0 && expecting.back() == L')') {
                return this->call_error(TOK_EXPECTED_PCLOSE_FOUND_BCLOSE, this->start, this->buff);
            }
            switch (brace_offsets.size()) {
                case 0:
                    return this->call_error(TOK_CLOSING_UNOPENED_BRACE, this->start, this->buff);
                case 1:
                    mode &= ~(tok_mode::curly_braces);
                default:
                    brace_offsets.pop_back();
            }
            expecting.pop_back();
        }
        else if (c == L'[') {
            if (this->buff != buff_start) {
                if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
                    // Nested brackets should not overwrite the existing slice_offset
                    //mqudsi: TOK_ILLEGAL_SLICE is the right error here, but the shell
                    //prints an error message with the caret pointing at token_start,
                    //not err_loc, making the TOK_ILLEGAL_SLICE message misleading.
                    // return call_error(TOK_ILLEGAL_SLICE, buff_start, this->buff);
                    return this->call_error(TOK_UNTERMINATED_SLICE, this->start, this->buff);
                }
                slice_offset = this->buff - this->start;
                mode |= tok_mode::array_brackets;
            }
            else {
                // This is actually allowed so the test operator `[` can be used as the head of a command
            }
        }
        // Only exit bracket mode if we are in bracket mode.
        // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias.
        // e.g. echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket
        else if (c == L']' && ((mode & tok_mode::array_brackets) == tok_mode::array_brackets)) {
            mode &= ~(tok_mode::array_brackets);
        }
        else if (c == L'\'' || c == L'"') {
            const wchar_t *end = quote_end(this->buff);
            if (end) {
                this->buff = end;
            } else {
                const wchar_t *error_loc = this->buff;
                this->buff += wcslen(this->buff);
                if ((!this->accept_unfinished)) {
                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start, error_loc);
                }
                break;
            }
        }
        else if (mode == tok_mode::regular_text && !tok_is_string_character(c, is_first)) {
            break;
        }
 #if false
        if (mode != mode_begin) {
            msg.append(L": mode 0x%x -> 0x%x\n");
        } else {
            msg.push_back(L'\n');
        }
        debug(0, msg.c_str(), c, c, int(mode_begin), int(mode));
 #endif
        this->buff++;
        is_first = false;
    }
-    if ((!this->accept_unfinished) && (mode != mode_regular_text)) {
+    if ((!this->accept_unfinished) && (mode != tok_mode::regular_text)) {
        tok_t error;
-        switch (mode) {
+        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
-            case mode_subshell: {
+            error = this->call_error(TOK_UNTERMINATED_ESCAPE, buff_start,
-                // Determine the innermost opening paran offset by interrogating paran_offsets.
+                    this->buff - 1);
-                assert(paran_count > 0);
+        }
-                size_t offset_of_open_paran = 0;
+        else if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
-                if (paran_count <= paran_offsets_max) {
+            error = this->call_error(TOK_UNTERMINATED_SLICE, buff_start,
-                    offset_of_open_paran = paran_offsets[paran_count - 1];
+                    this->start + slice_offset);
-                }
+        }
        else if ((mode & tok_mode::subshell) == tok_mode::subshell) {
            assert(paran_offsets.size() > 0);
            size_t offset_of_open_paran = paran_offsets.back();
-                error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
+            error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
-                                         this->start + offset_of_open_paran);
+                    this->start + offset_of_open_paran);
-                break;
+        }
-            }
+        else if ((mode & tok_mode::curly_braces) == tok_mode::curly_braces) {
-            case mode_array_brackets:
+            assert(brace_offsets.size() > 0);
-            case mode_array_brackets_and_subshell: {
+            size_t offset_of_open_brace = brace_offsets.back();
-                error = this->call_error(TOK_UNTERMINATED_SLICE, buff_start,
+
-                                         this->start + offset_of_bracket);
+            error = this->call_error(TOK_UNTERMINATED_BRACE, buff_start,
-                break;
+                    this->start + offset_of_open_brace);
            }
            default: {
                DIE("unexpected mode in read_string");
                break;
            }
        }
        return error;
    }
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@ -7,6 +7,7 @@
 #include "common.h"
 #include "maybe.h"
 #include "parse_constants.h"
 /// Token types.
 enum token_type {
@ -22,17 +23,26 @@ enum token_type {
    TOK_COMMENT      /// comment token
 };
-/// Tokenizer error types.
+struct tokenizer_error {
-enum tokenizer_error {
+    const wchar_t *Message;
-    TOK_ERROR_NONE,
+    enum parse_error_code_t parser_error; //the parser error associated with this tokenizer error
-    TOK_UNTERMINATED_QUOTE,
+    tokenizer_error(const wchar_t *msg, enum parse_error_code_t perr = parse_error_tokenizer_other)
-    TOK_UNTERMINATED_SUBSHELL,
+        : Message(msg), parser_error(perr) {}
-    TOK_UNTERMINATED_SLICE,
+    tokenizer_error(const tokenizer_error&) = delete;
    TOK_UNTERMINATED_ESCAPE,
    TOK_INVALID_REDIRECT,
    TOK_INVALID_PIPE
 };
 /// Tokenizer error values. Each name is a pointer to a tokenizer_error object and is only
 /// declared (extern) here. tok_t::error holds one of these pointers and is initialized to
 /// TOK_ERROR_NONE; tokenizer_t::call_error takes one as its error_type argument.
 // NOTE(review): read_string also passes TOK_EXPECTED_PCLOSE_FOUND_BCLOSE and
 // TOK_EXPECTED_BCLOSE_FOUND_PCLOSE to call_error, and neither appears in this list -- confirm
 // they are declared somewhere visible to tokenizer.cpp.
 extern tokenizer_error *TOK_ERROR_NONE;
 extern tokenizer_error *TOK_UNTERMINATED_QUOTE;
 extern tokenizer_error *TOK_UNTERMINATED_SUBSHELL;
 extern tokenizer_error *TOK_UNTERMINATED_SLICE;
 extern tokenizer_error *TOK_UNTERMINATED_ESCAPE;
 // Reported by read_string when the input ends while a '{' is still open.
 extern tokenizer_error *TOK_UNTERMINATED_BRACE;
 extern tokenizer_error *TOK_INVALID_REDIRECT;
 extern tokenizer_error *TOK_INVALID_PIPE;
 // Reported by read_string for a ')' when no '(' is open.
 extern tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL;
 // Reported by read_string for a '}' when no '{' is open.
 extern tokenizer_error *TOK_CLOSING_UNOPENED_BRACE;
 // NOTE(review): the only use visible in read_string is commented out (nested '[' currently
 // reports TOK_UNTERMINATED_SLICE instead) -- confirm whether this is used elsewhere.
 extern tokenizer_error *TOK_ILLEGAL_SLICE;
 enum class redirection_type_t {
    overwrite,  // normal redirection: > file.txt
    append,     // appending redirection: >> file.txt
@ -67,7 +77,7 @@ struct tok_t {
    maybe_t<int> redirected_fd{};
    // If an error, this is the error code.
-    enum tokenizer_error error { TOK_ERROR_NONE };
+    tokenizer_error *error { TOK_ERROR_NONE };
    // If an error, this is the offset of the error within the token. A value of 0 means it occurred
    // at 'offset'.
@ -97,7 +107,7 @@ class tokenizer_t {
    /// Whether to continue the previous line after the comment.
    bool continue_line_after_comment{false};
-    tok_t call_error(enum tokenizer_error error_type, const wchar_t *token_start,
+    tok_t call_error(tokenizer_error *error_type, const wchar_t *token_start,
                     const wchar_t *error_loc);
    tok_t read_string();
    maybe_t<tok_t> tok_next();
--- a/src/wcstringutil.cpp
+++ b/src/wcstringutil.cpp
@ -45,3 +45,14 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
    output.push_back(ellipsis_char);
    return output;
 }
 /// Returns a copy of \p input with every leading and trailing character that appears in
 /// \p any_of removed. If \p input is empty or consists only of characters from \p any_of, the
 /// result is an empty string.
 wcstring trim(const wcstring &input, const wchar_t *any_of) {
    // Index of the first character to keep; npos means there is nothing to keep at all.
    const auto first_kept = input.find_first_not_of(any_of);
    if (first_kept == wcstring::npos) return wcstring{};

    // A kept character exists, so this search succeeds and yields an index >= first_kept.
    const auto last_kept = input.find_last_not_of(any_of);
    return input.substr(first_kept, last_kept - first_kept + 1);
 }
--- a/src/wcstringutil.h
+++ b/src/wcstringutil.h
@ -59,5 +59,6 @@ enum class ellipsis_type {
 };
 wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
 /// Returns a copy of \p input with all leading and trailing characters that appear in
 /// \p any_of removed; returns an empty string if no other characters remain.
 wcstring trim(const wcstring &input, const wchar_t *any_of);
 #endif
--- a/tests/parameter_expansion.err
+++ b/tests/parameter_expansion.err
--- a/tests/parameter_expansion.in
+++ b/tests/parameter_expansion.in
@ -0,0 +1,34 @@
 # Brace expansion tests. Each section below corresponds, in order, to the lines of the
 # expected output in parameter_expansion.out.
 # basic expansion test: empty braces are expected to print literally as {}, a single item
 # prints without braces, and comma-separated items print as separate words
 echo {}
 echo {apple}
 echo {apple,orange}
 # expansion tests with spaces: whitespace around items is not part of the expected output
 echo {apple, orange}
 echo { apple, orange, banana }
 # expansion with spaces and cartesian products: the escaped quotes are expected around each item
 echo \'{ hello , world }\'
 # expansion with escapes: an escaped comma or escaped space is kept as part of the item
 for phrase in {good\,,   beautiful ,morning}; echo -n "$phrase "; end | string trim;
 for phrase in {goodbye\,,\ cruel\ ,world\n}; echo -n $phrase; end;
 # whitespace within entries converted to spaces in a single entry
 # (the newline between hello and world is expected to come out as one space)
 for foo in { hello
 world }
 	echo \'$foo\'
 end
 # dual expansion cartesian product; sed strips the final trailing comma
 echo { alpha, beta }\ {lambda, gamma }, | sed -r 's/(.*),/\1/'
 # expansion with subshells: command substitutions used as brace items
 for name in { (echo Meg), (echo Jo) }
 	echo $name
 end
 # subshells with expansion: brace expansion inside a command substitution
 for name in (for name in {Beth, Amy}; printf "$name\n"; end); printf "$name\n"; end
 # vim: set ft=fish:
--- a/tests/parameter_expansion.out
+++ b/tests/parameter_expansion.out
@ -0,0 +1,14 @@
 {}
 apple
 apple orange
 apple orange
 apple orange banana
 'hello' 'world'
 good, beautiful morning
 goodbye, cruel world
 'hello world'
 alpha lambda, beta lambda, alpha gamma, beta gamma
 Meg
 Jo
 Beth
 Amy