Mirror of https://github.com/fish-shell/fish-shell
Merge branch 'fix_brace_parsing'
Closes #3802 and improves tokenizer handling of invalid expressions involving braces, parentheses, and brackets.
Commit d385248cc8
16 changed files with 360 additions and 315 deletions
@@ -1288,10 +1288,11 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
     const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
     const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
 
-    int bracket_count = 0;
+    bool brace_text_start = false;
+    int brace_count = 0;
 
     bool errored = false;
-    enum { mode_unquoted, mode_single_quotes, mode_double_quotes } mode = mode_unquoted;
+    enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted;
 
     for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
         const wchar_t c = input[input_position];
@@ -1352,21 +1353,32 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                 }
                 case L'{': {
                     if (unescape_special) {
-                        bracket_count++;
-                        to_append_or_none = BRACKET_BEGIN;
+                        brace_count++;
+                        to_append_or_none = BRACE_BEGIN;
                     }
                     break;
                 }
                 case L'}': {
                     if (unescape_special) {
-                        bracket_count--;
-                        to_append_or_none = BRACKET_END;
+                        assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
+                        brace_count--;
+                        brace_text_start = brace_text_start && brace_count > 0;
+                        to_append_or_none = BRACE_END;
                     }
                     break;
                 }
                 case L',': {
-                    if (unescape_special && bracket_count > 0) {
-                        to_append_or_none = BRACKET_SEP;
+                    if (unescape_special && brace_count > 0) {
+                        to_append_or_none = BRACE_SEP;
+                        brace_text_start = false;
                     }
                     break;
                 }
+                case L'\n':
+                case L'\t':
+                case L' ': {
+                    if (unescape_special && brace_count > 0) {
+                        to_append_or_none = brace_text_start ? BRACE_SPACE : NOT_A_WCHAR;
+                    }
+                    break;
+                }
@@ -1380,7 +1392,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                     to_append_or_none = unescape_special ? wint_t(INTERNAL_SEPARATOR) : NOT_A_WCHAR;
                     break;
                 }
-                default: { break; }
+                default: {
+                    if (unescape_special && brace_count > 0) {
+                        brace_text_start = true;
+                    }
+                    break;
+                }
             }
         } else if (mode == mode_single_quotes) {
             if (c == L'\\') {
src/common.h (13 changes)
@@ -807,6 +807,19 @@ struct enum_map {
     const wchar_t *const str;
 };
 
+
+/// Use for scoped enums (i.e. `enum class`) with bitwise operations
+#define ENUM_FLAG_OPERATOR(T,X,Y) \
+inline T operator X (T lhs, T rhs) { return (T) (static_cast<std::underlying_type<T>::type>(lhs) X static_cast<std::underlying_type<T>::type>(rhs)); } \
+inline T operator Y (T &lhs, T rhs) { return lhs = (T) (static_cast<std::underlying_type<T>::type>(lhs) X static_cast<std::underlying_type<T>::type>(rhs)); }
+#define ENUM_FLAGS(T) \
+enum class T; \
+inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type<T>::type>(t)); } \
+ENUM_FLAG_OPERATOR(T,|,|=) \
+ENUM_FLAG_OPERATOR(T,^,^=) \
+ENUM_FLAG_OPERATOR(T,&,&=) \
+enum class T
+
 /// Given a string return the matching enum. Return the sentinal enum if no match is made. The map
 /// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16
 /// elements in the map.
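For illustration only (not part of this commit): the ENUM_FLAGS macro above forward-declares a scoped enum and generates ~, |, ^, & and the matching compound assignments for it, so flag values can be combined and tested without manual casts. A minimal sketch of how it might be used; the tok_mode values are copied from the tokenizer hunk later in this diff, while the helper functions and the explicit <type_traits> include are assumptions:

// Illustrative sketch only; not part of this commit.
// Assumes the ENUM_FLAGS/ENUM_FLAG_OPERATOR macros from src/common.h above.
#include <type_traits>
#include "common.h"

ENUM_FLAGS(tok_mode) {
    regular_text = 0,
    subshell = 1 << 0,
    array_brackets = 1 << 1,
    curly_braces = 1 << 2,
    char_escape = 1 << 3,
};

// Bitwise AND works directly on the scoped enum because ENUM_FLAGS generated operator&.
static bool in_subshell(tok_mode mode) {
    return (mode & tok_mode::subshell) == tok_mode::subshell;
}

// operator|= also comes from the macro, so flags can be set without casts.
static tok_mode enter_braces(tok_mode mode) {
    mode |= tok_mode::curly_braces;
    return mode;
}

The test `(mode & tok_mode::subshell) == tok_mode::subshell` is the same pattern the rewritten read_string uses further down in this diff.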
src/expand.cpp (103 changes)
@@ -47,6 +47,7 @@
 #include "proc.h"
 #include "reader.h"
 #include "wildcard.h"
+#include "wcstringutil.h"
 #include "wutil.h" // IWYU pragma: keep
 #ifdef KERN_PROCARGS2
 #else
@@ -570,7 +571,7 @@ static void find_process(const wchar_t *proc, expand_flags_t flags,
 static size_t parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector<long> &idx,
                           std::vector<size_t> &source_positions, size_t array_size) {
     const long size = (long)array_size;
-    size_t pos = 1; // skip past the opening square bracket
+    size_t pos = 1; // skip past the opening square brace
 
     while (1) {
         while (iswspace(in[pos]) || (in[pos] == INTERNAL_SEPARATOR)) pos++;
@@ -846,39 +847,39 @@ static bool expand_variables(const wcstring &instr, std::vector<completion_t> *o
     return true;
 }
 
-/// Perform bracket expansion.
-static expand_error_t expand_brackets(const wcstring &instr, expand_flags_t flags,
+/// Perform brace expansion.
+static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
                                     std::vector<completion_t> *out, parse_error_list_t *errors) {
     bool syntax_error = false;
-    int bracket_count = 0;
+    int brace_count = 0;
 
-    const wchar_t *bracket_begin = NULL, *bracket_end = NULL;
+    const wchar_t *brace_begin = NULL, *brace_end = NULL;
     const wchar_t *last_sep = NULL;
 
     const wchar_t *item_begin;
-    size_t length_preceding_brackets, length_following_brackets, tot_len;
+    size_t length_preceding_braces, length_following_braces, tot_len;
 
     const wchar_t *const in = instr.c_str();
 
-    // Locate the first non-nested bracket pair.
+    // Locate the first non-nested brace pair.
     for (const wchar_t *pos = in; (*pos) && !syntax_error; pos++) {
         switch (*pos) {
-            case BRACKET_BEGIN: {
-                if (bracket_count == 0) bracket_begin = pos;
-                bracket_count++;
+            case BRACE_BEGIN: {
+                if (brace_count == 0) brace_begin = pos;
+                brace_count++;
                 break;
             }
-            case BRACKET_END: {
-                bracket_count--;
-                if (bracket_count < 0) {
+            case BRACE_END: {
+                brace_count--;
+                if (brace_count < 0) {
                     syntax_error = true;
-                } else if (bracket_count == 0) {
-                    bracket_end = pos;
+                } else if (brace_count == 0) {
+                    brace_end = pos;
                 }
                 break;
             }
-            case BRACKET_SEP: {
-                if (bracket_count == 1) last_sep = pos;
+            case BRACE_SEP: {
+                if (brace_count == 1) last_sep = pos;
                 break;
             }
             default: {
@@ -887,72 +888,80 @@ static expand_error_t expand_brackets(const wcstring &instr, expand_flags_t flag
             }
         }
 
-    if (bracket_count > 0) {
+    if (brace_count > 0) {
         if (!(flags & EXPAND_FOR_COMPLETIONS)) {
             syntax_error = true;
         } else {
-            // The user hasn't typed an end bracket yet; make one up and append it, then expand
+            // The user hasn't typed an end brace yet; make one up and append it, then expand
             // that.
             wcstring mod;
             if (last_sep) {
-                mod.append(in, bracket_begin - in + 1);
+                mod.append(in, brace_begin - in + 1);
                 mod.append(last_sep + 1);
-                mod.push_back(BRACKET_END);
+                mod.push_back(BRACE_END);
             } else {
                 mod.append(in);
-                mod.push_back(BRACKET_END);
+                mod.push_back(BRACE_END);
             }
 
             // Note: this code looks very fishy, apparently it has never worked.
-            return expand_brackets(mod, 1, out, errors);
+            return expand_braces(mod, 1, out, errors);
         }
     }
 
     // Expand a literal "{}" to itself because it is useless otherwise,
     // and this eases e.g. `find -exec {}`. See #1109.
-    if (bracket_begin + 1 == bracket_end) {
+    if (brace_begin + 1 == brace_end) {
         wcstring newstr = instr;
-        newstr.at(bracket_begin - in) = L'{';
-        newstr.at(bracket_end - in) = L'}';
-        return expand_brackets(newstr, flags, out, errors);
+        newstr.at(brace_begin - in) = L'{';
+        newstr.at(brace_end - in) = L'}';
+        return expand_braces(newstr, flags, out, errors);
     }
 
     if (syntax_error) {
-        append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, _(L"Mismatched brackets"));
+        append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, _(L"Mismatched braces"));
         return EXPAND_ERROR;
     }
 
-    if (bracket_begin == NULL) {
+    if (brace_begin == NULL) {
         append_completion(out, instr);
         return EXPAND_OK;
     }
 
-    length_preceding_brackets = (bracket_begin - in);
-    length_following_brackets = wcslen(bracket_end) - 1;
-    tot_len = length_preceding_brackets + length_following_brackets;
-    item_begin = bracket_begin + 1;
-    for (const wchar_t *pos = (bracket_begin + 1); true; pos++) {
-        if (bracket_count == 0 && ((*pos == BRACKET_SEP) || (pos == bracket_end))) {
+    length_preceding_braces = (brace_begin - in);
+    length_following_braces = wcslen(brace_end) - 1;
+    tot_len = length_preceding_braces + length_following_braces;
+    item_begin = brace_begin + 1;
+    for (const wchar_t *pos = (brace_begin + 1); true; pos++) {
+        if (brace_count == 0 && ((*pos == BRACE_SEP) || (pos == brace_end))) {
             assert(pos >= item_begin);
             size_t item_len = pos - item_begin;
+            wcstring item = wcstring(item_begin, item_len);
+            item = trim(item, (const wchar_t[]) { BRACE_SPACE });
+            for (auto &c : item) {
+                if (c == BRACE_SPACE) {
+                    c = ' ';
+                }
+            }
 
             wcstring whole_item;
             whole_item.reserve(tot_len + item_len + 2);
-            whole_item.append(in, length_preceding_brackets);
-            whole_item.append(item_begin, item_len);
-            whole_item.append(bracket_end + 1);
-            expand_brackets(whole_item, flags, out, errors);
+            whole_item.append(in, length_preceding_braces);
+            whole_item.append(item.begin(), item.end());
+            whole_item.append(brace_end + 1);
+            whole_item = trim(whole_item, (const wchar_t[]) { BRACE_SPACE });
+            expand_braces(whole_item, flags, out, errors);
 
             item_begin = pos + 1;
-            if (pos == bracket_end) break;
+            if (pos == brace_end) break;
         }
 
-        if (*pos == BRACKET_BEGIN) {
-            bracket_count++;
+        if (*pos == BRACE_BEGIN) {
+            brace_count++;
        }
 
-        if (*pos == BRACKET_END) {
-            bracket_count--;
+        if (*pos == BRACE_END) {
+            brace_count--;
        }
     }
     return EXPAND_OK;
@@ -1274,9 +1283,9 @@ static expand_error_t expand_stage_variables(const wcstring &input, std::vector<
     return EXPAND_OK;
 }
 
-static expand_error_t expand_stage_brackets(const wcstring &input, std::vector<completion_t> *out,
+static expand_error_t expand_stage_braces(const wcstring &input, std::vector<completion_t> *out,
                                             expand_flags_t flags, parse_error_list_t *errors) {
-    return expand_brackets(input, flags, out, errors);
+    return expand_braces(input, flags, out, errors);
 }
 
 static expand_error_t expand_stage_home(const wcstring &input,
@@ -1393,7 +1402,7 @@ expand_error_t expand_string(const wcstring &input, std::vector<completion_t> *o
 
     // Our expansion stages.
     const expand_stage_t stages[] = {expand_stage_cmdsubst, expand_stage_variables,
-                                     expand_stage_brackets, expand_stage_home,
+                                     expand_stage_braces, expand_stage_home,
                                      expand_stage_wildcards};
 
     // Load up our single initial completion.
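The per-item handling in expand_braces above trims the reserved BRACE_SPACE character from both ends of each element and rewrites any remaining occurrences as ordinary spaces, which is what lets an expression like { apple, orange } expand to plain "apple" and "orange". A standalone sketch of that normalization; the placeholder value and function names here are hypothetical, and the real code uses the trim() helper added elsewhere in this commit:

// Standalone sketch of the whitespace normalization above; not the fish implementation.
// BRACE_SPACE_PLACEHOLDER is a hypothetical stand-in for the reserved BRACE_SPACE character.
#include <string>

static const wchar_t BRACE_SPACE_PLACEHOLDER = L'\x1f';

// Drop placeholders from both ends, the way expand_braces trims each item.
static std::wstring trim_placeholder(const std::wstring &input) {
    const size_t begin = input.find_first_not_of(BRACE_SPACE_PLACEHOLDER);
    if (begin == std::wstring::npos) return {};
    const size_t end = input.find_last_not_of(BRACE_SPACE_PLACEHOLDER);
    return input.substr(begin, end - begin + 1);
}

// An item such as "<p>hello<p>" (where <p> is the placeholder) becomes "hello";
// any placeholder left in the middle is written out as a regular space.
static std::wstring normalize_brace_item(std::wstring item) {
    item = trim_placeholder(item);
    for (auto &c : item) {
        if (c == BRACE_SPACE_PLACEHOLDER) c = L' ';
    }
    return item;
}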
@@ -65,11 +65,13 @@ enum {
     /// Character representing variable expansion into a single element.
     VARIABLE_EXPAND_SINGLE,
     /// Character representing the start of a bracket expansion.
-    BRACKET_BEGIN,
+    BRACE_BEGIN,
     /// Character representing the end of a bracket expansion.
-    BRACKET_END,
+    BRACE_END,
     /// Character representing separation between two bracket elements.
-    BRACKET_SEP,
+    BRACE_SEP,
+    /// Character that takes the place of any whitespace within non-quoted text in braces
+    BRACE_SPACE,
     /// Separate subtokens in a token with this character.
     INTERNAL_SEPARATOR,
     /// Character representing an empty variable expansion. Only used transitively while expanding
@@ -578,6 +578,15 @@ static void test_tokenizer() {
         do_test(token.error_offset == 3);
     }
 
+    {
+        tokenizer_t t(L"abc )defg(hij", 0);
+        do_test(t.next(&token));
+        do_test(t.next(&token));
+        do_test(token.type == TOK_ERROR);
+        do_test(token.error == TOK_CLOSING_UNOPENED_SUBSHELL);
+        do_test(token.error_offset == 4);
+    }
+
     {
         tokenizer_t t(L"abc defg(hij (klm)", 0);
         do_test(t.next(&token));
@@ -4420,10 +4429,11 @@ static void test_illegal_command_exit_code() {
 
     const command_result_tuple_t tests[] = {
         {L"echo -n", STATUS_CMD_OK}, {L"pwd", STATUS_CMD_OK},
-        {L")", STATUS_ILLEGAL_CMD}, {L") ", STATUS_ILLEGAL_CMD},
+        // a `)` without a matching `(` is now a tokenizer error, and cannot be executed even as an illegal command
+        // {L")", STATUS_ILLEGAL_CMD}, {L") ", STATUS_ILLEGAL_CMD}, {L") ", STATUS_ILLEGAL_CMD}
         {L"*", STATUS_ILLEGAL_CMD}, {L"**", STATUS_ILLEGAL_CMD},
         {L"?", STATUS_ILLEGAL_CMD}, {L"abc?def", STATUS_ILLEGAL_CMD},
-        {L") ", STATUS_ILLEGAL_CMD}};
+    };
 
     int res = 0;
     const io_chain_t empty_ios;
@@ -122,9 +122,9 @@ bool is_potential_path(const wcstring &potential_path_fragment, const wcstring_l
         switch (c) {
             case VARIABLE_EXPAND:
             case VARIABLE_EXPAND_SINGLE:
-            case BRACKET_BEGIN:
-            case BRACKET_END:
-            case BRACKET_SEP:
+            case BRACE_BEGIN:
+            case BRACE_END:
+            case BRACE_SEP:
             case ANY_CHAR:
             case ANY_STRING:
             case ANY_STRING_RECURSIVE: {
@@ -169,6 +169,7 @@ enum parse_error_code_t {
     parse_error_tokenizer_unterminated_subshell,
     parse_error_tokenizer_unterminated_slice,
     parse_error_tokenizer_unterminated_escape,
+    parse_error_tokenizer_nested_slice,
     parse_error_tokenizer_other,
 
     parse_error_unbalancing_end, // end outside of block
@@ -668,35 +668,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
 }
 
 void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_t &tok) {
-    parse_error_code_t parse_error_code;
-    switch (tok.error) {
-        case TOK_UNTERMINATED_QUOTE: {
-            parse_error_code = parse_error_tokenizer_unterminated_quote;
-            break;
-        }
-        case TOK_UNTERMINATED_SUBSHELL: {
-            parse_error_code = parse_error_tokenizer_unterminated_subshell;
-            break;
-        }
-        case TOK_UNTERMINATED_SLICE: {
-            parse_error_code = parse_error_tokenizer_unterminated_slice;
-            break;
-        }
-        case TOK_UNTERMINATED_ESCAPE: {
-            parse_error_code = parse_error_tokenizer_unterminated_escape;
-            break;
-        }
-        case TOK_INVALID_REDIRECT:
-        case TOK_INVALID_PIPE:
-        default: {
-            parse_error_code = parse_error_tokenizer_other;
-            break;
-        }
-    }
-
+    parse_error_code_t parse_error_code = tok.error->parser_error;
     this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
                                   parse_error_code, L"%ls",
-                                  error_message_for_code(tok.error).c_str());
+                                  tok.error->Message);
 }
 
 void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
@@ -834,14 +834,14 @@ void parse_util_expand_variable_error(const wcstring &token, size_t global_token
     wchar_t char_after_dollar = dollar_pos + 1 >= token.size() ? 0 : token.at(dollar_pos + 1);
 
     switch (char_after_dollar) {
-        case BRACKET_BEGIN:
+        case BRACE_BEGIN:
         case L'{': {
-            // The BRACKET_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
+            // The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
             // quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
             // report a bracket error. Otherwise just complain about the ${.
             bool looks_like_variable = false;
             size_t closing_bracket =
-                token.find(char_after_dollar == L'{' ? L'}' : wchar_t(BRACKET_END), dollar_pos + 2);
+                token.find(char_after_dollar == L'{' ? L'}' : wchar_t(BRACE_END), dollar_pos + 2);
             wcstring var_name;
             if (closing_bracket != wcstring::npos) {
                 size_t var_start = dollar_pos + 2, var_end = closing_bracket;
@@ -16,46 +16,22 @@
 #include "tokenizer.h"
 #include "wutil.h" // IWYU pragma: keep
 
-/// Error string for unexpected end of string.
-#define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
-
-/// Error string for mismatched parenthesis.
-#define PARAN_ERROR _(L"Unexpected end of string, parenthesis do not match")
-
-/// Error string for mismatched square brackets.
-#define SQUARE_BRACKET_ERROR _(L"Unexpected end of string, square brackets do not match")
-
-/// Error string for unterminated escape (backslash without continuation).
-#define UNTERMINATED_ESCAPE_ERROR _(L"Unexpected end of string, incomplete escape sequence")
-
-/// Error string for invalid redirections.
-#define REDIRECT_ERROR _(L"Invalid input/output redirection")
-
-/// Error string for when trying to pipe from fd 0.
-#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
-
-wcstring error_message_for_code(tokenizer_error err) {
-    switch (err) {
-        case TOK_UNTERMINATED_QUOTE:
-            return QUOTE_ERROR;
-        case TOK_UNTERMINATED_SUBSHELL:
-            return PARAN_ERROR;
-        case TOK_UNTERMINATED_SLICE:
-            return SQUARE_BRACKET_ERROR;
-        case TOK_UNTERMINATED_ESCAPE:
-            return UNTERMINATED_ESCAPE_ERROR;
-        case TOK_INVALID_REDIRECT:
-            return REDIRECT_ERROR;
-        case TOK_INVALID_PIPE:
-            return PIPE_ERROR;
-        default:
-            assert(0 && "Unknown error type");
-            return {};
-    }
-}
+tokenizer_error *TOK_ERROR_NONE = new tokenizer_error(L"");
+tokenizer_error *TOK_UNTERMINATED_QUOTE = new tokenizer_error((L"Unexpected end of string, quotes are not balanced"), parse_error_tokenizer_unterminated_quote);
+tokenizer_error *TOK_UNTERMINATED_SUBSHELL = new tokenizer_error((L"Unexpected end of string, expecting ')'"), parse_error_tokenizer_unterminated_subshell);
+tokenizer_error *TOK_UNTERMINATED_SLICE = new tokenizer_error((L"Unexpected end of string, square brackets do not match"), parse_error_tokenizer_unterminated_slice);
+tokenizer_error *TOK_UNTERMINATED_ESCAPE = new tokenizer_error((L"Unexpected end of string, incomplete escape sequence"), parse_error_tokenizer_unterminated_escape);
+tokenizer_error *TOK_INVALID_REDIRECT = new tokenizer_error((L"Invalid input/output redirection"));
+tokenizer_error *TOK_INVALID_PIPE = new tokenizer_error((L"Cannot use stdin (fd 0) as pipe output"));
+tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL = new tokenizer_error((L"Unexpected ')' for unopened parenthesis"));
+tokenizer_error *TOK_ILLEGAL_SLICE = new tokenizer_error((L"Unexpected '[' at this location"));
+tokenizer_error *TOK_CLOSING_UNOPENED_BRACE = new tokenizer_error((L"Unexpected '}' for unopened brace expansion"));
+tokenizer_error *TOK_UNTERMINATED_BRACE = new tokenizer_error((L"Unexpected end of string, incomplete parameter expansion"));
+tokenizer_error *TOK_EXPECTED_PCLOSE_FOUND_BCLOSE = new tokenizer_error((L"Unexpected '}' found, expecting ')'"));
+tokenizer_error *TOK_EXPECTED_BCLOSE_FOUND_PCLOSE = new tokenizer_error((L"Unexpected ')' found, expecting '}'"));
 
 /// Return an error token and mark that we no longer have a next token.
-tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
+tok_t tokenizer_t::call_error(tokenizer_error *error_type, const wchar_t *token_start,
                               const wchar_t *error_loc) {
     assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
     assert(error_loc >= token_start && "Invalid error location");
@@ -119,194 +95,166 @@ static bool tok_is_string_character(wchar_t c, bool is_first) {
 /// Quick test to catch the most common 'non-magical' characters, makes read_string slightly faster
 /// by adding a fast path for the most common characters. This is obviously not a suitable
 /// replacement for iswalpha.
-static int myal(wchar_t c) { return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'); }
+static inline int myal(wchar_t c) { return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'); }
 
+ENUM_FLAGS(tok_mode) {
+    regular_text = 0,        // regular text
+    subshell = 1 << 0,       // inside of subshell parentheses
+    array_brackets = 1 << 1, // inside of array brackets
+    curly_braces = 1 << 2,
+    char_escape = 1 << 3,
+};
+
 /// Read the next token as a string.
 tok_t tokenizer_t::read_string() {
-    bool do_loop = true;
-    size_t paran_count = 0;
-    // Up to 96 open parens, before we give up on good error reporting.
-    const size_t paran_offsets_max = 96;
-    size_t paran_offsets[paran_offsets_max];
-    // Where the open bracket is.
-    size_t offset_of_bracket = 0;
+    tok_mode mode { tok_mode::regular_text };
+    std::vector<int> paran_offsets;
+    std::vector<int> brace_offsets;
+    std::vector<char> expecting;
+    int slice_offset = 0;
     const wchar_t *const buff_start = this->buff;
     bool is_first = true;
 
-    enum tok_mode_t {
-        mode_regular_text = 0,    // regular text
-        mode_subshell = 1,        // inside of subshell
-        mode_array_brackets = 2,  // inside of array brackets
-        mode_array_brackets_and_subshell =
-            3  // inside of array brackets and subshell, like in '$foo[(ech'
-    } mode = mode_regular_text;
+    while (true) {
+        wchar_t c = *this->buff;
+#if false
+        wcstring msg = L"Handling 0x%x (%lc)";
+        tok_mode mode_begin = mode;
+#endif
 
-    while (1) {
-        if (!myal(*this->buff)) {
-            if (*this->buff == L'\\') {
-                const wchar_t *error_location = this->buff;
-                this->buff++;
-                if (*this->buff == L'\0') {
-                    if ((!this->accept_unfinished)) {
-                        return this->call_error(TOK_UNTERMINATED_ESCAPE, buff_start,
-                                                error_location);
-                    }
-                    // Since we are about to increment tok->buff, decrement it first so the
-                    // increment doesn't go past the end of the buffer. See issue #389.
-                    this->buff--;
-                    do_loop = 0;
-                }
-
-                this->buff++;
-                continue;
-            }
-
-            switch (mode) {
-                case mode_regular_text: {
-                    switch (*this->buff) {
-                        case L'(': {
-                            paran_count = 1;
-                            paran_offsets[0] = this->buff - this->start;
-                            mode = mode_subshell;
-                            break;
-                        }
-                        case L'[': {
-                            if (this->buff != buff_start) {
-                                mode = mode_array_brackets;
-                                offset_of_bracket = this->buff - this->start;
-                            }
-                            break;
-                        }
-                        case L'\'':
-                        case L'"': {
-                            const wchar_t *end = quote_end(this->buff);
-                            if (end) {
-                                this->buff = end;
-                            } else {
-                                const wchar_t *error_loc = this->buff;
-                                this->buff += wcslen(this->buff);
-
-                                if (!this->accept_unfinished) {
-                                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start,
-                                                            error_loc);
-                                }
-                                do_loop = 0;
-                            }
-                            break;
-                        }
-                        default: {
-                            if (!tok_is_string_character(*(this->buff), is_first)) {
-                                do_loop = 0;
-                            }
-                            break;
-                        }
-                    }
-                    break;
-                }
-
-                case mode_array_brackets_and_subshell:
-                case mode_subshell: {
-                    switch (*this->buff) {
-                        case L'\'':
-                        case L'\"': {
-                            const wchar_t *end = quote_end(this->buff);
-                            if (end) {
-                                this->buff = end;
-                            } else {
-                                const wchar_t *error_loc = this->buff;
-                                this->buff += wcslen(this->buff);
-                                if ((!this->accept_unfinished)) {
-                                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start,
-                                                            error_loc);
-                                }
-                                do_loop = 0;
-                            }
-                            break;
-                        }
-                        case L'(': {
-                            if (paran_count < paran_offsets_max) {
-                                paran_offsets[paran_count] = this->buff - this->start;
-                            }
-                            paran_count++;
-                            break;
-                        }
-                        case L')': {
-                            assert(paran_count > 0);
-                            paran_count--;
-                            if (paran_count == 0) {
-                                mode =
-                                    (mode == mode_array_brackets_and_subshell ? mode_array_brackets
-                                                                              : mode_regular_text);
-                            }
-                            break;
-                        }
-                        case L'\0': {
-                            do_loop = 0;
-                            break;
-                        }
-                        default: {
-                            break; // ignore other chars
-                        }
-                    }
-                    break;
-                }
-
-                case mode_array_brackets: {
-                    switch (*this->buff) {
-                        case L'(': {
-                            paran_count = 1;
-                            paran_offsets[0] = this->buff - this->start;
-                            mode = mode_array_brackets_and_subshell;
-                            break;
-                        }
-                        case L']': {
-                            mode = mode_regular_text;
-                            break;
-                        }
-                        case L'\0': {
-                            do_loop = 0;
-                            break;
-                        }
-                        default: {
-                            break; // ignore other chars
-                        }
-                    }
-                    break;
-                }
-            }
-        }
+        if (c == L'\0') {
+            break;
+        }
 
-        if (!do_loop) break;
+        // Make sure this character isn't being escaped before anything else
+        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
+            mode &= ~(tok_mode::char_escape);
+            // and do nothing more
+        }
+        else if (myal(c)) {
+            // Early exit optimization in case the character is just a letter,
+            // which has no special meaning to the tokenizer, i.e. the same mode continues.
+        }
 
+        // Now proceed with the evaluation of the token, first checking to see if the token
+        // has been explicitly ignored (escaped).
+        else if (c == L'\\') {
+            mode |= tok_mode::char_escape;
+        }
+        else if (c == L'(') {
+            paran_offsets.push_back(this->buff - this->start);
+            expecting.push_back(L')');
+            mode |= tok_mode::subshell;
+        }
+        else if (c == L'{') {
+            brace_offsets.push_back(this->buff - this->start);
+            expecting.push_back(L'}');
+            mode |= tok_mode::curly_braces;
+        }
+        else if (c == L')') {
+            if (expecting.size() > 0 && expecting.back() == L'}') {
+                return this->call_error(TOK_EXPECTED_BCLOSE_FOUND_PCLOSE, this->start, this->buff);
+            }
+            switch (paran_offsets.size()) {
+                case 0:
+                    return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, this->start, this->buff);
+                case 1:
+                    mode &= ~(tok_mode::subshell);
+                default:
+                    paran_offsets.pop_back();
+            }
+            expecting.pop_back();
+        }
+        else if (c == L'}') {
+            if (expecting.size() > 0 && expecting.back() == L')') {
+                return this->call_error(TOK_EXPECTED_PCLOSE_FOUND_BCLOSE, this->start, this->buff);
+            }
+            switch (brace_offsets.size()) {
+                case 0:
+                    return this->call_error(TOK_CLOSING_UNOPENED_BRACE, this->start, this->buff);
+                case 1:
+                    mode &= ~(tok_mode::curly_braces);
+                default:
+                    brace_offsets.pop_back();
+            }
+            expecting.pop_back();
+        }
+        else if (c == L'[') {
+            if (this->buff != buff_start) {
+                if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
+                    // Nested brackets should not overwrite the existing slice_offset
+                    //mqudsi: TOK_ILLEGAL_SLICE is the right error here, but the shell
+                    //prints an error message with the caret pointing at token_start,
+                    //not err_loc, making the TOK_ILLEGAL_SLICE message misleading.
+                    // return call_error(TOK_ILLEGAL_SLICE, buff_start, this->buff);
+                    return this->call_error(TOK_UNTERMINATED_SLICE, this->start, this->buff);
+                }
+                slice_offset = this->buff - this->start;
+                mode |= tok_mode::array_brackets;
+            }
+            else {
+                // This is actually allowed so the test operator `[` can be used as the head of a command
+            }
+        }
+        // Only exit bracket mode if we are in bracket mode.
+        // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias.
+        // e.g. echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket
+        else if (c == L']' && ((mode & tok_mode::array_brackets) == tok_mode::array_brackets)) {
+            mode &= ~(tok_mode::array_brackets);
+        }
+        else if (c == L'\'' || c == L'"') {
+            const wchar_t *end = quote_end(this->buff);
+            if (end) {
+                this->buff = end;
+            } else {
+                const wchar_t *error_loc = this->buff;
+                this->buff += wcslen(this->buff);
+                if ((!this->accept_unfinished)) {
+                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start, error_loc);
+                }
+                break;
+            }
+        }
+        else if (mode == tok_mode::regular_text && !tok_is_string_character(c, is_first)) {
+            break;
+        }
+
+#if false
+        if (mode != mode_begin) {
+            msg.append(L": mode 0x%x -> 0x%x\n");
+        } else {
+            msg.push_back(L'\n');
+        }
+        debug(0, msg.c_str(), c, c, int(mode_begin), int(mode));
+#endif
+
         this->buff++;
         is_first = false;
     }
 
-    if ((!this->accept_unfinished) && (mode != mode_regular_text)) {
+    if ((!this->accept_unfinished) && (mode != tok_mode::regular_text)) {
         tok_t error;
-        switch (mode) {
-            case mode_subshell: {
-                // Determine the innermost opening paran offset by interrogating paran_offsets.
-                assert(paran_count > 0);
-                size_t offset_of_open_paran = 0;
-                if (paran_count <= paran_offsets_max) {
-                    offset_of_open_paran = paran_offsets[paran_count - 1];
-                }
-                error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
-                                         this->start + offset_of_open_paran);
-                break;
-            }
-            case mode_array_brackets:
-            case mode_array_brackets_and_subshell: {
-                error = this->call_error(TOK_UNTERMINATED_SLICE, buff_start,
-                                         this->start + offset_of_bracket);
-                break;
-            }
-            default: {
-                DIE("unexpected mode in read_string");
-                break;
-            }
-        }
+
+        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
+            error = this->call_error(TOK_UNTERMINATED_ESCAPE, buff_start,
+                                     this->buff - 1);
+        }
+        else if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
+            error = this->call_error(TOK_UNTERMINATED_SLICE, buff_start,
+                                     this->start + slice_offset);
+        }
+        else if ((mode & tok_mode::subshell) == tok_mode::subshell) {
+            assert(paran_offsets.size() > 0);
+            size_t offset_of_open_paran = paran_offsets.back();
+
+            error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
+                                     this->start + offset_of_open_paran);
+        }
+        else if ((mode & tok_mode::curly_braces) == tok_mode::curly_braces) {
+            assert(brace_offsets.size() > 0);
+            size_t offset_of_open_brace = brace_offsets.back();
+
+            error = this->call_error(TOK_UNTERMINATED_BRACE, buff_start,
+                                     this->start + offset_of_open_brace);
+        }
        return error;
     }
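The rewritten read_string above pairs every ( or { with the closer it expects on a stack, which is how a } that arrives while a ) is still pending, or a closer with no opener at all, gets its own precise error. A condensed sketch of just that bookkeeping, detached from the tokenizer: it ignores quoting, escaping and slices (which the real loop handles first) and reports plain strings instead of tokenizer_error objects. The unopened/unterminated messages are copied from the new error definitions above; the mismatch message is paraphrased:

// Minimal sketch of the delimiter bookkeeping in read_string; not the fish code.
#include <string>
#include <vector>

static std::wstring check_delimiters(const std::wstring &input) {
    std::vector<wchar_t> expecting;  // closers we still owe, innermost last
    for (wchar_t c : input) {
        if (c == L'(') {
            expecting.push_back(L')');
        } else if (c == L'{') {
            expecting.push_back(L'}');
        } else if (c == L')' || c == L'}') {
            if (expecting.empty()) {
                return c == L')' ? L"Unexpected ')' for unopened parenthesis"
                                 : L"Unexpected '}' for unopened brace expansion";
            }
            if (expecting.back() != c) {
                // e.g. "(a}" : a '}' arrived while a ')' was still expected
                return L"closing delimiter does not match the innermost opener";
            }
            expecting.pop_back();
        }
    }
    if (!expecting.empty()) {
        return expecting.back() == L')'
                   ? L"Unexpected end of string, expecting ')'"
                   : L"Unexpected end of string, incomplete parameter expansion";
    }
    return {};  // balanced
}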
@@ -7,6 +7,7 @@
 
 #include "common.h"
 #include "maybe.h"
+#include "parse_constants.h"
 
 /// Token types.
 enum token_type {
@@ -22,17 +23,26 @@ enum token_type {
     TOK_COMMENT /// comment token
 };
 
-/// Tokenizer error types.
-enum tokenizer_error {
-    TOK_ERROR_NONE,
-    TOK_UNTERMINATED_QUOTE,
-    TOK_UNTERMINATED_SUBSHELL,
-    TOK_UNTERMINATED_SLICE,
-    TOK_UNTERMINATED_ESCAPE,
-    TOK_INVALID_REDIRECT,
-    TOK_INVALID_PIPE
+struct tokenizer_error {
+    const wchar_t *Message;
+    enum parse_error_code_t parser_error; //the parser error associated with this tokenizer error
+    tokenizer_error(const wchar_t *msg, enum parse_error_code_t perr = parse_error_tokenizer_other)
+        : Message(msg), parser_error(perr) {}
+    tokenizer_error(const tokenizer_error&) = delete;
 };
 
+extern tokenizer_error *TOK_ERROR_NONE;
+extern tokenizer_error *TOK_UNTERMINATED_QUOTE;
+extern tokenizer_error *TOK_UNTERMINATED_SUBSHELL;
+extern tokenizer_error *TOK_UNTERMINATED_SLICE;
+extern tokenizer_error *TOK_UNTERMINATED_ESCAPE;
+extern tokenizer_error *TOK_UNTERMINATED_BRACE;
+extern tokenizer_error *TOK_INVALID_REDIRECT;
+extern tokenizer_error *TOK_INVALID_PIPE;
+extern tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL;
+extern tokenizer_error *TOK_CLOSING_UNOPENED_BRACE;
+extern tokenizer_error *TOK_ILLEGAL_SLICE;
+
 enum class redirection_type_t {
     overwrite, // normal redirection: > file.txt
     append, // appending redirection: >> file.txt
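With tokenizer errors now represented as shared tokenizer_error objects instead of enum values, a caller compares the pointer against the extern singletons and reads the message and mapped parser error straight from the object, as the fish_tests.cpp and parse_tree.cpp hunks in this diff do. A small illustrative sketch of that calling pattern; handle_error and its output format are not from the commit:

// Illustrative only: how a tok_t error is consumed after this change.
#include <cstdio>
#include "tokenizer.h"

static void handle_error(const tok_t &token) {
    if (token.type != TOK_ERROR) return;
    // The error objects are singletons, so identity comparison is enough.
    if (token.error == TOK_CLOSING_UNOPENED_SUBSHELL) {
        // e.g. the "abc )defg(hij" input from the new tokenizer test in this commit
    }
    // The message and the parse_error_code_t it maps to travel with the object.
    fwprintf(stderr, L"tokenizer error: %ls (parser error code %d)\n",
             token.error->Message, (int)token.error->parser_error);
}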
@@ -67,7 +77,7 @@ struct tok_t {
     maybe_t<int> redirected_fd{};
 
     // If an error, this is the error code.
-    enum tokenizer_error error { TOK_ERROR_NONE };
+    tokenizer_error *error { TOK_ERROR_NONE };
 
     // If an error, this is the offset of the error within the token. A value of 0 means it occurred
     // at 'offset'.
@@ -97,7 +107,7 @@ class tokenizer_t {
     /// Whether to continue the previous line after the comment.
     bool continue_line_after_comment{false};
 
-    tok_t call_error(enum tokenizer_error error_type, const wchar_t *token_start,
+    tok_t call_error(tokenizer_error *error_type, const wchar_t *token_start,
                      const wchar_t *error_loc);
     tok_t read_string();
     maybe_t<tok_t> tok_next();
@@ -45,3 +45,14 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
     output.push_back(ellipsis_char);
     return output;
 }
+
+wcstring trim(const wcstring &input, const wchar_t *any_of) {
+    auto begin_offset = input.find_first_not_of(any_of);
+    if (begin_offset == wcstring::npos) {
+        return wcstring{};
+    }
+    auto end = input.cbegin() + input.find_last_not_of(any_of);
+
+    wcstring result(input.begin() + begin_offset, end + 1);
+    return result;
+}
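The trim() helper added above strips any of the given characters from both ends of its input and returns an empty string when nothing else remains; its declaration follows in the header hunk below. A usage sketch with hypothetical inputs:

// Usage sketch for the trim() helper above; inputs are hypothetical.
#include "wcstringutil.h"

void trim_examples() {
    wcstring a = trim(L"  hello world  ", L" ");  // -> L"hello world"
    wcstring b = trim(L"xxyzyxx", L"x");          // -> L"yzy"
    wcstring c = trim(L"   ", L" ");              // -> L"" (nothing left after trimming)
    (void)a;
    (void)b;
    (void)c;
}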
@@ -59,5 +59,6 @@ enum class ellipsis_type {
 };
 
 wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
+wcstring trim(const wcstring &input, const wchar_t *any_of);
 
 #endif
tests/parameter_expansion.err (new file, empty)
tests/parameter_expansion.in (new file, 34 lines)
@@ -0,0 +1,34 @@
+# basic expansion test
+echo {}
+echo {apple}
+echo {apple,orange}
+
+# expansion tests with spaces
+echo {apple, orange}
+echo { apple, orange, banana }
+
+# expansion with spaces and cartesian products
+echo \'{ hello , world }\'
+
+# expansion with escapes
+for phrase in {good\,, beautiful ,morning}; echo -n "$phrase "; end | string trim;
+for phrase in {goodbye\,,\ cruel\ ,world\n}; echo -n $phrase; end;
+
+# whitespace within entries converted to spaces in a single entry
+for foo in { hello
+world }
+echo \'$foo\'
+end
+
+# dual expansion cartesian product
+echo { alpha, beta }\ {lambda, gamma }, | sed -r 's/(.*),/\1/'
+
+# expansion with subshells
+for name in { (echo Meg), (echo Jo) }
+echo $name
+end
+
+# subshells with expansion
+for name in (for name in {Beth, Amy}; printf "$name\n"; end); printf "$name\n"; end
+
+# vim: set ft=fish:
tests/parameter_expansion.out (new file, 14 lines)
@@ -0,0 +1,14 @@
+{}
+apple
+apple orange
+apple orange
+apple orange banana
+'hello' 'world'
+good, beautiful morning
+goodbye, cruel world
+'hello world'
+alpha lambda, beta lambda, alpha gamma, beta gamma
+Meg
+Jo
+Beth
+Amy