Handle whitespace within parameter expansion tokens

Following the discussion in #3802, handle spaces within braces more gracefully. Leading and trailing whitespace that isn't quoted or escaped is stripped, while whitespace in the middle is preserved. Any whitespace character encountered within expansion tokens is treated as a single space, similar to how programming languages that don't hard-break tokens/quotes on line endings would treat it.
2024-12-26 12:53:13 +00:00 · 2018-03-11 22:02:43 -05:00 · 2018-03-11 22:02:43 -05:00 · 24afff1c77
commit 24afff1c77
parent 364115f818
5 changed files with 30 additions and 10 deletions
--- a/src/common.cpp
+++ b/src/common.cpp
@ -1288,6 +1288,7 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
    const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
    const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);

+    bool brace_text_start = false;
    int brace_count = 0;

    bool errored = false;
@ -1359,7 +1360,9 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                }
                case L'}': {
                    if (unescape_special) {
+                        assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
                        brace_count--;
+                        brace_text_start = brace_text_start && brace_count > 0;
                        to_append_or_none = BRACE_END;
                    }
                    break;
@ -1367,14 +1370,16 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                case L',': {
                    if (unescape_special && brace_count > 0) {
                        to_append_or_none = BRACE_SEP;
+                        brace_text_start = false;
                    }
                    break;
                }
+                case L'\n':
+                case L'\t':
                case L' ': {
-                    //spaces, unless quoted or escaped, are ignored within braces
-                    // if (unescape_special && brace_count > 0) {
-                    //     input_position++; //skip the space
-                    // }
+                    if (unescape_special && brace_count > 0) {
+                        to_append_or_none = brace_text_start ? BRACE_SPACE : NOT_A_WCHAR;
+                    }
                    break;
                }
                case L'\'': {
@ -1387,7 +1392,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                    to_append_or_none = unescape_special ? wint_t(INTERNAL_SEPARATOR) : NOT_A_WCHAR;
                    break;
                }
-                default: { break; }
+                default: {
+                    if (unescape_special && brace_count > 0) {
+                        brace_text_start = true;
+                    }
+                    break;
+                }
            }
        } else if (mode == mode_single_quotes) {
            if (c == L'\\') {
--- a/src/expand.cpp
+++ b/src/expand.cpp
@ -936,12 +936,20 @@ static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
        if (brace_count == 0 && ((*pos == BRACE_SEP) || (pos == brace_end))) {
            assert(pos >= item_begin);
            size_t item_len = pos - item_begin;
+            wcstring item = wcstring(item_begin, item_len);
+            item = trim(item, (const wchar_t[]) { BRACE_SPACE });
+            for (auto &c : item) {
+                if (c == BRACE_SPACE) {
+                    c = ' ';
+                }
+            }

            wcstring whole_item;
            whole_item.reserve(tot_len + item_len + 2);
            whole_item.append(in, length_preceding_braces);
-            whole_item.append(item_begin, item_len);
+            whole_item.append(item.begin(), item.end());
            whole_item.append(brace_end + 1);
+            whole_item = trim(whole_item, (const wchar_t[]) { BRACE_SPACE });
            expand_braces(whole_item, flags, out, errors);

            item_begin = pos + 1;
--- a/src/expand.h
+++ b/src/expand.h
@ -70,6 +70,8 @@ enum {
    BRACE_END,
    /// Character representing separation between two bracket elements.
    BRACE_SEP,
+    /// Character that takes the place of any whitespace within non-quoted text in braces
+    BRACE_SPACE,
    /// Separate subtokens in a token with this character.
    INTERNAL_SEPARATOR,
    /// Character representing an empty variable expansion. Only used transitively while expanding
--- a/src/wcstringutil.cpp
+++ b/src/wcstringutil.cpp
@ -46,12 +46,12 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
    return output;
 }

-wcstring trim(const wcstring &input) {
-    auto begin_offset = input.find_first_not_of(whitespace);
+wcstring trim(const wcstring &input, const wchar_t *any_of) {
+    auto begin_offset = input.find_first_not_of(any_of);
    if (begin_offset == wcstring::npos) {
        return wcstring{};
    }
-    auto end = input.cbegin() + input.find_last_not_of(whitespace);
+    auto end = input.cbegin() + input.find_last_not_of(any_of);

    wcstring result(input.begin() + begin_offset, end + 1);
    return result;
--- a/src/wcstringutil.h
+++ b/src/wcstringutil.h
@ -59,6 +59,6 @@ enum class ellipsis_type {
 };

 wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
-wcstring trim(const wcstring &input);
+wcstring trim(const wcstring &input, const wchar_t *any_of);

 #endif