Handle whitespace within parameter expansion tokens

Following the discussion in #3802, handle spaces within braces more
gracefully. Leading and trailing whitespace that isn't quoted or escaped
is stripped; whitespace in the middle is preserved. Any whitespace
character (space, tab, or newline) encountered within expansion tokens is
treated as a single space, similar to how programming languages that don't
hard-break tokens/quotes at line endings would treat it.
This commit is contained in:
Mahmoud Al-Qudsi 2018-03-11 22:02:43 -05:00
parent 364115f818
commit 24afff1c77
5 changed files with 30 additions and 10 deletions

View file

@ -1288,6 +1288,7 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
bool brace_text_start = false;
int brace_count = 0;
bool errored = false;
@ -1359,7 +1360,9 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
}
case L'}': {
if (unescape_special) {
assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
brace_count--;
brace_text_start = brace_text_start && brace_count > 0;
to_append_or_none = BRACE_END;
}
break;
@ -1367,14 +1370,16 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
case L',': {
if (unescape_special && brace_count > 0) {
to_append_or_none = BRACE_SEP;
brace_text_start = false;
}
break;
}
case L'\n':
case L'\t':
case L' ': {
//spaces, unless quoted or escaped, are ignored within braces
// if (unescape_special && brace_count > 0) {
// input_position++; //skip the space
// }
if (unescape_special && brace_count > 0) {
to_append_or_none = brace_text_start ? BRACE_SPACE : NOT_A_WCHAR;
}
break;
}
case L'\'': {
@ -1387,7 +1392,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
to_append_or_none = unescape_special ? wint_t(INTERNAL_SEPARATOR) : NOT_A_WCHAR;
break;
}
default: { break; }
default: {
if (unescape_special && brace_count > 0) {
brace_text_start = true;
}
break;
}
}
} else if (mode == mode_single_quotes) {
if (c == L'\\') {

View file

@ -936,12 +936,20 @@ static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
if (brace_count == 0 && ((*pos == BRACE_SEP) || (pos == brace_end))) {
assert(pos >= item_begin);
size_t item_len = pos - item_begin;
wcstring item = wcstring(item_begin, item_len);
item = trim(item, (const wchar_t[]) { BRACE_SPACE });
for (auto &c : item) {
if (c == BRACE_SPACE) {
c = ' ';
}
}
wcstring whole_item;
whole_item.reserve(tot_len + item_len + 2);
whole_item.append(in, length_preceding_braces);
whole_item.append(item_begin, item_len);
whole_item.append(item.begin(), item.end());
whole_item.append(brace_end + 1);
whole_item = trim(whole_item, (const wchar_t[]) { BRACE_SPACE });
expand_braces(whole_item, flags, out, errors);
item_begin = pos + 1;

View file

@ -70,6 +70,8 @@ enum {
BRACE_END,
/// Character representing separation between two bracket elements.
BRACE_SEP,
/// Character that takes the place of any whitespace within non-quoted text in braces
BRACE_SPACE,
/// Separate subtokens in a token with this character.
INTERNAL_SEPARATOR,
/// Character representing an empty variable expansion. Only used transitively while expanding

View file

@ -46,12 +46,12 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
return output;
}
wcstring trim(const wcstring &input) {
auto begin_offset = input.find_first_not_of(whitespace);
wcstring trim(const wcstring &input, const wchar_t *any_of) {
auto begin_offset = input.find_first_not_of(any_of);
if (begin_offset == wcstring::npos) {
return wcstring{};
}
auto end = input.cbegin() + input.find_last_not_of(whitespace);
auto end = input.cbegin() + input.find_last_not_of(any_of);
wcstring result(input.begin() + begin_offset, end + 1);
return result;

View file

@ -59,6 +59,6 @@ enum class ellipsis_type {
};
wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
wcstring trim(const wcstring &input);
wcstring trim(const wcstring &input, const wchar_t *any_of);
#endif