mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-26 21:03:12 +00:00
Support "$(cmd)" command substitution without line splitting
This adds a hack to the parser. Given a command echo "x$()y z" we virtually insert double quotes before and after the command substitution, so the command internally looks like echo "x"$()"y z" This hack allows to reuse the existing logic for handling (recursive) command substitutions. This makes the quoting syntax more complex; external highlighters should consider adding this if possible. The upside (more Bash compatibility) seems worth it. Closes #159
This commit is contained in:
parent
4437a0d02a
commit
ec3d3a481b
9 changed files with 201 additions and 28 deletions
|
@ -501,9 +501,7 @@ void append_format(wcstring &str, const wchar_t *format, ...) {
|
|||
va_end(va);
|
||||
}
|
||||
|
||||
wchar_t *quote_end(const wchar_t *pos) {
|
||||
wchar_t c = *pos;
|
||||
|
||||
wchar_t *quote_end(const wchar_t *pos, wchar_t quote) {
|
||||
while (true) {
|
||||
pos++;
|
||||
|
||||
|
@ -513,7 +511,10 @@ wchar_t *quote_end(const wchar_t *pos) {
|
|||
pos++;
|
||||
if (!*pos) return nullptr;
|
||||
} else {
|
||||
if (*pos == c) {
|
||||
if (*pos == quote ||
|
||||
// Command substitutions also end a double quoted string. This is how we
|
||||
// support command substitutions inside double quotes.
|
||||
(quote == L'"' && *pos == L'$' && *(pos + 1) == L'(')) {
|
||||
return const_cast<wchar_t *>(pos);
|
||||
}
|
||||
}
|
||||
|
@ -854,6 +855,22 @@ static bool unescape_string_var(const wchar_t *in, wcstring *out) {
|
|||
return true;
|
||||
}
|
||||
|
||||
wcstring escape_string_for_double_quotes(wcstring in) {
|
||||
// We need to escape backslashes, double quotes, and dollars only.
|
||||
wcstring result = std::move(in);
|
||||
size_t idx = result.size();
|
||||
while (idx--) {
|
||||
switch (result[idx]) {
|
||||
case L'\\':
|
||||
case L'$':
|
||||
case L'"':
|
||||
result.insert(idx, 1, L'\\');
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
|
||||
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
|
||||
escape_flags_t flags) {
|
||||
|
|
12
src/common.h
12
src/common.h
|
@ -441,11 +441,11 @@ std::unique_ptr<T> make_unique(Args &&...args) {
|
|||
}
|
||||
#endif
|
||||
|
||||
/// This functions returns the end of the quoted substring beginning at \c in. The type of quoting
|
||||
/// character is detemrined by examining \c in. Returns 0 on error.
|
||||
/// This functions returns the end of the quoted substring beginning at \c pos. Returns 0 on error.
|
||||
///
|
||||
/// \param in the position of the opening quote.
|
||||
wchar_t *quote_end(const wchar_t *pos);
|
||||
/// \param pos the position of the opening quote.
|
||||
/// \param quote the quote to use, usually pointed to by \c pos.
|
||||
wchar_t *quote_end(const wchar_t *pos, wchar_t quote);
|
||||
|
||||
/// This function should be called after calling `setlocale()` to perform fish specific locale
|
||||
/// initialization.
|
||||
|
@ -473,6 +473,10 @@ wcstring escape_string(const wchar_t *in, escape_flags_t flags,
|
|||
wcstring escape_string(const wcstring &in, escape_flags_t flags,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
/// Escape a string so that it may be inserted into a double-quoted string.
|
||||
/// This permits ownership transfer.
|
||||
wcstring escape_string_for_double_quotes(wcstring in);
|
||||
|
||||
/// \return a string representation suitable for debugging (not for presenting to the user). This
|
||||
/// replaces non-ASCII characters with either tokens like <BRACE_SEP> or <\xfdd7>. No other escapes
|
||||
/// are made (i.e. this is a lossy escape).
|
||||
|
|
|
@ -620,8 +620,9 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
|
|||
size_t paren_end = 0;
|
||||
wcstring subcmd;
|
||||
|
||||
bool is_quoted = false;
|
||||
switch (parse_util_locate_cmdsubst_range(input, &cursor, &subcmd, &paren_begin, &paren_end,
|
||||
false)) {
|
||||
false, &is_quoted)) {
|
||||
case -1: {
|
||||
append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, L"Mismatched parenthesis");
|
||||
return expand_result_t::make_error(STATUS_EXPAND_ERROR);
|
||||
|
@ -640,6 +641,8 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
|
|||
}
|
||||
}
|
||||
|
||||
bool have_dollar = paren_begin > 0 && input.at(paren_begin - 1) == L'$';
|
||||
|
||||
wcstring_list_t sub_res;
|
||||
int subshell_status = exec_subshell_for_expand(subcmd, *ctx.parser, ctx.job_group, sub_res);
|
||||
if (subshell_status != 0) {
|
||||
|
@ -702,12 +705,56 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
|
|||
// Recursively call ourselves to expand any remaining command substitutions. The result of this
|
||||
// recursive call using the tail of the string is inserted into the tail_expand array list
|
||||
completion_receiver_t tail_expand_recv = out->subreceiver();
|
||||
expand_cmdsubst(input.substr(tail_begin), ctx, &tail_expand_recv,
|
||||
wcstring tail = input.substr(tail_begin);
|
||||
// A command substitution inside double quotes magically closes the quoted string.
|
||||
// Reopen the quotes just after the command substitution.
|
||||
if (is_quoted) {
|
||||
tail.insert(0, L"\"");
|
||||
}
|
||||
expand_cmdsubst(std::move(tail), ctx, &tail_expand_recv,
|
||||
errors); // TODO: offset error locations
|
||||
completion_list_t tail_expand = tail_expand_recv.take();
|
||||
|
||||
// Combine the result of the current command substitution with the result of the recursive tail
|
||||
// expansion.
|
||||
|
||||
if (is_quoted) {
|
||||
// Awkwardly reconstruct the command output.
|
||||
size_t approx_size = 0;
|
||||
for (const wcstring &sub_item : sub_res) {
|
||||
approx_size += sub_item.size() + 1;
|
||||
}
|
||||
wcstring sub_res_joined;
|
||||
sub_res_joined.reserve(approx_size);
|
||||
for (size_t i = 0; i < sub_res.size(); i++) {
|
||||
sub_res_joined.append(escape_string_for_double_quotes(std::move(sub_res.at(i))));
|
||||
sub_res_joined.push_back(L'\n');
|
||||
}
|
||||
// Mimic POSIX shells by stripping all trailing newlines.
|
||||
if (!sub_res_joined.empty()) {
|
||||
size_t i;
|
||||
for (i = sub_res_joined.size(); i > 0; i--) {
|
||||
if (sub_res_joined[i - 1] != L'\n') break;
|
||||
}
|
||||
sub_res_joined.erase(i);
|
||||
}
|
||||
// Instead of performing cartesian product expansion, we directly insert the command
|
||||
// substitution output into the current expansion results.
|
||||
for (const completion_t &tail_item : tail_expand) {
|
||||
wcstring whole_item;
|
||||
whole_item.reserve(paren_begin + 1 + sub_res_joined.size() + 1 +
|
||||
tail_item.completion.size());
|
||||
whole_item.append(input, 0, paren_begin - have_dollar);
|
||||
whole_item.append(sub_res_joined);
|
||||
whole_item.append(tail_item.completion.substr(const_strlen(L"\"")));
|
||||
if (!out->add(std::move(whole_item))) {
|
||||
return append_overflow_error(errors);
|
||||
}
|
||||
}
|
||||
|
||||
return expand_result_t::ok;
|
||||
}
|
||||
|
||||
for (const wcstring &sub_item : sub_res) {
|
||||
wcstring sub_item2 = escape_string(sub_item, ESCAPE_ALL);
|
||||
for (const completion_t &tail_item : tail_expand) {
|
||||
|
|
|
@ -5103,8 +5103,7 @@ static void test_error_messages() {
|
|||
{L"echo foo\"$\"bar", ERROR_NO_VAR_NAME},
|
||||
{L"echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME},
|
||||
{L"echo foo $ bar", ERROR_NO_VAR_NAME},
|
||||
{L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1},
|
||||
{L"echo \"foo$(foo)bar\"", ERROR_BAD_VAR_SUBCOMMAND1}};
|
||||
{L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1}};
|
||||
|
||||
parse_error_list_t errors;
|
||||
for (const auto &test : error_tests) {
|
||||
|
@ -5194,6 +5193,16 @@ static void test_highlighting() {
|
|||
{L"|", highlight_role_t::statement_terminator},
|
||||
{L"cat", highlight_role_t::command},
|
||||
});
|
||||
highlight_tests.push_back({
|
||||
{L"true", highlight_role_t::command},
|
||||
{L"\"before", highlight_role_t::quote},
|
||||
{L"$(", highlight_role_t::operat},
|
||||
{L"true", highlight_role_t::command},
|
||||
{L"param1", highlight_role_t::param},
|
||||
{L")", highlight_role_t::operat},
|
||||
{L"after\"", highlight_role_t::quote},
|
||||
{L"param2", highlight_role_t::param},
|
||||
});
|
||||
|
||||
// Redirections substitutions.
|
||||
highlight_tests.push_back({
|
||||
|
|
|
@ -494,6 +494,9 @@ static size_t color_variable(const wchar_t *in, size_t in_len,
|
|||
wchar_t next = in[idx + 1];
|
||||
if (next == L'$' || valid_var_name_char(next)) {
|
||||
colors[idx] = highlight_role_t::operat;
|
||||
} else if (next == L'(') {
|
||||
colors[idx] = highlight_role_t::operat;
|
||||
return idx + 1;
|
||||
} else {
|
||||
colors[idx] = highlight_role_t::error;
|
||||
}
|
||||
|
|
|
@ -106,12 +106,13 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
|
|||
|
||||
static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin, wchar_t **end,
|
||||
bool allow_incomplete, wchar_t open_type,
|
||||
wchar_t close_type) {
|
||||
wchar_t close_type, bool *is_quoted = nullptr) {
|
||||
// open_type is typically ( or [, and close type is the corresponding value.
|
||||
wchar_t *pos;
|
||||
bool escaped = false;
|
||||
bool syntax_error = false;
|
||||
int paran_count = 0;
|
||||
std::vector<int> quoted_cmdsubs;
|
||||
|
||||
wchar_t *paran_begin = nullptr, *paran_end = nullptr;
|
||||
|
||||
|
@ -120,8 +121,14 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin
|
|||
for (pos = const_cast<wchar_t *>(in); *pos; pos++) {
|
||||
if (!escaped) {
|
||||
if (std::wcschr(L"'\"", *pos)) {
|
||||
wchar_t *q_end = quote_end(pos);
|
||||
wchar_t *q_end = quote_end(pos, *pos);
|
||||
if (q_end && *q_end) {
|
||||
if (open_type == L'(' && *q_end == L'$') {
|
||||
quoted_cmdsubs.push_back(paran_count);
|
||||
// We want to report if the outermost comand substitution between
|
||||
// paran_begin..paran_end is quoted.
|
||||
if (paran_count == 0 && is_quoted) *is_quoted = true;
|
||||
}
|
||||
pos = q_end;
|
||||
} else {
|
||||
break;
|
||||
|
@ -145,6 +152,25 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin
|
|||
syntax_error = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if the ) did complete a quoted command substituion.
|
||||
if (open_type == L'(' && !quoted_cmdsubs.empty() &&
|
||||
quoted_cmdsubs.back() == paran_count) {
|
||||
quoted_cmdsubs.pop_back();
|
||||
// Quoted command substitutions temporarily close double quotes.
|
||||
// In "foo$(bar)baz$(qux)"
|
||||
// We are here ^
|
||||
// After the ) in a quoted command substitution, we need to act as if
|
||||
// there was an invisible double quote.
|
||||
wchar_t *q_end = quote_end(pos, L'"');
|
||||
if (q_end && *q_end) { // Found a valid closing quote.
|
||||
// Stop at $(qux), which is another quoted command substitution.
|
||||
if (*q_end == L'$') quoted_cmdsubs.push_back(paran_count);
|
||||
pos = q_end;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -185,7 +211,8 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end,
|
|||
static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_cursor_offset,
|
||||
wcstring *out_contents, size_t *out_start,
|
||||
size_t *out_end, bool accept_incomplete,
|
||||
wchar_t open_type, wchar_t close_type) {
|
||||
wchar_t open_type, wchar_t close_type,
|
||||
bool *out_is_quoted) {
|
||||
// Clear the return values.
|
||||
if (out_contents != nullptr) out_contents->clear();
|
||||
*out_start = 0;
|
||||
|
@ -201,7 +228,7 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c
|
|||
wchar_t *bracket_range_begin = nullptr, *bracket_range_end = nullptr;
|
||||
int ret = parse_util_locate_brackets_of_type(valid_range_start, &bracket_range_begin,
|
||||
&bracket_range_end, accept_incomplete, open_type,
|
||||
close_type);
|
||||
close_type, out_is_quoted);
|
||||
if (ret <= 0) {
|
||||
return ret;
|
||||
}
|
||||
|
@ -231,9 +258,9 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c
|
|||
|
||||
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
|
||||
wcstring *out_contents, size_t *out_start, size_t *out_end,
|
||||
bool accept_incomplete) {
|
||||
bool accept_incomplete, bool *out_is_quoted) {
|
||||
return parse_util_locate_brackets_range(str, inout_cursor_offset, out_contents, out_start,
|
||||
out_end, accept_incomplete, L'(', L')');
|
||||
out_end, accept_incomplete, L'(', L')', out_is_quoted);
|
||||
}
|
||||
|
||||
void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a,
|
||||
|
@ -465,7 +492,7 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) {
|
|||
i++;
|
||||
} else {
|
||||
if (cmd[i] == L'\'' || cmd[i] == L'\"') {
|
||||
const wchar_t *end = quote_end(&cmd[i]);
|
||||
const wchar_t *end = quote_end(&cmd[i], cmd[i]);
|
||||
// std::fwprintf( stderr, L"Jump %d\n", end-cmd );
|
||||
if ((end == nullptr) || (!*end) || (end > cmd + len)) {
|
||||
res = cmd[i];
|
||||
|
@ -1046,7 +1073,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
|
|||
|
||||
wchar_t next_char = idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0';
|
||||
if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE &&
|
||||
!valid_var_name_char(next_char)) {
|
||||
next_char != '(' && !valid_var_name_char(next_char)) {
|
||||
err = 1;
|
||||
if (out_errors) {
|
||||
// We have something like $$$^.... Back up until we reach the first $.
|
||||
|
|
|
@ -31,7 +31,7 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end,
|
|||
/// \return -1 on syntax error, 0 if no subshells exist and 1 on success
|
||||
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
|
||||
wcstring *out_contents, size_t *out_start, size_t *out_end,
|
||||
bool accept_incomplete);
|
||||
bool accept_incomplete, bool *out_is_quoted = nullptr);
|
||||
|
||||
/// Find the beginning and end of the command substitution under the cursor. If no subshell is
|
||||
/// found, the entire string is returned. If the current command substitution is not ended, i.e. the
|
||||
|
|
|
@ -145,10 +145,24 @@ tok_t tokenizer_t::read_string() {
|
|||
std::vector<int> paran_offsets;
|
||||
std::vector<int> brace_offsets;
|
||||
std::vector<char> expecting;
|
||||
std::vector<size_t> quoted_cmdsubs;
|
||||
int slice_offset = 0;
|
||||
const wchar_t *const buff_start = this->token_cursor;
|
||||
bool is_first = true;
|
||||
|
||||
auto process_opening_quote = [&](wchar_t quote) -> const wchar_t * {
|
||||
const wchar_t *end = quote_end(this->token_cursor, quote);
|
||||
if (end) {
|
||||
if (*end == L'$') quoted_cmdsubs.push_back(paran_offsets.size());
|
||||
this->token_cursor = end;
|
||||
return nullptr;
|
||||
} else {
|
||||
const wchar_t *error_loc = this->token_cursor;
|
||||
this->token_cursor += std::wcslen(this->token_cursor);
|
||||
return error_loc;
|
||||
}
|
||||
};
|
||||
|
||||
while (true) {
|
||||
wchar_t c = *this->token_cursor;
|
||||
#if false
|
||||
|
@ -195,6 +209,19 @@ tok_t tokenizer_t::read_string() {
|
|||
mode &= ~(tok_modes::subshell);
|
||||
}
|
||||
expecting.pop_back();
|
||||
// Check if the ) did complete a quoted command substituion.
|
||||
if (!quoted_cmdsubs.empty() && quoted_cmdsubs.back() == paran_offsets.size()) {
|
||||
quoted_cmdsubs.pop_back();
|
||||
// Quoted command substitutions temporarily close double quotes, after ),
|
||||
// we need to act as if there was an invisible double quote.
|
||||
if (const wchar_t *error_loc = process_opening_quote(L'"')) {
|
||||
if (!this->accept_unfinished) {
|
||||
return this->call_error(tokenizer_error_t::unterminated_quote, buff_start,
|
||||
error_loc);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (c == L'}') {
|
||||
if (!expecting.empty() && expecting.back() == L')') {
|
||||
return this->call_error(tokenizer_error_t::expected_pclose_found_bclose,
|
||||
|
@ -225,13 +252,8 @@ tok_t tokenizer_t::read_string() {
|
|||
else if (c == L']' && ((mode & tok_modes::array_brackets) == tok_modes::array_brackets)) {
|
||||
mode &= ~(tok_modes::array_brackets);
|
||||
} else if (c == L'\'' || c == L'"') {
|
||||
const wchar_t *end = quote_end(this->token_cursor);
|
||||
if (end) {
|
||||
this->token_cursor = end;
|
||||
} else {
|
||||
const wchar_t *error_loc = this->token_cursor;
|
||||
this->token_cursor += std::wcslen(this->token_cursor);
|
||||
if ((!this->accept_unfinished)) {
|
||||
if (const wchar_t *error_loc = process_opening_quote(c)) {
|
||||
if (!this->accept_unfinished) {
|
||||
return this->call_error(tokenizer_error_t::unterminated_quote, buff_start,
|
||||
error_loc);
|
||||
}
|
||||
|
|
44
tests/checks/cmdsub.fish
Normal file
44
tests/checks/cmdsub.fish
Normal file
|
@ -0,0 +1,44 @@
|
|||
#RUN: %fish %s
|
||||
|
||||
# Command substitution inside double quotes strips trailing newline.
|
||||
echo "a$(echo b)c"
|
||||
# CHECK: abc
|
||||
|
||||
# Nesting
|
||||
echo "$(echo "$(echo a)")"
|
||||
# CHECK: a
|
||||
echo "$(echo $(echo b))"
|
||||
# CHECK: b
|
||||
|
||||
echo "$(echo multiple).$(echo command).$(echo substitutions)"
|
||||
# CHECK: multiple.command.substitutions
|
||||
|
||||
test -n "$()" || echo "empty list is interpolated to empty string"
|
||||
# CHECK: empty list is interpolated to empty string
|
||||
|
||||
# Variables in command substitution output are not expanded.
|
||||
echo "$(echo \~ \$HOME)"
|
||||
# CHECK: ~ $HOME
|
||||
|
||||
echo "$(printf %s 'quoted command substitution multiline output
|
||||
line 2
|
||||
line 3
|
||||
')"
|
||||
# CHECK: quoted command substitution multiline output
|
||||
# CHECK: line 2
|
||||
# CHECK: line 3
|
||||
|
||||
echo trim any newlines "$(echo \n\n\n)" after cmdsub
|
||||
#CHECK: trim any newlines after cmdsub
|
||||
|
||||
echo i{1, (echo 2), "$(echo 3)"}
|
||||
# CHECK: i1 i2 i3
|
||||
|
||||
echo "$(echo index\nrange\nexpansion)[2]"
|
||||
#CHECK: range
|
||||
|
||||
echo "$(echo '"')"
|
||||
#CHECK: "
|
||||
|
||||
echo "$(echo $(echo 1) ())"
|
||||
#CHECK: 1
|
Loading…
Reference in a new issue