From 4a575b26f5ace10b2ab50eecb9059337292571d8 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Mon, 29 Nov 2021 21:08:26 +0100 Subject: [PATCH] Fix error check for repeated quoted command substitution Commit e40eba358 (Treat text following quoted command substitution as quoted) made parse_util_locate_cmdsubst_range() aware of quoted command substitutions, by skipping surrounding text via quote_end(). However, it was not quite right. We fail to properly parse two consecutive command substitutions in the same string, because we don't maintain the quoting context across calls to parse_util_locate_cmdsubst_range(). Let's track that bit in a parameter. This allows us to get rid of the quote_end() hack. Also apply this to the other place where we call parse_util_locate_cmdsubst_range() in a loop (highlighting). Fixes #8500 --- src/highlight.cpp | 3 +- src/parse_util.cpp | 64 ++++++++++++++++++---------------------- src/parse_util.h | 3 +- tests/checks/cmdsub.fish | 6 ++++ 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/highlight.cpp b/src/highlight.cpp index 9f7d7f41f..67e582b81 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -878,9 +878,10 @@ void highlighter_t::color_as_argument(const ast::node_t &node, bool options_allo // Now do command substitutions. size_t cmdsub_cursor = 0, cmdsub_start = 0, cmdsub_end = 0; wcstring cmdsub_contents; + bool is_quoted = false; while (parse_util_locate_cmdsubst_range(arg_str, &cmdsub_cursor, &cmdsub_contents, &cmdsub_start, &cmdsub_end, - true /* accept incomplete */) > 0) { + true /* accept incomplete */, &is_quoted) > 0) { // The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of // the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end. assert(cmdsub_end > cmdsub_start); diff --git a/src/parse_util.cpp b/src/parse_util.cpp index be9d30843..1bb85a827 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -88,7 +88,7 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) { } static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, const wchar_t **end, - bool allow_incomplete, bool *is_quoted) { + bool allow_incomplete, bool *inout_is_quoted) { bool escaped = false; bool syntax_error = false; int paran_count = 0; @@ -97,21 +97,31 @@ static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, co const wchar_t *paran_begin = nullptr, *paran_end = nullptr; assert(in && "null parameter"); - for (const wchar_t *pos = in; *pos; pos++) { + + const wchar_t *pos = in; + auto process_opening_quote = [&](wchar_t quote) -> bool /* ok */ { + const wchar_t *q_end = quote_end(pos, quote); + if (!q_end) return false; + if (*q_end == L'$') { + quoted_cmdsubs.push_back(paran_count); + } + // We want to report whether the outermost comand substitution between + // paran_begin..paran_end is quoted. + if (paran_count == 0 && inout_is_quoted) { + *inout_is_quoted = *q_end == L'$'; + } + pos = q_end; + return true; + }; + + if (inout_is_quoted && *inout_is_quoted && *pos) { + if (!process_opening_quote(L'"')) pos += std::wcslen(pos); + } + + for (; *pos; pos++) { if (!escaped) { if (*pos == L'\'' || *pos == L'"') { - const wchar_t *q_end = quote_end(pos, *pos); - if (q_end && *q_end) { - if (*q_end == L'$') { - quoted_cmdsubs.push_back(paran_count); - // We want to report if the outermost comand substitution between - // paran_begin..paran_end is quoted. - if (paran_count == 0 && is_quoted) *is_quoted = true; - } - pos = q_end; - } else { - break; - } + if (!process_opening_quote(*pos)) break; } else { if (*pos == L'(') { if ((paran_count == 0) && (paran_begin == nullptr)) { @@ -225,12 +235,11 @@ long parse_util_slice_length(const wchar_t *in) { int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, - bool accept_incomplete, bool *out_is_quoted) { + bool accept_incomplete, bool *inout_is_quoted) { // Clear the return values. if (out_contents != nullptr) out_contents->clear(); *out_start = 0; *out_end = str.size(); - bool cmdsub_is_quoted = false; // Nothing to do if the offset is at or past the end of the string. if (*inout_cursor_offset >= str.size()) return 0; @@ -243,7 +252,7 @@ int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_o const wchar_t *bracket_range_end = nullptr; int ret = parse_util_locate_cmdsub(valid_range_start, &bracket_range_begin, &bracket_range_end, - accept_incomplete, &cmdsub_is_quoted); + accept_incomplete, inout_is_quoted); if (ret <= 0) { return ret; } @@ -264,28 +273,10 @@ int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_o // Return the start and end. *out_start = bracket_range_begin - buff; *out_end = bracket_range_end - buff; - if (out_is_quoted) *out_is_quoted = cmdsub_is_quoted; // Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though // overflow is not likely. *inout_cursor_offset = 1 + *out_end; - if (cmdsub_is_quoted && *bracket_range_end) { - // We are usually called in a loop, to process all command substitutions in this string. - // If we just located a quoted cmdsub like $(A) inside "$(A)B"(C), the B part is also - // quoted but the naïve next caller wouldn't know. Since next caller only cares about - // the next command substitution - (C) - and not about the B part, just advance the - // cursor to the closing quote. - if (auto *q_end = quote_end(bracket_range_end, L'"')) { - *inout_cursor_offset = 1 + q_end - buff; - } else { - if (accept_incomplete) { - // We want to skip quoted text, so if there is no closing quote, skip to the end. - *inout_cursor_offset = bracket_range_end + std::wcslen(bracket_range_end) - buff; - } else { - return -1; - } - } - } return ret; } @@ -950,11 +941,12 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen wcstring subst; bool do_loop = true; + bool is_quoted = false; while (do_loop) { size_t paren_begin = 0; size_t paren_end = 0; switch (parse_util_locate_cmdsubst_range(arg_src, &cursor, &subst, &paren_begin, &paren_end, - false)) { + false, &is_quoted)) { case -1: { err |= PARSER_TEST_ERROR; if (out_errors) { diff --git a/src/parse_util.h b/src/parse_util.h index dff4f06e1..1338ed8e4 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -29,10 +29,11 @@ long parse_util_slice_length(const wchar_t *in); /// \param out_end On output, the offset of the end of the command substitution (close paren), or /// the end of the string if it was incomplete /// \param accept_incomplete whether to permit missing closing parenthesis +/// \param inout_is_quoted whether the cursor is in a double-quoted context. /// \return -1 on syntax error, 0 if no subshells exist and 1 on success int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, - bool accept_incomplete, bool *out_is_quoted = nullptr); + bool accept_incomplete, bool *inout_is_quoted = nullptr); /// Find the beginning and end of the command substitution under the cursor. If no subshell is /// found, the entire string is returned. If the current command substitution is not ended, i.e. the diff --git a/tests/checks/cmdsub.fish b/tests/checks/cmdsub.fish index e6d475615..fca2b6b45 100644 --- a/tests/checks/cmdsub.fish +++ b/tests/checks/cmdsub.fish @@ -57,3 +57,9 @@ echo "$(echo 1) ( $(echo 2)" echo "$(echo A)B$(echo C)D"(echo E) # CHECK: ABCDE + +echo "($(echo A)B$(echo C))" +# CHECK: (ABC) + +echo "quoted1""quoted2"(echo unquoted3)"$(echo quoted4)_$(echo quoted5)" +# CHECK: quoted1quoted2unquoted3quoted4_quoted5