2016-05-02 23:53:10 +00:00
|
|
|
|
// Various mostly unrelated utility functions related to parsing, loading and evaluating fish code.
|
|
|
|
|
//
|
|
|
|
|
// This library can be seen as a 'toolbox' for functions that are used in many places in fish and
|
|
|
|
|
// that are somehow related to parsing the code.
|
2016-05-18 22:30:21 +00:00
|
|
|
|
#include "config.h" // IWYU pragma: keep
|
|
|
|
|
|
2019-10-13 22:50:48 +00:00
|
|
|
|
#include "parse_util.h"
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include <stdarg.h>
|
2017-02-11 02:47:02 +00:00
|
|
|
|
|
2022-08-21 06:14:48 +00:00
|
|
|
|
#include <algorithm>
|
2019-10-13 22:50:48 +00:00
|
|
|
|
#include <cwchar>
|
2016-04-21 06:00:54 +00:00
|
|
|
|
#include <memory>
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include <string>
|
2022-08-21 06:14:48 +00:00
|
|
|
|
#include <utility>
|
2006-01-30 16:51:50 +00:00
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
#include "ast.h"
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include "builtin.h"
|
2006-01-30 16:51:50 +00:00
|
|
|
|
#include "common.h"
|
2006-02-08 09:20:05 +00:00
|
|
|
|
#include "expand.h"
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include "fallback.h" // IWYU pragma: keep
|
2018-05-06 02:11:57 +00:00
|
|
|
|
#include "future_feature_flags.h"
|
2022-08-21 06:14:48 +00:00
|
|
|
|
#include "operation_context.h"
|
2016-04-21 06:00:54 +00:00
|
|
|
|
#include "parse_constants.h"
|
2022-08-21 06:14:48 +00:00
|
|
|
|
#include "parse_tree.h"
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include "tokenizer.h"
|
2019-09-19 17:32:07 +00:00
|
|
|
|
#include "wcstringutil.h"
|
2016-05-02 23:53:10 +00:00
|
|
|
|
#include "wildcard.h"
|
|
|
|
|
#include "wutil.h" // IWYU pragma: keep
|
2006-02-08 09:20:05 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Reported when a boolean command ('%ls' is replaced with its name) directly follows a
/// backgrounded job.
#define BOOL_AFTER_BACKGROUND_ERROR_MSG \
    _(L"The '%ls' command can not be used immediately after a backgrounded job")

/// Reported when a backgrounded command is used as a conditional.
#define BACKGROUND_IN_CONDITIONAL_ERROR_MSG \
    _(L"Backgrounded commands can not be used as conditionals")

/// Reported when 'end' is given arguments.
#define END_ARG_ERR_MSG _(L"'end' does not take arguments. Did you forget a ';'?")

/// Reported when 'time' appears anywhere but the start of a pipeline.
#define TIME_IN_PIPELINE_ERR_MSG _(L"The 'time' command may only be at the beginning of a pipeline")

/// Longest variable name shown in an error report before it is truncated.
static constexpr int var_err_len = 16;
|
2019-09-19 17:32:07 +00:00
|
|
|
|
|
2021-07-27 15:59:52 +00:00
|
|
|
|
int parse_util_lineno(const wcstring &str, size_t offset) {
|
|
|
|
|
// Return the line number of position offset, starting with 1.
|
2021-07-27 18:11:32 +00:00
|
|
|
|
if (str.empty()) {
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2021-07-27 15:59:52 +00:00
|
|
|
|
auto end = offset > str.length() ? str.end() : str.begin() + offset;
|
|
|
|
|
return std::count(str.begin(), end, L'\n') + 1;
|
2006-02-05 13:10:35 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
int parse_util_get_line_from_offset(const wcstring &str, size_t pos) {
|
2021-07-27 15:59:52 +00:00
|
|
|
|
// Return the line pos is on, or -1 if it's after the end.
|
2021-08-01 15:41:56 +00:00
|
|
|
|
if (pos > str.length()) return -1;
|
|
|
|
|
return std::count(str.begin(), str.begin() + pos, L'\n');
|
2007-09-21 14:05:49 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
size_t parse_util_get_offset_from_line(const wcstring &str, int line) {
|
2021-07-27 15:59:52 +00:00
|
|
|
|
// Return the first position on line X, counting from 0.
|
2019-11-19 01:08:16 +00:00
|
|
|
|
if (line < 0) return static_cast<size_t>(-1);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (line == 0) return 0;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
|
2021-08-01 15:41:56 +00:00
|
|
|
|
size_t pos = -1;
|
|
|
|
|
int count = 0;
|
|
|
|
|
while ((pos = str.find(L'\n', pos + 1)) != wcstring::npos) {
|
|
|
|
|
count++;
|
|
|
|
|
if (count == line) return pos + 1;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
2021-07-27 15:59:52 +00:00
|
|
|
|
return static_cast<size_t>(-1);
|
2007-09-21 14:05:49 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
|
2018-10-20 20:25:55 +00:00
|
|
|
|
size_t off = parse_util_get_offset_from_line(str, line);
|
|
|
|
|
size_t off2 = parse_util_get_offset_from_line(str, line + 1);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-11-19 01:08:16 +00:00
|
|
|
|
if (off == static_cast<size_t>(-1)) return static_cast<size_t>(-1);
|
|
|
|
|
if (off2 == static_cast<size_t>(-1)) off2 = str.length() + 1;
|
2016-10-21 01:53:31 +00:00
|
|
|
|
if (line_offset < 0) line_offset = 0; //!OCLINT(parameter reassignment)
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-11-19 01:08:16 +00:00
|
|
|
|
if (static_cast<size_t>(line_offset) >= off2 - off - 1) {
|
2016-10-21 01:53:31 +00:00
|
|
|
|
line_offset = off2 - off - 1; //!OCLINT(parameter reassignment)
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-10-09 21:36:08 +00:00
|
|
|
|
return off + line_offset;
|
2007-09-21 14:05:49 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-26 20:15:10 +00:00
|
|
|
|
static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, const wchar_t **end,
|
2022-04-03 13:14:12 +00:00
|
|
|
|
bool allow_incomplete, bool *inout_is_quoted,
|
|
|
|
|
bool *out_has_dollar) {
|
2021-02-05 21:00:31 +00:00
|
|
|
|
bool escaped = false;
|
2022-02-04 20:44:45 +00:00
|
|
|
|
bool is_token_begin = true;
|
2020-09-24 15:21:49 +00:00
|
|
|
|
bool syntax_error = false;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
int paran_count = 0;
|
2021-07-02 21:11:03 +00:00
|
|
|
|
std::vector<int> quoted_cmdsubs;
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2021-06-26 04:16:03 +00:00
|
|
|
|
const wchar_t *paran_begin = nullptr, *paran_end = nullptr;
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2019-05-28 00:24:19 +00:00
|
|
|
|
assert(in && "null parameter");
|
2021-11-29 20:08:26 +00:00
|
|
|
|
|
|
|
|
|
const wchar_t *pos = in;
|
2022-04-03 13:14:12 +00:00
|
|
|
|
const wchar_t *last_dollar = nullptr;
|
2021-11-29 20:08:26 +00:00
|
|
|
|
auto process_opening_quote = [&](wchar_t quote) -> bool /* ok */ {
|
|
|
|
|
const wchar_t *q_end = quote_end(pos, quote);
|
|
|
|
|
if (!q_end) return false;
|
|
|
|
|
if (*q_end == L'$') {
|
2022-04-03 13:14:12 +00:00
|
|
|
|
last_dollar = q_end;
|
2021-11-29 20:08:26 +00:00
|
|
|
|
quoted_cmdsubs.push_back(paran_count);
|
|
|
|
|
}
|
|
|
|
|
// We want to report whether the outermost comand substitution between
|
|
|
|
|
// paran_begin..paran_end is quoted.
|
|
|
|
|
if (paran_count == 0 && inout_is_quoted) {
|
|
|
|
|
*inout_is_quoted = *q_end == L'$';
|
|
|
|
|
}
|
|
|
|
|
pos = q_end;
|
|
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (inout_is_quoted && *inout_is_quoted && *pos) {
|
|
|
|
|
if (!process_opening_quote(L'"')) pos += std::wcslen(pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (; *pos; pos++) {
|
2021-02-05 21:00:31 +00:00
|
|
|
|
if (!escaped) {
|
2021-06-26 04:16:03 +00:00
|
|
|
|
if (*pos == L'\'' || *pos == L'"') {
|
2021-11-29 20:08:26 +00:00
|
|
|
|
if (!process_opening_quote(*pos)) break;
|
2022-02-04 20:44:45 +00:00
|
|
|
|
} else if (*pos == L'\\') {
|
|
|
|
|
escaped = true;
|
|
|
|
|
} else if (*pos == L'#' && is_token_begin) {
|
|
|
|
|
pos = comment_end(pos) - 1;
|
2022-04-03 13:14:12 +00:00
|
|
|
|
} else if (*pos == L'$') {
|
|
|
|
|
last_dollar = pos;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
} else {
|
2021-06-26 20:15:10 +00:00
|
|
|
|
if (*pos == L'(') {
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if ((paran_count == 0) && (paran_begin == nullptr)) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
paran_begin = pos;
|
2022-04-03 13:14:12 +00:00
|
|
|
|
if (out_has_dollar) {
|
|
|
|
|
*out_has_dollar = last_dollar == pos - 1;
|
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
|
paran_count++;
|
2021-06-26 20:15:10 +00:00
|
|
|
|
} else if (*pos == L')') {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
paran_count--;
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if ((paran_count == 0) && (paran_end == nullptr)) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
paran_end = pos;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (paran_count < 0) {
|
2020-09-24 15:21:49 +00:00
|
|
|
|
syntax_error = true;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2021-07-02 21:11:03 +00:00
|
|
|
|
|
2022-04-03 13:14:12 +00:00
|
|
|
|
// Check if the ) did complete a quoted command substitution.
|
2021-06-26 20:15:10 +00:00
|
|
|
|
if (!quoted_cmdsubs.empty() && quoted_cmdsubs.back() == paran_count) {
|
2021-07-02 21:11:03 +00:00
|
|
|
|
quoted_cmdsubs.pop_back();
|
|
|
|
|
// Quoted command substitutions temporarily close double quotes.
|
|
|
|
|
// In "foo$(bar)baz$(qux)"
|
|
|
|
|
// We are here ^
|
|
|
|
|
// After the ) in a quoted command substitution, we need to act as if
|
|
|
|
|
// there was an invisible double quote.
|
2021-06-26 04:16:03 +00:00
|
|
|
|
const wchar_t *q_end = quote_end(pos, L'"');
|
2021-07-02 21:11:03 +00:00
|
|
|
|
if (q_end && *q_end) { // Found a valid closing quote.
|
|
|
|
|
// Stop at $(qux), which is another quoted command substitution.
|
|
|
|
|
if (*q_end == L'$') quoted_cmdsubs.push_back(paran_count);
|
|
|
|
|
pos = q_end;
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-04-16 17:45:38 +00:00
|
|
|
|
is_token_begin = is_token_delimiter(pos[0], pos[1]);
|
2021-02-05 21:00:31 +00:00
|
|
|
|
} else {
|
|
|
|
|
escaped = false;
|
2022-02-04 20:44:45 +00:00
|
|
|
|
is_token_begin = false;
|
2021-02-05 21:00:31 +00:00
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
|
syntax_error |= (paran_count < 0);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
syntax_error |= ((paran_count > 0) && (!allow_incomplete));
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (syntax_error) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return -1;
|
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if (paran_begin == nullptr) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (begin) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
*begin = paran_begin;
|
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (end) {
|
2021-06-26 04:16:03 +00:00
|
|
|
|
*end = paran_count ? in + std::wcslen(in) : paran_end;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2014-03-31 17:01:39 +00:00
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return 1;
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-26 20:02:19 +00:00
|
|
|
|
long parse_util_slice_length(const wchar_t *in) {
|
|
|
|
|
assert(in && "null parameter");
|
|
|
|
|
const wchar_t openc = L'[';
|
|
|
|
|
const wchar_t closec = L']';
|
|
|
|
|
bool escaped = false;
|
|
|
|
|
|
|
|
|
|
// Check for initial opening [
|
|
|
|
|
if (*in != openc) return 0;
|
|
|
|
|
int bracket_count = 1;
|
|
|
|
|
|
|
|
|
|
assert(in && "null parameter");
|
|
|
|
|
for (const wchar_t *pos = in + 1; *pos; pos++) {
|
|
|
|
|
if (!escaped) {
|
|
|
|
|
if (*pos == L'\'' || *pos == L'"') {
|
|
|
|
|
const wchar_t *q_end = quote_end(pos, *pos);
|
|
|
|
|
if (q_end && *q_end) {
|
|
|
|
|
pos = q_end;
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if (*pos == openc) {
|
|
|
|
|
bracket_count++;
|
|
|
|
|
} else if (*pos == closec) {
|
|
|
|
|
bracket_count--;
|
|
|
|
|
if (bracket_count == 0) {
|
|
|
|
|
// pos points at the closing ], so add 1.
|
|
|
|
|
return pos - in + 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (*pos == '\\') {
|
|
|
|
|
escaped = !escaped;
|
|
|
|
|
} else {
|
|
|
|
|
escaped = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(bracket_count > 0 && "Should have unclosed brackets");
|
|
|
|
|
return -1;
|
2014-02-03 22:13:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-26 20:15:10 +00:00
|
|
|
|
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
|
|
|
|
|
wcstring *out_contents, size_t *out_start, size_t *out_end,
|
2022-04-03 13:14:12 +00:00
|
|
|
|
bool accept_incomplete, bool *inout_is_quoted,
|
|
|
|
|
bool *out_has_dollar) {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Clear the return values.
|
2019-04-11 21:28:27 +00:00
|
|
|
|
if (out_contents != nullptr) out_contents->clear();
|
2013-10-09 01:41:35 +00:00
|
|
|
|
*out_start = 0;
|
|
|
|
|
*out_end = str.size();
|
2014-01-15 09:40:40 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Nothing to do if the offset is at or past the end of the string.
|
|
|
|
|
if (*inout_cursor_offset >= str.size()) return 0;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Defer to the wonky version.
|
|
|
|
|
const wchar_t *const buff = str.c_str();
|
|
|
|
|
const wchar_t *const valid_range_start = buff + *inout_cursor_offset,
|
|
|
|
|
*valid_range_end = buff + str.size();
|
2021-06-26 04:16:03 +00:00
|
|
|
|
const wchar_t *bracket_range_begin = nullptr;
|
|
|
|
|
const wchar_t *bracket_range_end = nullptr;
|
2021-06-26 20:15:10 +00:00
|
|
|
|
|
|
|
|
|
int ret = parse_util_locate_cmdsub(valid_range_start, &bracket_range_begin, &bracket_range_end,
|
2022-04-03 13:14:12 +00:00
|
|
|
|
accept_incomplete, inout_is_quoted, out_has_dollar);
|
2016-10-31 04:05:27 +00:00
|
|
|
|
if (ret <= 0) {
|
|
|
|
|
return ret;
|
2013-10-09 01:41:35 +00:00
|
|
|
|
}
|
2016-10-31 04:05:27 +00:00
|
|
|
|
|
|
|
|
|
// The command substitutions must not be NULL and must be in the valid pointer range, and
|
|
|
|
|
// the end must be bigger than the beginning.
|
2019-11-19 02:34:50 +00:00
|
|
|
|
assert(bracket_range_begin != nullptr && bracket_range_begin >= valid_range_start &&
|
2016-12-04 04:12:53 +00:00
|
|
|
|
bracket_range_begin <= valid_range_end);
|
2019-11-19 02:34:50 +00:00
|
|
|
|
assert(bracket_range_end != nullptr && bracket_range_end > bracket_range_begin &&
|
2016-12-04 04:12:53 +00:00
|
|
|
|
bracket_range_end >= valid_range_start && bracket_range_end <= valid_range_end);
|
2016-10-31 04:05:27 +00:00
|
|
|
|
|
|
|
|
|
// Assign the substring to the out_contents.
|
|
|
|
|
const wchar_t *interior_begin = bracket_range_begin + 1;
|
2019-04-11 21:28:27 +00:00
|
|
|
|
if (out_contents != nullptr) {
|
|
|
|
|
out_contents->assign(interior_begin, bracket_range_end - interior_begin);
|
|
|
|
|
}
|
2016-10-31 04:05:27 +00:00
|
|
|
|
|
|
|
|
|
// Return the start and end.
|
|
|
|
|
*out_start = bracket_range_begin - buff;
|
|
|
|
|
*out_end = bracket_range_end - buff;
|
|
|
|
|
|
|
|
|
|
// Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though
|
2021-10-30 15:32:48 +00:00
|
|
|
|
// overflow is not likely.
|
|
|
|
|
*inout_cursor_offset = 1 + *out_end;
|
|
|
|
|
|
2013-10-09 01:41:35 +00:00
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a,
|
|
|
|
|
const wchar_t **b) {
|
2019-05-28 00:24:19 +00:00
|
|
|
|
assert(buff && "Null buffer");
|
2016-05-02 23:53:10 +00:00
|
|
|
|
const wchar_t *const cursor = buff + cursor_pos;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-03-12 21:06:01 +00:00
|
|
|
|
const size_t bufflen = std::wcslen(buff);
|
2013-07-17 08:35:30 +00:00
|
|
|
|
assert(cursor_pos <= bufflen);
|
2013-07-23 01:26:15 +00:00
|
|
|
|
|
2019-11-25 11:03:25 +00:00
|
|
|
|
// ap and bp are the beginning and end of the tightest command substitution found so far.
|
2013-07-17 08:35:30 +00:00
|
|
|
|
const wchar_t *ap = buff, *bp = buff + bufflen;
|
|
|
|
|
const wchar_t *pos = buff;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
for (;;) {
|
2021-06-26 04:16:03 +00:00
|
|
|
|
const wchar_t *begin = nullptr, *end = nullptr;
|
2022-04-03 13:14:12 +00:00
|
|
|
|
if (parse_util_locate_cmdsub(pos, &begin, &end, true, nullptr, nullptr) <= 0) {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// No subshell found, all done.
|
2012-11-19 00:30:30 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Interpret NULL to mean the end.
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if (end == nullptr) {
|
2013-07-17 08:35:30 +00:00
|
|
|
|
end = const_cast<wchar_t *>(buff) + bufflen;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2013-07-23 01:26:15 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (begin < cursor && end >= cursor) {
|
|
|
|
|
// This command substitution surrounds the cursor, so it's a tighter fit.
|
2012-11-19 00:30:30 +00:00
|
|
|
|
begin++;
|
2013-07-17 08:35:30 +00:00
|
|
|
|
ap = begin;
|
|
|
|
|
bp = end;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// pos is where to begin looking for the next one. But if we reached the end there's no
|
|
|
|
|
// next one.
|
|
|
|
|
if (begin >= end) break;
|
2013-07-17 08:35:30 +00:00
|
|
|
|
pos = begin + 1;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
} else if (begin >= cursor) {
|
|
|
|
|
// This command substitution starts at or after the cursor. Since it was the first
|
|
|
|
|
// command substitution in the string, we're done.
|
2012-11-19 00:30:30 +00:00
|
|
|
|
break;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
} else {
|
|
|
|
|
// This command substitution ends before the cursor. Skip it.
|
2013-07-17 08:35:30 +00:00
|
|
|
|
assert(end < cursor);
|
|
|
|
|
pos = end + 1;
|
|
|
|
|
assert(pos <= buff + bufflen);
|
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2013-07-23 01:26:15 +00:00
|
|
|
|
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if (a != nullptr) *a = ap;
|
|
|
|
|
if (b != nullptr) *b = bp;
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Get the beginning and end of the job or process definition under the cursor.
|
2019-10-29 12:32:26 +00:00
|
|
|
|
static void job_or_process_extent(bool process, const wchar_t *buff, size_t cursor_pos,
|
|
|
|
|
const wchar_t **a, const wchar_t **b,
|
|
|
|
|
std::vector<tok_t> *tokens) {
|
2019-05-28 00:24:19 +00:00
|
|
|
|
assert(buff && "Null buffer");
|
2019-10-18 22:24:28 +00:00
|
|
|
|
const wchar_t *begin = nullptr, *end = nullptr;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
int finished = 0;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-10-18 22:24:28 +00:00
|
|
|
|
if (a) *a = nullptr;
|
|
|
|
|
if (b) *b = nullptr;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
parse_util_cmdsubst_extent(buff, cursor_pos, &begin, &end);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (!end || !begin) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return;
|
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2020-04-08 23:56:59 +00:00
|
|
|
|
assert(cursor_pos >= static_cast<size_t>(begin - buff));
|
2015-07-26 07:12:36 +00:00
|
|
|
|
const size_t pos = cursor_pos - (begin - buff);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (a) *a = begin;
|
|
|
|
|
if (b) *b = end;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-10-18 22:24:28 +00:00
|
|
|
|
const wcstring buffcpy(begin, end);
|
|
|
|
|
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
|
|
|
|
|
maybe_t<tok_t> token{};
|
|
|
|
|
while ((token = tok.next()) && !finished) {
|
|
|
|
|
size_t tok_begin = token->offset;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
|
2019-10-18 22:24:28 +00:00
|
|
|
|
switch (token->type) {
|
|
|
|
|
case token_type_t::pipe: {
|
|
|
|
|
if (!process) {
|
2012-11-19 08:31:03 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2019-10-18 22:24:28 +00:00
|
|
|
|
}
|
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
case token_type_t::end:
|
|
|
|
|
case token_type_t::background:
|
|
|
|
|
case token_type_t::andand:
|
2019-10-29 12:32:26 +00:00
|
|
|
|
case token_type_t::oror:
|
|
|
|
|
case token_type_t::comment: {
|
2019-10-18 22:24:28 +00:00
|
|
|
|
if (tok_begin >= pos) {
|
|
|
|
|
finished = 1;
|
2019-11-19 01:08:16 +00:00
|
|
|
|
if (b) *b = const_cast<wchar_t *>(begin) + tok_begin;
|
2019-10-18 22:24:28 +00:00
|
|
|
|
} else {
|
2019-10-29 12:32:26 +00:00
|
|
|
|
// Statement at cursor might start after this token.
|
2019-11-19 01:08:16 +00:00
|
|
|
|
if (a) *a = const_cast<wchar_t *>(begin) + tok_begin + token->length;
|
2019-10-29 12:32:26 +00:00
|
|
|
|
if (tokens) tokens->clear();
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2019-10-29 12:32:26 +00:00
|
|
|
|
continue; // Do not add this to tokens
|
2019-10-18 22:24:28 +00:00
|
|
|
|
}
|
|
|
|
|
default: {
|
|
|
|
|
break;
|
2019-05-05 10:09:25 +00:00
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
2019-10-29 12:32:26 +00:00
|
|
|
|
if (tokens) tokens->push_back(*token);
|
2019-10-18 22:24:28 +00:00
|
|
|
|
}
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
void parse_util_process_extent(const wchar_t *buff, size_t pos, const wchar_t **a,
|
2019-10-29 12:32:26 +00:00
|
|
|
|
const wchar_t **b, std::vector<tok_t> *tokens) {
|
|
|
|
|
job_or_process_extent(true, buff, pos, a, b, tokens);
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
void parse_util_job_extent(const wchar_t *buff, size_t pos, const wchar_t **a, const wchar_t **b) {
|
2019-10-29 12:32:26 +00:00
|
|
|
|
job_or_process_extent(false, buff, pos, a, b, nullptr);
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **tok_begin,
|
|
|
|
|
const wchar_t **tok_end, const wchar_t **prev_begin,
|
|
|
|
|
const wchar_t **prev_end) {
|
2019-05-28 00:24:19 +00:00
|
|
|
|
assert(buff && "Null buffer");
|
2019-11-19 02:34:50 +00:00
|
|
|
|
const wchar_t *a = nullptr, *b = nullptr, *pa = nullptr, *pb = nullptr;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2013-09-21 23:38:57 +00:00
|
|
|
|
const wchar_t *cmdsubst_begin, *cmdsubst_end;
|
|
|
|
|
parse_util_cmdsubst_extent(buff, cursor_pos, &cmdsubst_begin, &cmdsubst_end);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (!cmdsubst_end || !cmdsubst_begin) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return;
|
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// pos is equivalent to cursor_pos within the range of the command substitution {begin, end}.
|
2016-10-09 21:36:08 +00:00
|
|
|
|
size_t offset_within_cmdsubst = cursor_pos - (cmdsubst_begin - buff);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-03-12 21:06:01 +00:00
|
|
|
|
size_t bufflen = std::wcslen(buff);
|
2018-10-20 20:25:55 +00:00
|
|
|
|
|
2013-09-21 23:38:57 +00:00
|
|
|
|
a = cmdsubst_begin + offset_within_cmdsubst;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
b = a;
|
2013-09-21 23:38:57 +00:00
|
|
|
|
pa = cmdsubst_begin + offset_within_cmdsubst;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
pb = pa;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2013-09-21 23:38:57 +00:00
|
|
|
|
assert(cmdsubst_begin >= buff);
|
2018-10-20 20:25:55 +00:00
|
|
|
|
assert(cmdsubst_begin <= (buff + bufflen));
|
2013-09-21 23:38:57 +00:00
|
|
|
|
assert(cmdsubst_end >= cmdsubst_begin);
|
2018-10-20 20:25:55 +00:00
|
|
|
|
assert(cmdsubst_end <= (buff + bufflen));
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2018-02-24 01:28:12 +00:00
|
|
|
|
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
|
2019-10-13 23:06:16 +00:00
|
|
|
|
while (maybe_t<tok_t> token = tok.next()) {
|
|
|
|
|
size_t tok_begin = token->offset;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
size_t tok_end = tok_begin;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Calculate end of token.
|
2019-10-13 23:06:16 +00:00
|
|
|
|
if (token->type == token_type_t::string) {
|
|
|
|
|
tok_end += token->length;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Cursor was before beginning of this token, means that the cursor is between two tokens,
|
|
|
|
|
// so we set it to a zero element string and break.
|
|
|
|
|
if (tok_begin > offset_within_cmdsubst) {
|
2013-09-21 23:38:57 +00:00
|
|
|
|
a = b = cmdsubst_begin + offset_within_cmdsubst;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// If cursor is inside the token, this is the token we are looking for. If so, set a and b
|
|
|
|
|
// and break.
|
2019-10-13 23:06:16 +00:00
|
|
|
|
if (token->type == token_type_t::string && tok_end >= offset_within_cmdsubst) {
|
|
|
|
|
a = cmdsubst_begin + token->offset;
|
|
|
|
|
b = a + token->length;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Remember previous string token.
|
2019-10-13 23:06:16 +00:00
|
|
|
|
if (token->type == token_type_t::string) {
|
|
|
|
|
pa = cmdsubst_begin + token->offset;
|
|
|
|
|
pb = pa + token->length;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
}
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (tok_begin) *tok_begin = a;
|
|
|
|
|
if (tok_end) *tok_end = b;
|
|
|
|
|
if (prev_begin) *prev_begin = pa;
|
|
|
|
|
if (prev_end) *prev_end = pb;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
|
assert(pa >= buff);
|
2018-10-20 20:25:55 +00:00
|
|
|
|
assert(pa <= (buff + bufflen));
|
2012-11-19 00:30:30 +00:00
|
|
|
|
assert(pb >= pa);
|
2018-10-20 20:25:55 +00:00
|
|
|
|
assert(pb <= (buff + bufflen));
|
2006-01-30 16:51:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Return a copy of \p str with wildcard syntax converted to the internal wildcard characters.
///
/// An unescaped '*' becomes ANY_STRING. An unescaped '?' becomes ANY_CHAR, unless the
/// qmark_noglob feature is enabled. A backslash-escaped wildcard becomes the literal character
/// without its backslash. An escaped backslash is copied through as two backslashes. All other
/// characters are copied unchanged. The input is read up to its first NUL character.
wcstring parse_util_unescape_wildcards(const wcstring &str) {
    wcstring result;
    result.reserve(str.size());
    const bool qmark_is_wildcard = !feature_test(features_t::qmark_noglob);

    for (const wchar_t *p = str.c_str(); *p != L'\0'; p++) {
        const wchar_t c = p[0];
        // Reading p[1] is safe: c is not NUL, so p[1] is at worst the terminator.
        const wchar_t next = p[1];

        if (c == L'*') {
            result.push_back(ANY_STRING);
        } else if (c == L'?' && qmark_is_wildcard) {
            result.push_back(ANY_CHAR);
        } else if (c == L'\\' && (next == L'*' || (next == L'?' && qmark_is_wildcard))) {
            // Escaped wildcard: keep the literal character and drop the backslash.
            result.push_back(next);
            p++;
        } else if (c == L'\\' && next == L'\\') {
            // Not a wildcard, but consume both so that the second backslash is not mistaken for
            // the start of another escape.
            result.append(L"\\\\");
            p++;
        } else {
            result.push_back(c);
        }
    }
    return result;
}
|
2006-02-14 19:56:36 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Find the outermost quoting style of current token. Returns 0 if token is not quoted.
|
|
|
|
|
static wchar_t get_quote(const wcstring &cmd_str, size_t len) {
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
wchar_t res = 0;
|
2021-06-26 05:40:43 +00:00
|
|
|
|
const wchar_t *cmd = cmd_str.c_str();
|
2012-11-18 10:23:22 +00:00
|
|
|
|
|
2019-11-26 00:36:13 +00:00
|
|
|
|
while (true) {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (!cmd[i]) break;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (cmd[i] == L'\\') {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
i++;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (!cmd[i]) break;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
i++;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
} else {
|
|
|
|
|
if (cmd[i] == L'\'' || cmd[i] == L'\"') {
|
2021-07-02 21:11:03 +00:00
|
|
|
|
const wchar_t *end = quote_end(&cmd[i], cmd[i]);
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if ((end == nullptr) || (!*end) || (end > cmd + len)) {
|
2012-11-19 00:30:30 +00:00
|
|
|
|
res = cmd[i];
|
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
i = end - cmd + 1;
|
|
|
|
|
} else
|
2012-11-19 00:30:30 +00:00
|
|
|
|
i++;
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
|
return res;
|
2012-07-06 21:34:53 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-26 05:40:43 +00:00
|
|
|
|
wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos) {
|
2018-02-24 01:28:12 +00:00
|
|
|
|
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
|
2019-10-13 23:06:16 +00:00
|
|
|
|
while (auto token = tok.next()) {
|
2021-06-26 05:40:43 +00:00
|
|
|
|
if (token->type == token_type_t::string &&
|
|
|
|
|
token->location_in_or_at_end_of_source_range(pos)) {
|
|
|
|
|
return get_quote(tok.text_of(*token), pos - token->offset);
|
2012-11-18 10:23:22 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2021-06-26 05:40:43 +00:00
|
|
|
|
return L'\0';
|
2012-07-06 21:34:53 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-02-17 22:36:43 +00:00
|
|
|
|
/// Escape \p cmd so that it can be inserted into a command line at a point where the quote
/// \p quote is open.
///
/// \param quote the open quote character, or L'\0' if no quote is open. With no quote the
///        string is escaped by escape_string() using ESCAPE_NO_QUOTED.
/// \param no_tilde with no quote open, additionally pass ESCAPE_NO_TILDE to escape_string().
///        Ignored when a quote is open.
/// \return the escaped string.
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, bool no_tilde) {
    if (quote == L'\0') {
        const escape_flags_t flags = ESCAPE_NO_QUOTED | (no_tilde ? ESCAPE_NO_TILDE : 0);
        return escape_string(cmd, flags);
    }

    // We are inside quotes. A few characters cannot be represented there, e.g. newlines. For
    // those, close the quote, emit a backslash escape, and reopen the quote.
    wcstring result;
    result.reserve(cmd.size());
    for (const wchar_t c : cmd) {
        // The letter of the backslash escape for characters that must leave the quotes.
        wchar_t escape_letter = L'\0';
        switch (c) {
            case L'\n':
                escape_letter = L'n';
                break;
            case L'\t':
                escape_letter = L't';
                break;
            case L'\b':
                escape_letter = L'b';
                break;
            case L'\r':
                escape_letter = L'r';
                break;
            default:
                break;
        }

        if (escape_letter != L'\0') {
            result.push_back(quote);
            result.push_back(L'\\');
            result.push_back(escape_letter);
            result.push_back(quote);
        } else if (c == L'\\') {
            // A backslash is always doubled.
            result.append(2, L'\\');
        } else {
            // '$' needs a backslash only inside double quotes; any other character needs one
            // only if it is the quote character itself.
            const bool needs_backslash = (c == L'$') ? (quote == L'"') : (c == quote);
            if (needs_backslash) result.push_back(L'\\');
            result.push_back(c);
        }
    }
    return result;
}
|
2013-12-08 21:41:12 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
std::vector<int> parse_util_compute_indents(const wcstring &src) {
|
|
|
|
|
// Make a vector the same size as the input string, which contains the indents. Initialize them
|
2020-06-21 01:22:11 +00:00
|
|
|
|
// to 0.
|
2013-12-08 21:41:12 +00:00
|
|
|
|
const size_t src_size = src.size();
|
2020-06-21 01:22:11 +00:00
|
|
|
|
std::vector<int> indents(src_size, 0);
|
2014-01-15 09:40:40 +00:00
|
|
|
|
|
2020-03-03 09:24:05 +00:00
|
|
|
|
// Simple trick: if our source does not contain a newline, then all indents are 0.
|
|
|
|
|
if (src.find('\n') == wcstring::npos) {
|
|
|
|
|
return indents;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
|
|
|
|
|
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
|
|
|
|
|
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
|
|
|
|
|
// were a case item list.
|
2020-06-21 01:22:11 +00:00
|
|
|
|
using namespace ast;
|
|
|
|
|
auto ast =
|
|
|
|
|
ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
|
|
|
|
|
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
|
|
|
|
|
|
|
|
|
|
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
|
|
|
|
|
// visiting its children.
|
|
|
|
|
struct indent_visitor_t {
|
2020-08-09 19:15:14 +00:00
|
|
|
|
indent_visitor_t(const wcstring &src, std::vector<int> &indents)
|
|
|
|
|
: src(src), indents(indents) {}
|
2020-06-21 01:22:11 +00:00
|
|
|
|
|
|
|
|
|
void visit(const node_t &node) {
|
|
|
|
|
int inc = 0;
|
|
|
|
|
int dec = 0;
|
|
|
|
|
switch (node.type) {
|
|
|
|
|
case type_t::job_list:
|
|
|
|
|
case type_t::andor_job_list:
|
|
|
|
|
// Job lists are never unwound.
|
|
|
|
|
inc = 1;
|
|
|
|
|
dec = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// Increment indents for conditions in headers (#1665).
|
|
|
|
|
case type_t::job_conjunction:
|
|
|
|
|
if (node.parent->type == type_t::while_header ||
|
|
|
|
|
node.parent->type == type_t::if_clause) {
|
|
|
|
|
inc = 1;
|
|
|
|
|
dec = 1;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
|
2020-08-09 19:15:14 +00:00
|
|
|
|
// Increment indents for job_continuation_t if it contains a newline.
|
|
|
|
|
// This is a bit of a hack - it indents cases like:
|
|
|
|
|
// cmd1 |
|
|
|
|
|
// ....cmd2
|
|
|
|
|
// but avoids "double indenting" if there's no newline:
|
|
|
|
|
// cmd1 | while cmd2
|
|
|
|
|
// ....cmd3
|
|
|
|
|
// end
|
|
|
|
|
// See #7252.
|
|
|
|
|
case type_t::job_continuation:
|
|
|
|
|
if (has_newline(node.as<job_continuation_t>()->newlines)) {
|
|
|
|
|
inc = 1;
|
|
|
|
|
dec = 1;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// Likewise for && and ||.
|
|
|
|
|
case type_t::job_conjunction_continuation:
|
|
|
|
|
if (has_newline(node.as<job_conjunction_continuation_t>()->newlines)) {
|
2020-06-21 01:22:11 +00:00
|
|
|
|
inc = 1;
|
|
|
|
|
dec = 1;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case type_t::case_item_list:
|
|
|
|
|
// Here's a hack. Consider:
|
|
|
|
|
// switch abc
|
|
|
|
|
// cas
|
|
|
|
|
//
|
|
|
|
|
// fish will see that 'cas' is not valid inside a switch statement because it is
|
|
|
|
|
// not "case". It will then unwind back to the top level job list, producing a
|
|
|
|
|
// parse tree like:
|
|
|
|
|
//
|
|
|
|
|
// job_list
|
|
|
|
|
// switch_job
|
|
|
|
|
// <err>
|
|
|
|
|
// normal_job
|
|
|
|
|
// cas
|
|
|
|
|
//
|
|
|
|
|
// And so we will think that the 'cas' job is at the same level as the switch.
|
|
|
|
|
// To address this, if we see that the switch statement was not closed, do not
|
|
|
|
|
// decrement the indent afterwards.
|
|
|
|
|
inc = 1;
|
|
|
|
|
dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1;
|
|
|
|
|
break;
|
2021-02-08 03:42:24 +00:00
|
|
|
|
case type_t::token_base: {
|
|
|
|
|
auto tok = node.as<token_base_t>();
|
|
|
|
|
if (node.parent->type == type_t::begin_header &&
|
|
|
|
|
tok->type == parse_token_type_t::end) {
|
|
|
|
|
// The newline after "begin" is optional, so it is part of the header.
|
|
|
|
|
// The header is not in the indented block, so indent the newline here.
|
|
|
|
|
if (node.source(src) == L"\n") {
|
|
|
|
|
inc = 1;
|
|
|
|
|
dec = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
2020-06-21 01:22:11 +00:00
|
|
|
|
default:
|
|
|
|
|
break;
|
2014-09-29 18:29:50 +00:00
|
|
|
|
}
|
2021-02-08 03:42:24 +00:00
|
|
|
|
|
|
|
|
|
auto range = node.source_range();
|
|
|
|
|
if (range.length > 0 && node.category == category_t::leaf) {
|
2021-02-08 04:01:17 +00:00
|
|
|
|
record_line_continuations_until(range.start);
|
2021-02-08 03:42:24 +00:00
|
|
|
|
std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start,
|
|
|
|
|
last_indent);
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
indent += inc;
|
2014-09-29 18:29:50 +00:00
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
// If we increased the indentation, apply it to the remainder of the string, even if the
|
|
|
|
|
// list is empty. For example (where _ represents the cursor):
|
|
|
|
|
//
|
|
|
|
|
// if foo
|
|
|
|
|
// _
|
|
|
|
|
//
|
|
|
|
|
// we want to indent the newline.
|
|
|
|
|
if (inc) {
|
|
|
|
|
last_indent = indent;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If this is a leaf node, apply the current indentation.
|
2021-02-13 06:48:19 +00:00
|
|
|
|
if (node.category == category_t::leaf && range.length > 0) {
|
|
|
|
|
std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent);
|
|
|
|
|
last_leaf_end = range.start + range.length;
|
|
|
|
|
last_indent = indent;
|
2013-12-08 21:41:12 +00:00
|
|
|
|
}
|
2020-06-21 01:22:11 +00:00
|
|
|
|
|
|
|
|
|
node_visitor(*this).accept_children_of(&node);
|
|
|
|
|
indent -= dec;
|
2013-12-08 21:41:12 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-08-09 19:15:14 +00:00
|
|
|
|
/// \return whether a maybe_newlines node contains at least one newline.
|
|
|
|
|
bool has_newline(const maybe_newlines_t &nls) const {
|
|
|
|
|
return nls.source(src).find(L'\n') != wcstring::npos;
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-08 04:01:17 +00:00
|
|
|
|
void record_line_continuations_until(size_t offset) {
|
|
|
|
|
wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end);
|
|
|
|
|
size_t escaped_nl = gap_text.find(L"\\\n");
|
|
|
|
|
if (escaped_nl == wcstring::npos) return;
|
2021-02-16 17:16:05 +00:00
|
|
|
|
auto line_end = gap_text.begin() + escaped_nl;
|
|
|
|
|
if (std::find(gap_text.begin(), line_end, L'#') != line_end) return;
|
2021-02-08 04:01:17 +00:00
|
|
|
|
auto end = src.begin() + offset;
|
|
|
|
|
auto newline = src.begin() + last_leaf_end + escaped_nl + 1;
|
|
|
|
|
// The gap text might contain multiple newlines if there are multiple lines that
|
|
|
|
|
// don't contain an AST node, for example, comment lines, or lines containing only
|
|
|
|
|
// the escaped newline.
|
|
|
|
|
do {
|
|
|
|
|
line_continuations.push_back(newline - src.begin());
|
|
|
|
|
newline = std::find(newline + 1, end, L'\n');
|
|
|
|
|
} while (newline != end);
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
// The one-past-the-last index of the most recently encountered leaf node.
|
|
|
|
|
// We use this to populate the indents even if there's no tokens in the range.
|
|
|
|
|
size_t last_leaf_end{0};
|
|
|
|
|
|
|
|
|
|
// The last indent which we assigned.
|
|
|
|
|
int last_indent{-1};
|
|
|
|
|
|
2020-08-09 19:15:14 +00:00
|
|
|
|
// The source we are indenting.
|
|
|
|
|
const wcstring &src;
|
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
// List of indents, which we populate.
|
|
|
|
|
std::vector<int> &indents;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
|
2020-06-21 01:22:11 +00:00
|
|
|
|
// Initialize our starting indent to -1, as our top-level node is a job list which
|
|
|
|
|
// will immediately increment it.
|
|
|
|
|
int indent{-1};
|
2021-02-08 04:01:17 +00:00
|
|
|
|
|
|
|
|
|
// List of locations of escaped newline characters.
|
|
|
|
|
std::vector<size_t> line_continuations;
|
2020-06-21 01:22:11 +00:00
|
|
|
|
};
|
|
|
|
|
|
2020-08-09 19:15:14 +00:00
|
|
|
|
indent_visitor_t iv(src, indents);
|
2020-06-21 01:22:11 +00:00
|
|
|
|
node_visitor(iv).accept(ast.top());
|
2021-02-08 04:01:17 +00:00
|
|
|
|
iv.record_line_continuations_until(indents.size());
|
2021-02-08 03:42:24 +00:00
|
|
|
|
std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent);
|
2020-06-21 01:22:11 +00:00
|
|
|
|
|
|
|
|
|
// All newlines now get the *next* indent.
|
|
|
|
|
// For example, in this code:
|
|
|
|
|
// if true
|
|
|
|
|
// stuff
|
|
|
|
|
// the newline "belongs" to the if statement as it ends its job.
|
|
|
|
|
// But when rendered, it visually belongs to the job list.
|
|
|
|
|
|
|
|
|
|
size_t idx = src_size;
|
|
|
|
|
int next_indent = iv.last_indent;
|
|
|
|
|
while (idx--) {
|
|
|
|
|
if (src.at(idx) == L'\n') {
|
2021-02-08 03:37:50 +00:00
|
|
|
|
bool empty_middle_line = idx + 1 < src_size && src.at(idx + 1) == L'\n';
|
|
|
|
|
if (!empty_middle_line) {
|
|
|
|
|
indents.at(idx) = next_indent;
|
|
|
|
|
}
|
2020-06-21 01:22:11 +00:00
|
|
|
|
} else {
|
|
|
|
|
next_indent = indents.at(idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-02-08 04:01:17 +00:00
|
|
|
|
// Add an extra level of indentation to continuation lines.
|
|
|
|
|
for (size_t idx : iv.line_continuations) {
|
|
|
|
|
do {
|
|
|
|
|
indents.at(idx)++;
|
|
|
|
|
} while (++idx < src_size && src.at(idx) != L'\n');
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-08 21:41:12 +00:00
|
|
|
|
return indents;
|
|
|
|
|
}
|
2013-12-16 00:05:37 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Append a syntax error to the given error list.
|
2022-08-21 21:51:33 +00:00
|
|
|
|
static bool append_syntax_error(parse_error_list_t *errors, size_t source_location,
|
|
|
|
|
size_t source_length, const wchar_t *fmt, ...) {
|
2020-07-12 20:55:51 +00:00
|
|
|
|
if (!errors) return true;
|
2015-04-29 23:53:02 +00:00
|
|
|
|
parse_error_t error;
|
|
|
|
|
error.source_start = source_location;
|
2022-08-12 14:53:31 +00:00
|
|
|
|
error.source_length = source_length;
|
2015-04-29 23:53:02 +00:00
|
|
|
|
error.code = parse_error_syntax;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
2015-04-29 23:53:02 +00:00
|
|
|
|
va_list va;
|
|
|
|
|
va_start(va, fmt);
|
|
|
|
|
error.text = vformat_string(fmt, va);
|
|
|
|
|
va_end(va);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
2020-07-12 20:55:51 +00:00
|
|
|
|
errors->push_back(std::move(error));
|
2015-04-29 23:53:02 +00:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-11 14:30:19 +00:00
|
|
|
|
/// Returns true if the specified command is a builtin that may not be used in a pipeline.
|
2018-09-29 04:58:44 +00:00
|
|
|
|
static const wchar_t *const forbidden_pipe_commands[] = {L"exec", L"case", L"break", L"return",
|
|
|
|
|
L"continue"};
|
2021-12-09 08:52:45 +00:00
|
|
|
|
static bool parser_is_pipe_forbidden(const wcstring &word) {
|
2017-04-05 04:28:57 +00:00
|
|
|
|
return contains(forbidden_pipe_commands, word);
|
2013-12-16 00:05:37 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-02-14 21:15:29 +00:00
|
|
|
|
/// \return true if \p s is exactly "-h" or exactly "--help".
bool parse_util_argument_is_help(const wcstring &s) {
    if (s.compare(L"-h") == 0) return true;
    return s.compare(L"--help") == 0;
}
|
2014-03-18 21:42:38 +00:00
|
|
|
|
|
2020-07-02 04:06:58 +00:00
|
|
|
|
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
|
|
|
|
|
// there are no arguments.
|
2020-09-08 20:04:44 +00:00
|
|
|
|
static const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
|
2020-07-02 04:06:58 +00:00
|
|
|
|
for (const ast::argument_or_redirection_t &v : list) {
|
|
|
|
|
if (v.is_argument()) return &v.argument();
|
2013-12-16 00:05:37 +00:00
|
|
|
|
}
|
2020-07-02 04:06:58 +00:00
|
|
|
|
return nullptr;
|
2013-12-16 00:05:37 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
|
|
|
|
|
/// For example, if wc is @, then the variable name was $@ and we suggest $argv.
|
|
|
|
|
static const wchar_t *error_format_for_character(wchar_t wc) {
|
|
|
|
|
switch (wc) {
|
2016-05-03 23:23:30 +00:00
|
|
|
|
case L'?': {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
return ERROR_NOT_STATUS;
|
2016-05-03 23:23:30 +00:00
|
|
|
|
}
|
|
|
|
|
case L'#': {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
return ERROR_NOT_ARGV_COUNT;
|
2016-05-03 23:23:30 +00:00
|
|
|
|
}
|
|
|
|
|
case L'@': {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
return ERROR_NOT_ARGV_AT;
|
2016-05-03 23:23:30 +00:00
|
|
|
|
}
|
|
|
|
|
case L'*': {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
return ERROR_NOT_ARGV_STAR;
|
2016-05-03 23:23:30 +00:00
|
|
|
|
}
|
2015-04-29 23:53:02 +00:00
|
|
|
|
case L'$':
|
|
|
|
|
case VARIABLE_EXPAND:
|
|
|
|
|
case VARIABLE_EXPAND_SINGLE:
|
2016-05-03 23:23:30 +00:00
|
|
|
|
case VARIABLE_EXPAND_EMPTY: {
|
2015-04-29 23:53:02 +00:00
|
|
|
|
return ERROR_NOT_PID;
|
2016-05-03 23:23:30 +00:00
|
|
|
|
}
|
2019-05-05 10:09:25 +00:00
|
|
|
|
default: {
|
|
|
|
|
return ERROR_BAD_VAR_CHAR1;
|
|
|
|
|
}
|
2015-04-29 23:53:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
void parse_util_expand_variable_error(const wcstring &token, size_t global_token_pos,
|
|
|
|
|
size_t dollar_pos, parse_error_list_t *errors) {
|
|
|
|
|
// Note that dollar_pos is probably VARIABLE_EXPAND or VARIABLE_EXPAND_SINGLE, not a literal
|
|
|
|
|
// dollar sign.
|
2019-11-19 02:34:50 +00:00
|
|
|
|
assert(errors != nullptr);
|
2015-04-29 23:53:02 +00:00
|
|
|
|
assert(dollar_pos < token.size());
|
2016-05-04 04:31:32 +00:00
|
|
|
|
const bool double_quotes = token.at(dollar_pos) == VARIABLE_EXPAND_SINGLE;
|
2015-04-29 23:53:02 +00:00
|
|
|
|
const size_t start_error_count = errors->size();
|
|
|
|
|
const size_t global_dollar_pos = global_token_pos + dollar_pos;
|
|
|
|
|
const size_t global_after_dollar_pos = global_dollar_pos + 1;
|
2016-04-04 21:34:28 +00:00
|
|
|
|
wchar_t char_after_dollar = dollar_pos + 1 >= token.size() ? 0 : token.at(dollar_pos + 1);
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
switch (char_after_dollar) {
|
2018-03-10 19:16:07 +00:00
|
|
|
|
case BRACE_BEGIN:
|
2016-12-12 20:35:22 +00:00
|
|
|
|
case L'{': {
|
2018-03-10 19:16:07 +00:00
|
|
|
|
// The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
|
|
|
|
|
// report a bracket error. Otherwise just complain about the ${.
|
2015-04-29 23:53:02 +00:00
|
|
|
|
bool looks_like_variable = false;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
size_t closing_bracket =
|
2018-03-10 19:16:07 +00:00
|
|
|
|
token.find(char_after_dollar == L'{' ? L'}' : wchar_t(BRACE_END), dollar_pos + 2);
|
2015-04-29 23:53:02 +00:00
|
|
|
|
wcstring var_name;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (closing_bracket != wcstring::npos) {
|
2015-04-29 23:53:02 +00:00
|
|
|
|
size_t var_start = dollar_pos + 2, var_end = closing_bracket;
|
|
|
|
|
var_name = wcstring(token, var_start, var_end - var_start);
|
2017-04-20 06:43:02 +00:00
|
|
|
|
looks_like_variable = valid_var_name(var_name);
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (looks_like_variable) {
|
|
|
|
|
append_syntax_error(
|
2022-08-12 14:53:31 +00:00
|
|
|
|
errors, global_after_dollar_pos, 1,
|
2016-05-02 23:53:10 +00:00
|
|
|
|
double_quotes ? ERROR_BRACKETED_VARIABLE_QUOTED1 : ERROR_BRACKETED_VARIABLE1,
|
2019-09-19 17:32:07 +00:00
|
|
|
|
truncate(var_name, var_err_len).c_str());
|
2016-05-02 23:53:10 +00:00
|
|
|
|
} else {
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(errors, global_after_dollar_pos, 1, ERROR_BAD_VAR_CHAR1, L'{');
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
case INTERNAL_SEPARATOR: {
|
|
|
|
|
// e.g.: echo foo"$"baz
|
|
|
|
|
// These are only ever quotes, not command substitutions. Command substitutions are
|
|
|
|
|
// handled earlier.
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(errors, global_dollar_pos, 1, ERROR_NO_VAR_NAME);
|
2014-03-04 10:53:34 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
case L'\0': {
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(errors, global_dollar_pos, 1, ERROR_NO_VAR_NAME);
|
2014-03-04 10:53:34 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
default: {
|
2015-04-29 23:53:02 +00:00
|
|
|
|
wchar_t token_stop_char = char_after_dollar;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Unescape (see issue #50).
|
2018-05-06 02:11:57 +00:00
|
|
|
|
if (token_stop_char == ANY_CHAR)
|
|
|
|
|
token_stop_char = L'?';
|
|
|
|
|
else if (token_stop_char == ANY_STRING || token_stop_char == ANY_STRING_RECURSIVE)
|
2014-03-04 10:53:34 +00:00
|
|
|
|
token_stop_char = L'*';
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
|
|
|
|
// Determine which error message to use. The format string may not consume all the
|
|
|
|
|
// arguments we pass but that's harmless.
|
2015-04-29 23:53:02 +00:00
|
|
|
|
const wchar_t *error_fmt = error_format_for_character(token_stop_char);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(errors, global_after_dollar_pos, 1, error_fmt, token_stop_char);
|
2014-03-04 10:53:34 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
|
|
|
|
// We should have appended exactly one error.
|
2015-04-29 23:53:02 +00:00
|
|
|
|
assert(errors->size() == start_error_count + 1);
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
/// Test if this argument contains any errors. Detected errors include syntax errors in command
|
|
|
|
|
/// substitutions, improperly escaped characters and improper use of the variable expansion
|
|
|
|
|
/// operator.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
|
2016-05-02 23:53:10 +00:00
|
|
|
|
const wcstring &arg_src,
|
|
|
|
|
parse_error_list_t *out_errors) {
|
2020-07-02 04:06:58 +00:00
|
|
|
|
maybe_t<source_range_t> source_range = arg.try_source_range();
|
|
|
|
|
if (!source_range.has_value()) return 0;
|
|
|
|
|
|
|
|
|
|
size_t source_start = source_range->start;
|
2020-07-14 22:51:12 +00:00
|
|
|
|
parser_test_error_bits_t err = 0;
|
2014-03-04 10:53:34 +00:00
|
|
|
|
|
2022-04-03 13:30:31 +00:00
|
|
|
|
auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int {
|
|
|
|
|
wcstring unesc;
|
|
|
|
|
if (!unescape_string(arg_src.c_str() + begin, end - begin, &unesc, UNESCAPE_SPECIAL)) {
|
|
|
|
|
if (out_errors) {
|
2022-09-16 20:28:55 +00:00
|
|
|
|
const wchar_t *fmt = L"Invalid token '%ls'";
|
|
|
|
|
if (arg_src.length() == 2 && arg_src[0] == L'\\' &&
|
|
|
|
|
(arg_src[1] == L'c' || towlower(arg_src[1]) == L'u'
|
|
|
|
|
|| towlower(arg_src[1]) == L'x')) {
|
|
|
|
|
fmt = L"Incomplete escape sequence '%ls'";
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(out_errors, source_start + begin, end - begin,
|
2022-09-16 20:28:55 +00:00
|
|
|
|
fmt, arg_src.c_str());
|
2022-04-03 13:30:31 +00:00
|
|
|
|
}
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
parser_test_error_bits_t err = 0;
|
|
|
|
|
// Check for invalid variable expansions.
|
|
|
|
|
const size_t unesc_size = unesc.size();
|
|
|
|
|
for (size_t idx = 0; idx < unesc_size; idx++) {
|
|
|
|
|
if (unesc.at(idx) != VARIABLE_EXPAND && unesc.at(idx) != VARIABLE_EXPAND_SINGLE) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
wchar_t next_char = idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0';
|
|
|
|
|
if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE &&
|
|
|
|
|
next_char != '(' && !valid_var_name_char(next_char)) {
|
|
|
|
|
err = 1;
|
|
|
|
|
if (out_errors) {
|
|
|
|
|
// We have something like $$$^.... Back up until we reach the first $.
|
|
|
|
|
size_t first_dollar = idx;
|
|
|
|
|
while (first_dollar > 0 &&
|
|
|
|
|
(unesc.at(first_dollar - 1) == VARIABLE_EXPAND ||
|
|
|
|
|
unesc.at(first_dollar - 1) == VARIABLE_EXPAND_SINGLE)) {
|
|
|
|
|
first_dollar--;
|
|
|
|
|
}
|
|
|
|
|
parse_util_expand_variable_error(unesc, source_start, first_dollar, out_errors);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return err;
|
|
|
|
|
};
|
|
|
|
|
|
2020-07-14 22:51:12 +00:00
|
|
|
|
size_t cursor = 0;
|
2022-04-03 13:30:31 +00:00
|
|
|
|
size_t checked = 0;
|
2020-07-14 22:51:12 +00:00
|
|
|
|
wcstring subst;
|
|
|
|
|
|
|
|
|
|
bool do_loop = true;
|
2021-11-29 20:08:26 +00:00
|
|
|
|
bool is_quoted = false;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
while (do_loop) {
|
2020-07-14 22:34:26 +00:00
|
|
|
|
size_t paren_begin = 0;
|
|
|
|
|
size_t paren_end = 0;
|
2022-04-03 13:30:31 +00:00
|
|
|
|
bool has_dollar = false;
|
2020-07-14 22:51:12 +00:00
|
|
|
|
switch (parse_util_locate_cmdsubst_range(arg_src, &cursor, &subst, &paren_begin, &paren_end,
|
2022-04-03 13:30:31 +00:00
|
|
|
|
false, &is_quoted, &has_dollar)) {
|
2016-05-02 23:53:10 +00:00
|
|
|
|
case -1: {
|
2020-07-14 22:51:12 +00:00
|
|
|
|
err |= PARSER_TEST_ERROR;
|
2016-05-02 23:53:10 +00:00
|
|
|
|
if (out_errors) {
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(out_errors, source_start, 1, L"Mismatched parenthesis");
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
|
|
|
|
return err;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
case 0: {
|
2020-07-14 22:51:12 +00:00
|
|
|
|
do_loop = false;
|
2014-03-04 10:53:34 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2016-05-02 23:53:10 +00:00
|
|
|
|
case 1: {
|
2022-04-03 13:30:31 +00:00
|
|
|
|
err |= check_subtoken(checked, paren_begin - has_dollar);
|
|
|
|
|
|
2020-07-14 22:34:26 +00:00
|
|
|
|
assert(paren_begin < paren_end && "Parens out of order?");
|
2014-03-18 21:14:32 +00:00
|
|
|
|
parse_error_list_t subst_errors;
|
2020-07-12 20:55:51 +00:00
|
|
|
|
err |= parse_util_detect_errors(subst, &subst_errors);
|
2014-03-18 21:14:32 +00:00
|
|
|
|
|
2016-05-02 23:53:10 +00:00
|
|
|
|
// Our command substitution produced error offsets relative to its source. Tweak the
|
|
|
|
|
// offsets of the errors in the command substitution to account for both its offset
|
|
|
|
|
// within the string, and the offset of the node.
|
2020-07-14 22:34:26 +00:00
|
|
|
|
size_t error_offset = paren_begin + 1 + source_start;
|
2014-03-22 00:13:33 +00:00
|
|
|
|
parse_error_offset_source_start(&subst_errors, error_offset);
|
2016-05-02 23:53:10 +00:00
|
|
|
|
|
2019-11-19 02:34:50 +00:00
|
|
|
|
if (out_errors != nullptr) {
|
2014-03-18 21:14:32 +00:00
|
|
|
|
out_errors->insert(out_errors->end(), subst_errors.begin(), subst_errors.end());
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
2022-04-03 13:30:31 +00:00
|
|
|
|
|
|
|
|
|
checked = paren_end + 1;
|
2014-03-04 10:53:34 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2016-10-30 00:25:48 +00:00
|
|
|
|
default: {
|
|
|
|
|
DIE("unexpected parse_util_locate_cmdsubst() return value");
|
|
|
|
|
}
|
2014-03-04 10:53:34 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-04-03 13:30:31 +00:00
|
|
|
|
err |= check_subtoken(checked, arg_src.size());
|
2014-03-04 10:53:34 +00:00
|
|
|
|
|
|
|
|
|
return err;
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-12 19:15:35 +00:00
|
|
|
|
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
static bool detect_errors_in_backgrounded_job(const ast::job_t &job,
|
2018-01-12 19:15:35 +00:00
|
|
|
|
parse_error_list_t *parse_errors) {
|
2020-07-02 04:06:58 +00:00
|
|
|
|
using namespace ast;
|
|
|
|
|
auto source_range = job.try_source_range();
|
2018-01-14 00:24:21 +00:00
|
|
|
|
if (!source_range) return false;
|
|
|
|
|
|
2018-01-12 19:15:35 +00:00
|
|
|
|
bool errored = false;
|
|
|
|
|
// Disallow background in the following cases:
|
|
|
|
|
// foo & ; and bar
|
|
|
|
|
// foo & ; or bar
|
|
|
|
|
// if foo & ; end
|
|
|
|
|
// while foo & ; end
|
2020-07-02 04:06:58 +00:00
|
|
|
|
const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>();
|
|
|
|
|
if (!job_conj) return false;
|
|
|
|
|
|
|
|
|
|
if (job_conj->parent->try_as<if_clause_t>()) {
|
2022-08-12 14:53:31 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_range->start, source_range->length,
|
2018-01-14 00:24:21 +00:00
|
|
|
|
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
2020-07-02 04:06:58 +00:00
|
|
|
|
} else if (job_conj->parent->try_as<while_header_t>()) {
|
2022-08-12 14:53:31 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_range->start, source_range->length,
|
2018-01-14 00:24:21 +00:00
|
|
|
|
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
2020-07-02 04:06:58 +00:00
|
|
|
|
} else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) {
|
2018-01-14 00:24:21 +00:00
|
|
|
|
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
// Find the index of ourselves in the job list.
|
|
|
|
|
size_t index;
|
|
|
|
|
for (index = 0; index < jlist->count(); index++) {
|
|
|
|
|
if (jlist->at(index) == job_conj) break;
|
|
|
|
|
}
|
|
|
|
|
assert(index < jlist->count() && "Should have found the job in the list");
|
|
|
|
|
|
|
|
|
|
// Try getting the next job and check its decorator.
|
|
|
|
|
if (const job_conjunction_t *next = jlist->at(index + 1)) {
|
|
|
|
|
if (const keyword_base_t *deco = next->decorator.contents.get()) {
|
|
|
|
|
assert(
|
|
|
|
|
(deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) &&
|
|
|
|
|
"Unexpected decorator keyword");
|
|
|
|
|
const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or");
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, deco->source_range().start,
|
|
|
|
|
deco->source_range().length,
|
2020-07-02 04:06:58 +00:00
|
|
|
|
BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
|
2018-01-12 19:15:35 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return errored;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-12 19:51:17 +00:00
|
|
|
|
/// Given a source buffer \p buff_src and decorated statement \p dst within it, return true if there
|
|
|
|
|
/// is an error and false if not. \p storage may be used to reduce allocations.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
|
|
|
|
|
const ast::decorated_statement_t &dst,
|
2020-07-12 19:51:17 +00:00
|
|
|
|
wcstring *storage,
|
2020-07-02 04:06:58 +00:00
|
|
|
|
parse_error_list_t *parse_errors) {
|
|
|
|
|
using namespace ast;
|
2018-02-18 21:00:46 +00:00
|
|
|
|
bool errored = false;
|
2020-07-02 04:06:58 +00:00
|
|
|
|
auto source_start = dst.source_range().start;
|
2022-08-12 14:53:31 +00:00
|
|
|
|
auto source_length = dst.source_range().length;
|
2020-07-07 23:28:39 +00:00
|
|
|
|
const statement_decoration_t decoration = dst.decoration();
|
2020-07-02 04:06:58 +00:00
|
|
|
|
|
|
|
|
|
// Determine if the first argument is help.
|
|
|
|
|
bool first_arg_is_help = false;
|
|
|
|
|
if (const auto *arg = get_first_arg(dst.args_or_redirs)) {
|
2020-07-12 19:51:17 +00:00
|
|
|
|
const wcstring &arg_src = arg->source(buff_src, storage);
|
2021-02-14 22:09:59 +00:00
|
|
|
|
first_arg_is_help = parse_util_argument_is_help(arg_src);
|
2020-07-02 04:06:58 +00:00
|
|
|
|
}
|
2018-02-18 21:00:46 +00:00
|
|
|
|
|
2020-07-02 04:06:58 +00:00
|
|
|
|
// Get the statement we are part of.
|
|
|
|
|
const statement_t *st = dst.parent->as<statement_t>();
|
2018-02-18 21:00:46 +00:00
|
|
|
|
|
2020-07-02 04:06:58 +00:00
|
|
|
|
// Walk up to the job.
|
|
|
|
|
const ast::job_t *job = nullptr;
|
|
|
|
|
for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) {
|
|
|
|
|
assert(cursor && "Reached root without finding a job");
|
|
|
|
|
job = cursor->try_as<ast::job_t>();
|
|
|
|
|
}
|
|
|
|
|
assert(job && "Should have found the job");
|
|
|
|
|
|
|
|
|
|
// Check our pipeline position.
|
|
|
|
|
pipeline_position_t pipe_pos;
|
|
|
|
|
if (job->continuation.empty()) {
|
|
|
|
|
pipe_pos = pipeline_position_t::none;
|
|
|
|
|
} else if (&job->statement == st) {
|
|
|
|
|
pipe_pos = pipeline_position_t::first;
|
|
|
|
|
} else {
|
|
|
|
|
pipe_pos = pipeline_position_t::subsequent;
|
|
|
|
|
}
|
2018-02-18 21:00:46 +00:00
|
|
|
|
|
|
|
|
|
// Check that we don't try to pipe through exec.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
|
2020-07-07 23:28:39 +00:00
|
|
|
|
if (is_in_pipeline && decoration == statement_decoration_t::exec) {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_start, source_length,
|
|
|
|
|
INVALID_PIPELINE_CMD_ERR_MSG, L"exec");
|
2018-02-18 21:00:46 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-03-03 02:09:16 +00:00
|
|
|
|
// This is a somewhat stale check that 'and' and 'or' are not in pipelines, except at the
|
|
|
|
|
// beginning. We can't disallow them as commands entirely because we need to support 'and
|
|
|
|
|
// --help', etc.
|
|
|
|
|
if (pipe_pos == pipeline_position_t::subsequent) {
|
|
|
|
|
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
|
|
|
|
|
// commands.
|
2020-07-12 19:51:17 +00:00
|
|
|
|
const wcstring &command = dst.command.source(buff_src, storage);
|
2018-03-03 02:09:16 +00:00
|
|
|
|
if (command == L"and" || command == L"or") {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_start, source_length,
|
|
|
|
|
INVALID_PIPELINE_CMD_ERR_MSG, command.c_str());
|
2018-03-03 02:09:16 +00:00
|
|
|
|
}
|
2022-03-31 23:14:59 +00:00
|
|
|
|
|
|
|
|
|
// Similarly for time (#8841).
|
|
|
|
|
if (command == L"time") {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_start, source_length,
|
|
|
|
|
TIME_IN_PIPELINE_ERR_MSG);
|
2022-03-31 23:14:59 +00:00
|
|
|
|
}
|
2018-03-03 02:09:16 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-07-26 19:29:02 +00:00
|
|
|
|
// $status specifically is invalid as a command,
|
|
|
|
|
// to avoid people trying `if $status`.
|
|
|
|
|
// We see this surprisingly regularly.
|
|
|
|
|
const wcstring &com = dst.command.source(buff_src, storage);
|
|
|
|
|
if (com == L"$status") {
|
2021-08-01 15:41:56 +00:00
|
|
|
|
errored =
|
2022-08-12 14:53:31 +00:00
|
|
|
|
append_syntax_error(parse_errors, source_start, source_length,
|
2021-08-01 15:41:56 +00:00
|
|
|
|
_(L"$status is not valid as a command. See `help conditions`"));
|
2021-07-26 19:29:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-12 19:51:17 +00:00
|
|
|
|
const wcstring &unexp_command = dst.command.source(buff_src, storage);
|
|
|
|
|
if (!unexp_command.empty()) {
|
2018-02-18 21:00:46 +00:00
|
|
|
|
// Check that we can expand the command.
|
2021-09-28 15:59:59 +00:00
|
|
|
|
// Make a new error list so we can fix the offset for just those, then append later.
|
|
|
|
|
wcstring command;
|
|
|
|
|
parse_error_list_t new_errors;
|
2020-07-12 19:51:17 +00:00
|
|
|
|
if (expand_to_command_and_args(unexp_command, operation_context_t::empty(), &command,
|
2021-09-28 15:59:59 +00:00
|
|
|
|
nullptr, &new_errors,
|
2020-12-20 21:36:12 +00:00
|
|
|
|
true /* skip wildcards */) == expand_result_t::error) {
|
2018-08-26 08:41:45 +00:00
|
|
|
|
errored = true;
|
2018-02-18 21:00:46 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check that pipes are sound.
|
|
|
|
|
if (!errored && parser_is_pipe_forbidden(command) && is_in_pipeline) {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_start, source_length,
|
|
|
|
|
INVALID_PIPELINE_CMD_ERR_MSG, command.c_str());
|
2018-02-18 21:00:46 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check that we don't break or continue from outside a loop.
|
2020-07-02 04:06:58 +00:00
|
|
|
|
if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) {
|
2018-02-18 21:00:46 +00:00
|
|
|
|
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
|
|
|
|
|
// stop the search; we can't break an outer loop from inside a function.
|
|
|
|
|
// This is a little funny because we can't tell if it's a 'for' or 'while'
|
|
|
|
|
// loop from the ancestor alone; we need the header. That is, we hit a
|
|
|
|
|
// block_statement, and have to check its header.
|
|
|
|
|
bool found_loop = false;
|
2020-07-02 04:06:58 +00:00
|
|
|
|
for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) {
|
|
|
|
|
const auto *block = ancestor->try_as<block_statement_t>();
|
|
|
|
|
if (!block) continue;
|
|
|
|
|
if (block->header->type == type_t::for_header ||
|
|
|
|
|
block->header->type == type_t::while_header) {
|
2018-02-18 21:00:46 +00:00
|
|
|
|
// This is a loop header, so we can break or continue.
|
|
|
|
|
found_loop = true;
|
|
|
|
|
break;
|
2020-07-02 04:06:58 +00:00
|
|
|
|
} else if (block->header->type == type_t::function_header) {
|
2018-02-18 21:00:46 +00:00
|
|
|
|
// This is a function header, so we cannot break or
|
|
|
|
|
// continue. We stop our search here.
|
|
|
|
|
found_loop = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-02 04:06:58 +00:00
|
|
|
|
if (!found_loop) {
|
2018-02-18 21:00:46 +00:00
|
|
|
|
errored = append_syntax_error(
|
2022-08-12 14:53:31 +00:00
|
|
|
|
parse_errors, source_start, source_length,
|
2018-02-18 21:00:46 +00:00
|
|
|
|
(command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check that we don't do an invalid builtin (issue #1252).
|
2020-07-12 19:51:17 +00:00
|
|
|
|
if (!errored && decoration == statement_decoration_t::builtin) {
|
|
|
|
|
wcstring command = unexp_command;
|
|
|
|
|
if (expand_one(command, expand_flag::skip_cmdsubst, operation_context_t::empty(),
|
|
|
|
|
parse_errors) &&
|
|
|
|
|
!builtin_exists(unexp_command)) {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
errored = append_syntax_error(parse_errors, source_start, source_length,
|
|
|
|
|
UNKNOWN_BUILTIN_ERR_MSG, unexp_command.c_str());
|
2020-07-12 19:51:17 +00:00
|
|
|
|
}
|
2018-02-18 21:00:46 +00:00
|
|
|
|
}
|
2021-09-28 15:59:59 +00:00
|
|
|
|
|
|
|
|
|
if (parse_errors) {
|
2022-08-11 16:06:05 +00:00
|
|
|
|
// The expansion errors here go from the *command* onwards,
|
|
|
|
|
// so we need to offset them by the *command* offset,
|
|
|
|
|
// excluding the decoration.
|
|
|
|
|
parse_error_offset_source_start(&new_errors, dst.command.source_range().start);
|
2021-09-28 15:59:59 +00:00
|
|
|
|
vec_append(*parse_errors, std::move(new_errors));
|
|
|
|
|
}
|
2018-02-18 21:00:46 +00:00
|
|
|
|
}
|
|
|
|
|
return errored;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-02 04:06:58 +00:00
|
|
|
|
// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that
|
|
|
|
|
// there are no arguments in the list.
|
|
|
|
|
static bool detect_errors_in_block_redirection_list(
|
|
|
|
|
const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) {
|
|
|
|
|
if (const auto *first_arg = get_first_arg(args_or_redirs)) {
|
2022-08-21 21:51:33 +00:00
|
|
|
|
return append_syntax_error(out_errors, first_arg->source_range().start,
|
|
|
|
|
first_arg->source_range().length, END_ARG_ERR_MSG);
|
2020-07-02 04:06:58 +00:00
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-12 20:55:51 +00:00
|
|
|
|
// Walk every node of an already-parsed ast and run the per-node-type checks on it.
// The result is a bitmask: the bits returned by the per-argument check are OR'd in directly,
// PARSER_TEST_ERROR is added if any job/statement/block check reported an error, and
// PARSER_TEST_INCOMPLETE is added if an unclosed block, pipe or conjunction was seen.
// Errors are appended to out_errors by the individual checks.
parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const wcstring &buff_src,
                                                  parse_error_list_t *out_errors) {
    using namespace ast;

    // Scratch string passed to the source() accessors.
    wcstring storage;

    // Accumulated result bits.
    parser_test_error_bits_t res = 0;

    // Set when any of the job / statement / block checks reports an error.
    bool saw_error = false;

    // Set when a block-like statement has an 'end' without source.
    bool saw_unclosed_block = false;

    // Set when a pipe has source but the statement following it does not (e.g. a newline after
    // a pipe).
    bool saw_unclosed_pipe = false;

    // Set when a conjunction operator (&& or ||) has source but the job following it does not.
    bool saw_unclosed_conjunction = false;

    for (const node_t &node : ast) {
        if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
            // A sourced pipe with an unsourced statement means the pipeline is unfinished.
            const bool statement_missing = !jc->statement.try_source_range().has_value();
            if (!jc->pipe.unsourced && statement_missing) {
                saw_unclosed_pipe = true;
            }
            continue;
        }

        if (const auto *jcc = node.try_as<job_conjunction_continuation_t>()) {
            // A sourced conjunction with an unsourced job means the conjunction is unfinished.
            const bool job_missing = !jcc->job.try_source_range().has_value();
            if (!jcc->conjunction.unsourced && job_missing) {
                saw_unclosed_conjunction = true;
            }
            continue;
        }

        if (const argument_t *arg = node.try_as<argument_t>()) {
            const wcstring &arg_src = arg->source(buff_src, &storage);
            res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors);
            continue;
        }

        if (const ast::job_t *job = node.try_as<ast::job_t>()) {
            // Background jobs are disallowed in the following cases:
            //
            //   foo & ; and bar
            //   foo & ; or bar
            //   if foo & ; end
            //   while foo & ; end
            //
            // Jobs that are not backgrounded need no checking.
            if (job->bg) {
                saw_error |= detect_errors_in_backgrounded_job(*job, out_errors);
            }
            continue;
        }

        if (const ast::decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
            saw_error |=
                detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors);
            continue;
        }

        // The three block-like statements are handled alike: an 'end' without source marks the
        // block as unclosed, and the trailing list is checked for stray arguments.
        if (const auto *block = node.try_as<block_statement_t>()) {
            if (block->end.unsourced) saw_unclosed_block = true;
            saw_error |= detect_errors_in_block_redirection_list(block->args_or_redirs, out_errors);
            continue;
        }

        if (const auto *ifs = node.try_as<if_statement_t>()) {
            if (ifs->end.unsourced) saw_unclosed_block = true;
            saw_error |= detect_errors_in_block_redirection_list(ifs->args_or_redirs, out_errors);
            continue;
        }

        if (const auto *switchs = node.try_as<switch_statement_t>()) {
            if (switchs->end.unsourced) saw_unclosed_block = true;
            saw_error |=
                detect_errors_in_block_redirection_list(switchs->args_or_redirs, out_errors);
            continue;
        }
    }

    if (saw_error) {
        res |= PARSER_TEST_ERROR;
    }

    const bool incomplete = saw_unclosed_block || saw_unclosed_pipe || saw_unclosed_conjunction;
    if (incomplete) {
        res |= PARSER_TEST_INCOMPLETE;
    }

    return res;
}
|
|
|
|
|
|
|
|
|
|
// Parse buff_src into an ast and check it for errors.
// If allow_incomplete is set, the source is parsed with parse_flag_leave_unterminated, and
// unterminated-quote / unterminated-subshell errors are dropped and reported as
// PARSER_TEST_INCOMPLETE instead. Any remaining parse error is moved into out_errors (if
// non-null) and yields PARSER_TEST_ERROR. Otherwise the result is that of the tree-walking
// overload.
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                  parse_error_list_t *out_errors,
                                                  bool allow_incomplete) {
    using namespace ast;

    const parse_tree_flags_t parse_flags =
        allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;

    // Parse the input string into an ast. Some errors are detected here.
    parse_error_list_t parse_errors;
    auto tree = ast_t::parse(buff_src, parse_flags, &parse_errors);

    // Whether there's an unclosed quote or subshell, and therefore unfinished input. This can
    // only become true if allow_incomplete is set.
    bool has_unclosed_quote_or_subshell = false;
    if (allow_incomplete) {
        // Issue #1238: an unterminated quote or subshell is not considered a real error here.
        // Drop every such error from the list and remember that we saw one.
        for (auto it = parse_errors.begin(); it != parse_errors.end();) {
            const bool unterminated =
                it->code == parse_error_tokenizer_unterminated_quote ||
                it->code == parse_error_tokenizer_unterminated_subshell;
            if (unterminated) {
                has_unclosed_quote_or_subshell = true;
                it = parse_errors.erase(it);
            } else {
                ++it;
            }
        }
    }

    // has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
    assert(!has_unclosed_quote_or_subshell || allow_incomplete);

    // With an unclosed quote or subshell we do not bother to validate the rest of the tree.
    if (has_unclosed_quote_or_subshell) {
        return PARSER_TEST_INCOMPLETE;
    }

    // Early parse error, stop here.
    if (!parse_errors.empty()) {
        if (out_errors) {
            vec_append(*out_errors, std::move(parse_errors));
        }
        return PARSER_TEST_ERROR;
    }

    // Defer to the tree-walking version.
    return parse_util_detect_errors(tree, buff_src, out_errors);
}
|
2019-08-04 21:49:56 +00:00
|
|
|
|
|
|
|
|
|
// Check arg_list_src, parsed as a freestanding argument list, for errors.
// Returns a description of the first error found (built with the given prefix), or none() if
// neither the parse nor any individual argument produced an error.
maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src,
                                                            const wcstring &prefix) {
    using namespace ast;

    // All errors, from both parsing and per-argument checks, are collected here.
    parse_error_list_t errors;

    // Helper producing a description of the first collected error.
    const auto describe_first_error = [&] {
        assert(!errors.empty() && "Expected an error");
        return errors.at(0).describe_with_prefix(arg_list_src, prefix, false /* not interactive */,
                                                 false /* don't skip caret */);
    };

    // Parse the string as a freestanding argument list.
    auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
    if (!errors.empty()) {
        return describe_first_error();
    }

    // Take the root argument list and test each of its arguments in turn, stopping at the first
    // one that reports a problem.
    const auto *arg_list = ast.top()->as<freestanding_argument_list_t>();
    for (const argument_t &arg : arg_list->arguments) {
        const wcstring arg_src = arg.source(arg_list_src);
        if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
            return describe_first_error();
        }
    }
    return none();
}
|