2016-05-02 23:09:46 +00:00
|
|
|
// Programmatic representation of fish code.
|
2016-05-18 22:30:21 +00:00
|
|
|
#include "config.h" // IWYU pragma: keep
|
|
|
|
|
2019-10-13 22:50:48 +00:00
|
|
|
#include "parse_tree.h"
|
|
|
|
|
2015-07-25 15:14:25 +00:00
|
|
|
#include <stddef.h>
|
2017-02-11 02:47:02 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
#include <string>
|
2022-08-21 06:14:48 +00:00
|
|
|
#include <utility>
|
2016-04-21 06:00:54 +00:00
|
|
|
|
2020-07-02 21:51:45 +00:00
|
|
|
#include "ast.h"
|
2015-07-25 15:14:25 +00:00
|
|
|
#include "common.h"
|
2022-08-21 06:14:48 +00:00
|
|
|
#include "enum_map.h"
|
2022-08-21 21:51:33 +00:00
|
|
|
#include "fallback.h"
|
2022-08-21 06:14:48 +00:00
|
|
|
#include "maybe.h"
|
2015-07-25 15:14:25 +00:00
|
|
|
#include "parse_constants.h"
|
2013-06-09 02:20:26 +00:00
|
|
|
#include "tokenizer.h"
|
2016-06-24 05:44:58 +00:00
|
|
|
#include "wutil.h" // IWYU pragma: keep
|
2013-06-02 05:14:47 +00:00
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
/// Translates a tokenizer error into the parse error code used to report it.
/// Tokenizer errors that have no dedicated parse error code are reported as
/// parse_error_tokenizer_other.
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
    if (err == tokenizer_error_t::none) {
        return parse_error_none;
    }
    if (err == tokenizer_error_t::unterminated_quote) {
        return parse_error_tokenizer_unterminated_quote;
    }
    if (err == tokenizer_error_t::unterminated_subshell) {
        return parse_error_tokenizer_unterminated_subshell;
    }
    if (err == tokenizer_error_t::unterminated_slice) {
        return parse_error_tokenizer_unterminated_slice;
    }
    if (err == tokenizer_error_t::unterminated_escape) {
        return parse_error_tokenizer_unterminated_escape;
    }
    // Everything else shares one catch-all code.
    return parse_error_tokenizer_other;
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Returns a string description of this parse error.
|
|
|
|
wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix,
|
|
|
|
bool is_interactive, bool skip_caret) const {
|
2017-04-30 04:33:50 +00:00
|
|
|
wcstring result = prefix;
|
2022-08-12 15:04:30 +00:00
|
|
|
// Some errors don't have their message passed in, so we construct them here.
|
|
|
|
// This affects e.g. `eval "a=(foo)"`
|
2019-11-25 11:47:33 +00:00
|
|
|
switch (code) {
|
|
|
|
default:
|
2020-07-05 18:36:13 +00:00
|
|
|
if (skip_caret && this->text.empty()) return L"";
|
2022-08-12 15:04:30 +00:00
|
|
|
result.append(this->text);
|
2019-11-25 11:47:33 +00:00
|
|
|
break;
|
|
|
|
case parse_error_andor_in_pipeline:
|
2022-03-31 22:49:15 +00:00
|
|
|
append_format(result, INVALID_PIPELINE_CMD_ERR_MSG,
|
2019-11-25 11:47:33 +00:00
|
|
|
src.substr(this->source_start, this->source_length).c_str());
|
2022-08-12 15:04:30 +00:00
|
|
|
break;
|
2019-11-25 11:47:33 +00:00
|
|
|
case parse_error_bare_variable_assignment: {
|
|
|
|
wcstring assignment_src = src.substr(this->source_start, this->source_length);
|
|
|
|
maybe_t<size_t> equals_pos = variable_assignment_equals_pos(assignment_src);
|
2022-10-08 16:56:38 +00:00
|
|
|
assert(equals_pos.has_value());
|
2019-11-25 11:47:33 +00:00
|
|
|
wcstring variable = assignment_src.substr(0, *equals_pos);
|
|
|
|
wcstring value = assignment_src.substr(*equals_pos + 1);
|
|
|
|
append_format(result, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable.c_str(),
|
|
|
|
value.c_str());
|
2022-08-12 15:04:30 +00:00
|
|
|
break;
|
2019-11-25 11:47:33 +00:00
|
|
|
}
|
2019-11-25 08:19:53 +00:00
|
|
|
}
|
2022-08-11 17:24:41 +00:00
|
|
|
|
|
|
|
size_t start = source_start;
|
|
|
|
size_t len = source_length;
|
|
|
|
if (start >= src.size()) {
|
|
|
|
// If we are past the source, we clamp it to the end.
|
|
|
|
start = src.size() - 1;
|
|
|
|
len = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (start + len > src.size()) {
|
|
|
|
len = src.size() - source_start;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (skip_caret) {
|
2017-06-18 05:36:56 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2016-10-30 03:51:03 +00:00
|
|
|
// Locate the beginning of this line of source.
|
|
|
|
size_t line_start = 0;
|
|
|
|
|
|
|
|
// Look for a newline prior to source_start. If we don't find one, start at the beginning of
|
|
|
|
// the string; otherwise start one past the newline. Note that source_start may itself point
|
|
|
|
// at a newline; we want to find the newline before it.
|
2022-08-11 17:24:41 +00:00
|
|
|
if (start > 0) {
|
|
|
|
size_t newline = src.find_last_of(L'\n', start - 1);
|
2016-10-30 03:51:03 +00:00
|
|
|
if (newline != wcstring::npos) {
|
|
|
|
line_start = newline + 1;
|
2013-06-15 22:21:35 +00:00
|
|
|
}
|
2016-10-30 03:51:03 +00:00
|
|
|
}
|
|
|
|
// Look for the newline after the source range. If the source range itself includes a
|
|
|
|
// newline, that's the one we want, so start just before the end of the range.
|
2022-08-21 21:51:33 +00:00
|
|
|
size_t last_char_in_range = (len == 0 ? start : start + len - 1);
|
2016-10-30 03:51:03 +00:00
|
|
|
size_t line_end = src.find(L'\n', last_char_in_range);
|
|
|
|
if (line_end == wcstring::npos) {
|
|
|
|
line_end = src.size();
|
|
|
|
}
|
2013-07-23 01:26:15 +00:00
|
|
|
|
2016-10-30 03:51:03 +00:00
|
|
|
assert(line_end >= line_start);
|
2022-08-11 17:24:41 +00:00
|
|
|
assert(start >= line_start);
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2019-10-27 22:44:08 +00:00
|
|
|
// Don't include the caret and line if we're interactive and this is the first line, because
|
2016-10-30 03:51:03 +00:00
|
|
|
// then it's obvious.
|
2022-08-11 17:24:41 +00:00
|
|
|
bool interactive_skip_caret = is_interactive && start == 0;
|
2016-10-30 03:51:03 +00:00
|
|
|
if (interactive_skip_caret) {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Append the line of text.
|
2017-06-18 05:36:56 +00:00
|
|
|
if (!result.empty()) result.push_back(L'\n');
|
2016-10-30 03:51:03 +00:00
|
|
|
result.append(src, line_start, line_end - line_start);
|
|
|
|
|
|
|
|
// Append the caret line. The input source may include tabs; for that reason we
|
|
|
|
// construct a "caret line" that has tabs in corresponding positions.
|
|
|
|
wcstring caret_space_line;
|
2022-08-11 17:24:41 +00:00
|
|
|
caret_space_line.reserve(start - line_start);
|
|
|
|
for (size_t i = line_start; i < start; i++) {
|
2017-03-26 12:38:59 +00:00
|
|
|
wchar_t wc = src.at(i);
|
2016-10-30 03:51:03 +00:00
|
|
|
if (wc == L'\t') {
|
|
|
|
caret_space_line.push_back(L'\t');
|
|
|
|
} else if (wc == L'\n') {
|
2022-08-11 17:24:41 +00:00
|
|
|
// It's possible that the start points at a newline itself. In that case,
|
2016-10-30 03:51:03 +00:00
|
|
|
// pretend it's a space. We only expect this to be at the end of the string.
|
|
|
|
caret_space_line.push_back(L' ');
|
|
|
|
} else {
|
|
|
|
int width = fish_wcwidth(wc);
|
|
|
|
if (width > 0) {
|
|
|
|
caret_space_line.append(static_cast<size_t>(width), L' ');
|
2013-12-13 02:18:07 +00:00
|
|
|
}
|
|
|
|
}
|
2013-06-15 22:21:35 +00:00
|
|
|
}
|
2016-10-30 03:51:03 +00:00
|
|
|
result.push_back(L'\n');
|
|
|
|
result.append(caret_space_line);
|
|
|
|
result.push_back(L'^');
|
2022-08-11 17:24:41 +00:00
|
|
|
if (len > 1) {
|
2022-08-09 13:26:48 +00:00
|
|
|
// Add a squiggle under the error location.
|
|
|
|
// We do it like this
|
|
|
|
// ^~~^
|
|
|
|
// With a "^" under the start and end, and squiggles in-between.
|
2022-08-11 17:24:41 +00:00
|
|
|
auto width = fish_wcswidth(src.c_str() + start, len);
|
2022-08-11 16:15:15 +00:00
|
|
|
if (width >= 2) {
|
2022-08-09 13:26:48 +00:00
|
|
|
// Subtract one for each of the carets - this is important in case
|
|
|
|
// the starting char has a width of > 1.
|
|
|
|
result.append(width - 2, L'~');
|
|
|
|
result.push_back(L'^');
|
|
|
|
}
|
|
|
|
}
|
2013-06-15 22:21:35 +00:00
|
|
|
return result;
|
|
|
|
}
|
2013-06-02 05:14:47 +00:00
|
|
|
|
2019-05-27 21:52:48 +00:00
|
|
|
/// Describes this parse error without any prefix, always allowing the caret line. Delegates to
/// describe_with_prefix().
wcstring parse_error_t::describe(const wcstring &src, bool is_interactive) const {
    const wcstring no_prefix;
    const bool omit_caret = false;
    return describe_with_prefix(src, no_prefix, is_interactive, omit_caret);
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Shifts the source_start of every error in \p errors forward by \p amt.
/// A null list or a zero offset is a no-op.
void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) {
    if (errors == nullptr || amt == 0) return;

    for (parse_error_t &err : *errors) {
        // Preserve the special meaning of -1 as 'unknown'.
        if (err.source_start == SOURCE_LOCATION_UNKNOWN) continue;
        err.source_start += amt;
    }
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Returns a string description for the given token type.
|
|
|
|
const wchar_t *token_type_description(parse_token_type_t type) {
|
2016-11-10 05:37:49 +00:00
|
|
|
const wchar_t *description = enum_to_str(type, token_enum_map);
|
|
|
|
if (description) return description;
|
2017-01-27 01:47:24 +00:00
|
|
|
return L"unknown_token_type";
|
2013-06-09 02:20:26 +00:00
|
|
|
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Returns the name of the given keyword as found in keyword_enum_map, or L"unknown_keyword"
/// when the lookup produces no name.
const wchar_t *keyword_description(parse_keyword_t type) {
    const wchar_t *name = enum_to_str(type, keyword_enum_map);
    return name ? name : L"unknown_keyword";
}
|
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
/// Returns a description of a token suitable for showing to the user.
/// If \p keyword is anything other than none, the keyword is described and \p type is ignored.
/// Otherwise a fixed phrase is used for the well-known token types, and any other type falls
/// back to "a " followed by token_type_description().
wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) {
    if (keyword != parse_keyword_t::none) {
        return format_string(L"keyword '%ls'", keyword_description(keyword));
    }

    // Pick the fixed phrase for this token type, if it has one.
    const wchar_t *phrase = nullptr;
    switch (type) {
        case parse_token_type_t::string:
            phrase = L"a string";
            break;
        case parse_token_type_t::pipe:
            phrase = L"a pipe";
            break;
        case parse_token_type_t::redirection:
            phrase = L"a redirection";
            break;
        case parse_token_type_t::background:
            phrase = L"a '&'";
            break;
        case parse_token_type_t::andand:
            phrase = L"'&&'";
            break;
        case parse_token_type_t::oror:
            phrase = L"'||'";
            break;
        case parse_token_type_t::end:
            phrase = L"end of the statement";
            break;
        case parse_token_type_t::terminate:
            phrase = L"end of the input";
            break;
        case parse_token_type_t::error:
            phrase = L"a parse error";
            break;
        case parse_token_type_t::tokenizer_error:
            phrase = L"an incomplete token";
            break;
        case parse_token_type_t::comment:
            phrase = L"a comment";
            break;
        default:
            break;
    }
    if (phrase != nullptr) {
        return phrase;
    }
    // No fixed phrase: build one from the token type's name.
    return format_string(L"a %ls", token_type_description(type));
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Returns a string description of the given parse token.
|
|
|
|
wcstring parse_token_t::describe() const {
|
2014-01-12 23:10:59 +00:00
|
|
|
wcstring result = token_type_description(type);
|
2020-06-09 22:13:02 +00:00
|
|
|
if (keyword != parse_keyword_t::none) {
|
2016-04-11 02:08:07 +00:00
|
|
|
append_format(result, L" <%ls>", keyword_description(keyword));
|
2014-01-01 08:04:02 +00:00
|
|
|
}
|
2014-01-12 23:10:59 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// A string description appropriate for presentation to the user.
|
|
|
|
wcstring parse_token_t::user_presentable_description() const {
|
2014-01-12 23:10:59 +00:00
|
|
|
return token_type_user_presentable_description(type, keyword);
|
2014-01-01 08:04:02 +00:00
|
|
|
}
|
2013-06-23 09:09:46 +00:00
|
|
|
|
2020-07-12 20:55:51 +00:00
|
|
|
/// Constructs a parsed source by taking ownership of both the source text and its AST; the
/// arguments are moved into the src and ast members.
parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast)
    // Note the parameter `ast` shadows the member of the same name inside the initializer's
    // argument, so the member is initialized from the parameter.
    : src(std::move(s)),
      ast(std::move(ast)) {
}
|
2020-07-03 18:16:51 +00:00
|
|
|
|
|
|
|
/// Defaulted destructor, defined out of line in this translation unit.
/// NOTE(review): presumably kept out of the header so that users of parsed_source_t do not need
/// the complete ast::ast_t definition - confirm against the header.
parsed_source_t::~parsed_source_t() = default;
|
|
|
|
|
2020-07-12 20:55:51 +00:00
|
|
|
/// Parses \p src into an AST and bundles the source text and AST into a shared parsed_source_t.
///
/// \param src the source text; moved into the returned object on success.
/// \param flags parse flags, passed through to ast_t::parse.
/// \param errors passed through to ast_t::parse.
/// \return the parsed source, or nullptr if the AST reports an error and
///         parse_flag_continue_after_error is not set in \p flags.
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
                                 parse_error_list_t *errors) {
    ast::ast_t tree = ast::ast_t::parse(src, flags, errors);

    const bool stop_on_error = !(flags & parse_flag_continue_after_error);
    if (tree.errored() && stop_on_error) {
        return nullptr;
    }
    return std::make_shared<parsed_source_t>(std::move(src), std::move(tree));
}
|