fish-shell/src/parse_tree.cpp

// Programmatic representation of fish code.
#include "config.h"  // IWYU pragma: keep

#include "parse_tree.h"

#include <stddef.h>

#include <string>
#include <utility>

#include "ast.h"
#include "common.h"
#include "enum_map.h"
#include "fallback.h"
#include "maybe.h"
#include "parse_constants.h"
#include "tokenizer.h"
#include "wutil.h"  // IWYU pragma: keep

/// Translates a tokenizer error into the parse error code used to report it.
/// Tokenizer errors that have no dedicated parse error code are reported as
/// parse_error_tokenizer_other.
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
    if (err == tokenizer_error_t::none) {
        return parse_error_none;
    }
    if (err == tokenizer_error_t::unterminated_quote) {
        return parse_error_tokenizer_unterminated_quote;
    }
    if (err == tokenizer_error_t::unterminated_subshell) {
        return parse_error_tokenizer_unterminated_subshell;
    }
    if (err == tokenizer_error_t::unterminated_slice) {
        return parse_error_tokenizer_unterminated_slice;
    }
    if (err == tokenizer_error_t::unterminated_escape) {
        return parse_error_tokenizer_unterminated_escape;
    }
    // Everything else collapses into the generic tokenizer error.
    return parse_error_tokenizer_other;
}

/// Returns a string description of this parse error.
///
/// The result starts with \p prefix followed by the error message. Unless \p skip_caret is set
/// (or \p is_interactive is set and the error begins at offset 0), the offending line of \p src
/// is appended, followed by a line that marks the error range with '^' and '~' characters.
///
/// If \p skip_caret is set and this error has no message text of its own, an empty string is
/// returned. An error location that lies past the end of \p src is clamped to the end of the
/// source; an empty \p src is handled by clamping to offset 0.
wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix,
                                             bool is_interactive, bool skip_caret) const {
    wcstring result = prefix;
    // Some errors don't have their message passed in, so we construct them here.
    // This affects e.g. `eval "a=(foo)"`
    switch (code) {
        default:
            if (skip_caret && this->text.empty()) return L"";
            result.append(this->text);
            break;
        case parse_error_andor_in_pipeline:
            append_format(result, INVALID_PIPELINE_CMD_ERR_MSG,
                          src.substr(this->source_start, this->source_length).c_str());
            break;
        case parse_error_bare_variable_assignment: {
            wcstring assignment_src = src.substr(this->source_start, this->source_length);
            maybe_t<size_t> equals_pos = variable_assignment_equals_pos(assignment_src);
            assert(equals_pos.has_value());
            wcstring variable = assignment_src.substr(0, *equals_pos);
            wcstring value = assignment_src.substr(*equals_pos + 1);
            append_format(result, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable.c_str(),
                          value.c_str());
            break;
        }
    }

    size_t start = source_start;
    size_t len = source_length;
    if (start >= src.size()) {
        // If we are past the source, we clamp it to the end. An empty source has no last
        // character, so clamp to 0 instead of letting `src.size() - 1` wrap around.
        start = src.empty() ? 0 : src.size() - 1;
        len = 0;
    }

    // At this point start <= src.size(), so the subtraction cannot wrap. Comparing against the
    // remaining length (rather than computing start + len) also avoids overflow when the
    // recorded length is very large.
    if (len > src.size() - start) {
        len = src.size() - start;
    }

    if (skip_caret) {
        return result;
    }

    // Locate the beginning of this line of source.
    size_t line_start = 0;

    // Look for a newline prior to source_start. If we don't find one, start at the beginning of
    // the string; otherwise start one past the newline. Note that source_start may itself point
    // at a newline; we want to find the newline before it.
    if (start > 0) {
        size_t newline = src.find_last_of(L'\n', start - 1);
        if (newline != wcstring::npos) {
            line_start = newline + 1;
        }
    }
    // Look for the newline after the source range. If the source range itself includes a
    // newline, that's the one we want, so start just before the end of the range.
    size_t last_char_in_range = (len == 0 ? start : start + len - 1);
    size_t line_end = src.find(L'\n', last_char_in_range);
    if (line_end == wcstring::npos) {
        line_end = src.size();
    }

    assert(line_end >= line_start);
    assert(start >= line_start);

    // Don't include the caret and line if we're interactive and this is the first line, because
    // then it's obvious.
    bool interactive_skip_caret = is_interactive && start == 0;
    if (interactive_skip_caret) {
        return result;
    }

    // Append the line of text.
    if (!result.empty()) result.push_back(L'\n');
    result.append(src, line_start, line_end - line_start);

    // Append the caret line. The input source may include tabs; for that reason we
    // construct a "caret line" that has tabs in corresponding positions.
    wcstring caret_space_line;
    caret_space_line.reserve(start - line_start);
    for (size_t i = line_start; i < start; i++) {
        wchar_t wc = src.at(i);
        if (wc == L'\t') {
            caret_space_line.push_back(L'\t');
        } else if (wc == L'\n') {
            // It's possible that the start points at a newline itself. In that case,
            // pretend it's a space. We only expect this to be at the end of the string.
            caret_space_line.push_back(L' ');
        } else {
            // Pad by the character's display width so the caret lines up under wide characters.
            int width = fish_wcwidth(wc);
            if (width > 0) {
                caret_space_line.append(static_cast<size_t>(width), L' ');
            }
        }
    }
    result.push_back(L'\n');
    result.append(caret_space_line);
    result.push_back(L'^');
    if (len > 1) {
        // Add a squiggle under the error location.
        // We do it like this
        //               ^~~^
        // With a "^" under the start and end, and squiggles in-between.
        auto width = fish_wcswidth(src.c_str() + start, len);
        if (width >= 2) {
            // Subtract one for each of the carets - this is important in case
            // the starting char has a width of > 1.
            result.append(width - 2, L'~');
            result.push_back(L'^');
        }
    }
    return result;
}

/// Describes this error with no prefix, always allowing the caret line to be produced.
wcstring parse_error_t::describe(const wcstring &src, bool is_interactive) const {
    const wcstring no_prefix;
    const bool skip_caret = false;
    return describe_with_prefix(src, no_prefix, is_interactive, skip_caret);
}

/// Shifts the source_start of every error in \p errors forward by \p amt.
/// Does nothing if \p errors is null or \p amt is zero.
void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) {
    if (errors == nullptr || amt == 0) {
        return;
    }
    for (parse_error_t &entry : *errors) {
        // An 'unknown' location is a sentinel, not an offset, so it must stay as it is.
        if (entry.source_start == SOURCE_LOCATION_UNKNOWN) {
            continue;
        }
        entry.source_start += amt;
    }
}

/// Looks up the name of the given token type in token_enum_map.
/// Falls back to a fixed placeholder when the lookup yields no name.
const wchar_t *token_type_description(parse_token_type_t type) {
    const wchar_t *name = enum_to_str(type, token_enum_map);
    return name ? name : L"unknown_token_type";
}

/// Looks up the name of the given keyword in keyword_enum_map.
/// Falls back to a fixed placeholder when the lookup yields no name.
const wchar_t *keyword_description(parse_keyword_t type) {
    const wchar_t *name = enum_to_str(type, keyword_enum_map);
    return name ? name : L"unknown_keyword";
}

/// Returns a phrase describing a token for use in user-facing messages.
/// A keyword other than parse_keyword_t::none takes precedence over the token type. Token types
/// without a hand-written phrase fall back to "a <type name>".
wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) {
    const bool has_keyword = (keyword != parse_keyword_t::none);
    if (has_keyword) {
        return format_string(L"keyword '%ls'", keyword_description(keyword));
    }

    // Pick the hand-written phrase for this token type, if there is one.
    const wchar_t *phrase = nullptr;
    switch (type) {
        case parse_token_type_t::string:
            phrase = L"a string";
            break;
        case parse_token_type_t::pipe:
            phrase = L"a pipe";
            break;
        case parse_token_type_t::redirection:
            phrase = L"a redirection";
            break;
        case parse_token_type_t::background:
            phrase = L"a '&'";
            break;
        case parse_token_type_t::andand:
            phrase = L"'&&'";
            break;
        case parse_token_type_t::oror:
            phrase = L"'||'";
            break;
        case parse_token_type_t::end:
            phrase = L"end of the statement";
            break;
        case parse_token_type_t::terminate:
            phrase = L"end of the input";
            break;
        case parse_token_type_t::error:
            phrase = L"a parse error";
            break;
        case parse_token_type_t::tokenizer_error:
            phrase = L"an incomplete token";
            break;
        case parse_token_type_t::comment:
            phrase = L"a comment";
            break;
        default:
            break;
    }

    if (phrase != nullptr) {
        return wcstring(phrase);
    }
    // No dedicated phrase: build one from the token type's name.
    return format_string(L"a %ls", token_type_description(type));
}

/// Returns the token type's name, followed by " <keyword>" when the token carries a keyword.
wcstring parse_token_t::describe() const {
    wcstring text(token_type_description(type));
    if (keyword == parse_keyword_t::none) {
        return text;
    }
    append_format(text, L" <%ls>", keyword_description(keyword));
    return text;
}

/// Returns a user-facing phrase for this token, derived from its type and keyword.
wcstring parse_token_t::user_presentable_description() const {
    return token_type_user_presentable_description(this->type, this->keyword);
}

/// Constructs a parsed_source_t by moving in the source text \p s and its syntax tree \p ast.
/// The parameter `ast` has the same name as the member it initializes: in the initializer list
/// the name before the parentheses is the member, and the name inside is the parameter.
parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast)
    : src(std::move(s)), ast(std::move(ast)) {}

/// Compiler-generated destructor, defined out of line in this translation unit.
parsed_source_t::~parsed_source_t() = default;

/// Parses \p src into a syntax tree and bundles the source text and tree into a shared
/// parsed_source_t. Errors are reported through \p errors, which is passed on to the parser.
/// Returns nullptr if parsing errored and \p flags does not include
/// parse_flag_continue_after_error.
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
                                 parse_error_list_t *errors) {
    ast::ast_t tree = ast::ast_t::parse(src, flags, errors);
    if (tree.errored()) {
        // A failed parse is only kept when the caller asked to continue after errors.
        if (!(flags & parse_flag_continue_after_error)) {
            return nullptr;
        }
    }
    return std::make_shared<parsed_source_t>(std::move(src), std::move(tree));
}