mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-19 08:24:00 +00:00
3eae0a9b6a
This mostly re-sorts headers that got desorted after the IWYU application in 14d2a6d8ff.
1083 lines
40 KiB
C++
1083 lines
40 KiB
C++
// The fish_indent program.
|
|
/*
|
|
Copyright (C) 2014 ridiculous_fish
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License version 2 as
|
|
published by the Free Software Foundation.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
*/
|
|
#include "config.h" // IWYU pragma: keep
|
|
|
|
#include <errno.h>
|
|
#include <getopt.h>
|
|
#include <locale.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <cwchar>
|
|
#include <cwctype>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <type_traits>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "ast.h"
|
|
#include "common.h"
|
|
#include "env.h"
|
|
#include "expand.h"
|
|
#include "fds.h"
|
|
#include "fish_version.h"
|
|
#include "flog.h"
|
|
#include "future_feature_flags.h"
|
|
#include "global_safety.h"
|
|
#include "highlight.h"
|
|
#include "maybe.h"
|
|
#include "operation_context.h"
|
|
#include "parse_constants.h"
|
|
#include "parse_util.h"
|
|
#include "print_help.h"
|
|
#include "tokenizer.h"
|
|
#include "wcstringutil.h"
|
|
#include "wutil.h" // IWYU pragma: keep
|
|
|
|
// The number of spaces per indent isn't supposed to be configurable.
|
|
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
|
|
#define SPACES_PER_INDENT 4
|
|
|
|
static bool dump_parse_tree = false;
|
|
static int ret = 0;
|
|
|
|
// Read the entire contents of a file into the specified string.
|
|
static wcstring read_file(FILE *f) {
|
|
wcstring result;
|
|
while (true) {
|
|
wint_t c = std::fgetwc(f);
|
|
|
|
if (c == WEOF) {
|
|
if (ferror(f)) {
|
|
if (errno == EILSEQ) {
|
|
// Illegal byte sequence. Try to skip past it.
|
|
clearerr(f);
|
|
int ch = fgetc(f); // for printing the warning, and seeks forward 1 byte.
|
|
FLOGF(warning, "%s (byte=%X)", std::strerror(errno), ch);
|
|
ret = 1;
|
|
continue;
|
|
} else {
|
|
wperror(L"fgetwc");
|
|
exit(1);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
result.push_back(static_cast<wchar_t>(c));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
namespace {
|
|
/// Backport of C++14's std::enable_if_t: names the nested 'type' of std::enable_if.
template <bool Cond, typename T = void>
using enable_if_t = typename std::enable_if<Cond, T>::type;
|
|
|
|
/// \return whether a character at a given index is escaped.
|
|
/// A character is escaped if it has an odd number of backslashes.
|
|
bool char_is_escaped(const wcstring &text, size_t idx) {
|
|
return count_preceding_backslashes(text, idx) % 2 == 1;
|
|
}
|
|
|
|
using namespace ast;
|
|
struct pretty_printer_t {
|
|
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
|
|
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
|
|
// than necessary and would probably benefit from a more layered approach where we identify
|
|
// certain runs, weight line breaks, have a cost model, etc.
|
|
pretty_printer_t(const wcstring &src, bool do_indent)
|
|
: source(src),
|
|
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
|
|
ast(ast_t::parse(src, parse_flags())),
|
|
do_indent(do_indent),
|
|
gaps(compute_gaps()),
|
|
preferred_semi_locations(compute_preferred_semi_locations()) {
|
|
assert(indents.size() == source.size() && "indents and source should be same length");
|
|
}
|
|
|
|
// Original source.
|
|
const wcstring &source;
|
|
|
|
// The indents of our string.
|
|
// This has the same length as 'source' and describes the indentation level.
|
|
const std::vector<int> indents;
|
|
|
|
// The parsed ast.
|
|
const ast_t ast;
|
|
|
|
// The prettifier output.
|
|
wcstring output;
|
|
|
|
// The indent of the source range which we are currently emitting.
|
|
int current_indent{0};
|
|
|
|
// Whether to indent, or just insert spaces.
|
|
const bool do_indent;
|
|
|
|
// Whether the next gap text should hide the first newline.
|
|
bool gap_text_mask_newline{false};
|
|
|
|
// The "gaps": a sorted set of ranges between tokens.
|
|
// These contain whitespace, comments, semicolons, and other lexical elements which are not
|
|
// present in the ast.
|
|
const std::vector<source_range_t> gaps;
|
|
|
|
// The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
|
|
// This is computed ahead of time for convenience.
|
|
const std::vector<uint32_t> preferred_semi_locations;
|
|
|
|
// Flags we support.
|
|
using gap_flags_t = uint32_t;
|
|
enum {
|
|
default_flags = 0,
|
|
|
|
// Whether to allow line splitting via escaped newlines.
|
|
// For example, in argument lists:
|
|
//
|
|
// echo a \
|
|
// b
|
|
//
|
|
// If this is not set, then split-lines will be joined.
|
|
allow_escaped_newlines = 1 << 0,
|
|
|
|
// Whether to require a space before this token.
|
|
// This is used when emitting semis:
|
|
// echo a; echo b;
|
|
// No space required between 'a' and ';', or 'b' and ';'.
|
|
skip_space = 1 << 1,
|
|
};
|
|
|
|
// \return gap text flags for the gap text that comes *before* a given node type.
|
|
static gap_flags_t gap_text_flags_before_node(const node_t &node) {
|
|
gap_flags_t result = default_flags;
|
|
switch (node.type) {
|
|
// Allow escaped newlines before leaf nodes that can be part of a long command.
|
|
case type_t::argument:
|
|
case type_t::redirection:
|
|
case type_t::variable_assignment:
|
|
result |= allow_escaped_newlines;
|
|
break;
|
|
|
|
case type_t::token_base:
|
|
// Allow escaped newlines before && and ||, and also pipes.
|
|
switch (node.as<token_base_t>()->type) {
|
|
case parse_token_type_t::andand:
|
|
case parse_token_type_t::oror:
|
|
case parse_token_type_t::pipe:
|
|
result |= allow_escaped_newlines;
|
|
break;
|
|
case parse_token_type_t::string: {
|
|
// Allow escaped newlines before commands that follow a variable assignment
|
|
// since both can be long (#7955).
|
|
const node_t *p = node.parent;
|
|
if (p->type != type_t::decorated_statement) break;
|
|
p = p->parent;
|
|
assert(p->type == type_t::statement);
|
|
p = p->parent;
|
|
if (auto job = p->try_as<job_t>()) {
|
|
if (!job->variables.empty()) result |= allow_escaped_newlines;
|
|
} else if (auto job_cnt = p->try_as<job_continuation_t>()) {
|
|
if (!job_cnt->variables.empty()) result |= allow_escaped_newlines;
|
|
} else if (auto not_stmt = p->try_as<not_statement_t>()) {
|
|
if (!not_stmt->variables.empty()) result |= allow_escaped_newlines;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// \return whether we are at the start of a new line.
|
|
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
|
|
|
|
// \return whether we have a space before the output.
|
|
// This ignores escaped spaces and escaped newlines.
|
|
bool has_preceding_space() const {
|
|
long idx = static_cast<long>(output.size()) - 1;
|
|
// Skip escaped newlines.
|
|
// This is historical. Example:
|
|
//
|
|
// cmd1 \
|
|
// | cmd2
|
|
//
|
|
// we want the pipe to "see" the space after cmd1.
|
|
// TODO: this is too tricky, we should factor this better.
|
|
while (idx >= 0 && output.at(idx) == L'\n') {
|
|
size_t backslashes = count_preceding_backslashes(source, idx);
|
|
if (backslashes % 2 == 0) {
|
|
// Not escaped.
|
|
return false;
|
|
}
|
|
idx -= (1 + backslashes);
|
|
}
|
|
return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
|
|
}
|
|
|
|
// Entry point. Prettify our source code and return it.
|
|
wcstring prettify() {
|
|
output = wcstring{};
|
|
node_visitor(*this).accept(ast.top());
|
|
|
|
// Trailing gap text.
|
|
emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
|
|
|
|
// Replace all trailing newlines with just a single one.
|
|
while (!output.empty() && at_line_start()) {
|
|
output.pop_back();
|
|
}
|
|
emit_newline();
|
|
|
|
wcstring result = std::move(output);
|
|
return result;
|
|
}
|
|
|
|
// \return a substring of source.
|
|
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
|
|
|
|
// Return the gap ranges from our ast.
|
|
std::vector<source_range_t> compute_gaps() const {
|
|
auto range_compare = [](source_range_t r1, source_range_t r2) {
|
|
if (r1.start != r2.start) return r1.start < r2.start;
|
|
return r1.length < r2.length;
|
|
};
|
|
// Collect the token ranges into a list.
|
|
std::vector<source_range_t> tok_ranges;
|
|
for (const node_t &node : ast) {
|
|
if (node.category == category_t::leaf) {
|
|
auto r = node.source_range();
|
|
if (r.length > 0) tok_ranges.push_back(r);
|
|
}
|
|
}
|
|
// Place a zero length range at end to aid in our inverting.
|
|
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
|
|
|
|
// Our tokens should be sorted.
|
|
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
|
|
|
|
// For each range, add a gap range between the previous range and this range.
|
|
std::vector<source_range_t> gaps;
|
|
uint32_t prev_end = 0;
|
|
for (source_range_t tok_range : tok_ranges) {
|
|
assert(tok_range.start >= prev_end &&
|
|
"Token range should not overlap or be out of order");
|
|
if (tok_range.start >= prev_end) {
|
|
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
|
|
}
|
|
prev_end = tok_range.start + tok_range.length;
|
|
}
|
|
return gaps;
|
|
}
|
|
|
|
// Return sorted list of semi-preferring semi_nl nodes.
|
|
std::vector<uint32_t> compute_preferred_semi_locations() const {
|
|
std::vector<uint32_t> result;
|
|
auto mark_semi_from_input = [&](const optional_t<semi_nl_t> &n) {
|
|
if (n && n->has_source() && substr(n->range) == L";") {
|
|
result.push_back(n->range.start);
|
|
}
|
|
};
|
|
|
|
// andor_job_lists get semis if the input uses semis.
|
|
for (const auto &node : ast) {
|
|
// See if we have a condition and an andor_job_list.
|
|
const optional_t<semi_nl_t> *condition = nullptr;
|
|
const andor_job_list_t *andors = nullptr;
|
|
if (const auto *ifc = node.try_as<if_clause_t>()) {
|
|
condition = &ifc->condition.semi_nl;
|
|
andors = &ifc->andor_tail;
|
|
} else if (const auto *wc = node.try_as<while_header_t>()) {
|
|
condition = &wc->condition.semi_nl;
|
|
andors = &wc->andor_tail;
|
|
}
|
|
|
|
// If there is no and-or tail then we always use a newline.
|
|
if (andors && andors->count() > 0) {
|
|
if (condition) mark_semi_from_input(*condition);
|
|
// Mark all but last of the andor list.
|
|
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
|
|
mark_semi_from_input(andors->at(i)->job.semi_nl);
|
|
}
|
|
}
|
|
}
|
|
|
|
// `x ; and y` gets semis if it has them already, and they are on the same line.
|
|
for (const auto &node : ast) {
|
|
if (const auto *job_list = node.try_as<job_list_t>()) {
|
|
const semi_nl_t *prev_job_semi_nl = nullptr;
|
|
for (const job_conjunction_t &job : *job_list) {
|
|
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
|
|
const semi_nl_t *prev = prev_job_semi_nl;
|
|
prev_job_semi_nl = job.semi_nl.contents.get();
|
|
|
|
// Is this an 'and' or 'or' job?
|
|
if (!job.decorator) continue;
|
|
|
|
// Now see if we want to mark 'prev' as allowing a semi.
|
|
// Did we have a previous semi_nl which was a newline?
|
|
if (!prev || substr(prev->range) != L";") continue;
|
|
|
|
// Is there a newline between them?
|
|
assert(prev->range.start <= job.decorator->range.start &&
|
|
"Ranges out of order");
|
|
auto start = source.begin() + prev->range.start;
|
|
auto end = source.begin() + job.decorator->range.end();
|
|
if (std::find(start, end, L'\n') == end) {
|
|
// We're going to allow the previous semi_nl to be a semi.
|
|
result.push_back(prev->range.start);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
std::sort(result.begin(), result.end());
|
|
return result;
|
|
}
|
|
|
|
// Emit a space or indent as necessary, depending on the previous output.
|
|
void emit_space_or_indent(gap_flags_t flags = default_flags) {
|
|
if (at_line_start()) {
|
|
output.append(SPACES_PER_INDENT * current_indent, L' ');
|
|
} else if (!(flags & skip_space) && !has_preceding_space()) {
|
|
output.append(1, L' ');
|
|
}
|
|
}
|
|
|
|
// Emit "gap text:" newlines and comments from the original source.
|
|
// Gap text may be a few things:
|
|
//
|
|
// 1. Just a space is common. We will trim the spaces to be empty.
|
|
//
|
|
// Here the gap text is the comment, followed by the newline:
|
|
//
|
|
// echo abc # arg
|
|
// echo def
|
|
//
|
|
// 2. It may also be an escaped newline:
|
|
// Here the gap text is a space, backslash, newline, space.
|
|
//
|
|
// echo \
|
|
// hi
|
|
//
|
|
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
|
|
//
|
|
// begin | stuff
|
|
//
|
|
// We do not handle errors here - instead our caller does.
|
|
bool emit_gap_text(source_range_t range, gap_flags_t flags) {
|
|
wcstring gap_text = substr(range);
|
|
// Common case: if we are only spaces, do nothing.
|
|
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;
|
|
|
|
// Look to see if there is an escaped newline.
|
|
// Emit it if either we allow it, or it comes before the first comment.
|
|
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
|
|
// text - we already know it has no semantic significance.
|
|
size_t escaped_nl = gap_text.find(L"\\\n");
|
|
if (escaped_nl != wcstring::npos) {
|
|
size_t comment_idx = gap_text.find(L'#');
|
|
if ((flags & allow_escaped_newlines) ||
|
|
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
|
|
// Emit a space before the escaped newline.
|
|
if (!at_line_start() && !has_preceding_space()) {
|
|
output.append(L" ");
|
|
}
|
|
output.append(L"\\\n");
|
|
// Indent the continuation line and any leading comments (#7252).
|
|
// Use the indentation level of the next newline.
|
|
current_indent = indents.at(range.start + escaped_nl + 1);
|
|
emit_space_or_indent();
|
|
}
|
|
}
|
|
|
|
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
|
|
// always emit one.
|
|
bool needs_nl = false;
|
|
|
|
tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
|
|
while (maybe_t<tok_t> tok = tokenizer.next()) {
|
|
wcstring tok_text = tokenizer.text_of(*tok);
|
|
|
|
if (needs_nl) {
|
|
emit_newline();
|
|
needs_nl = false;
|
|
if (tok_text == L"\n") continue;
|
|
} else if (gap_text_mask_newline) {
|
|
// We only respect mask_newline the first time through the loop.
|
|
gap_text_mask_newline = false;
|
|
if (tok_text == L"\n") continue;
|
|
}
|
|
|
|
if (tok->type == token_type_t::comment) {
|
|
emit_space_or_indent();
|
|
output.append(tok_text);
|
|
needs_nl = true;
|
|
} else if (tok->type == token_type_t::end) {
|
|
// This may be either a newline or semicolon.
|
|
// Semicolons found here are not part of the ast and can simply be removed.
|
|
// Newlines are preserved unless mask_newline is set.
|
|
if (tok_text == L"\n") {
|
|
emit_newline();
|
|
}
|
|
} else {
|
|
fprintf(stderr,
|
|
"Gap text should only have comments and newlines - instead found token "
|
|
"type %d with text: %ls\n",
|
|
(int)tok->type, tok_text.c_str());
|
|
DIE("Gap text should only have comments and newlines");
|
|
}
|
|
}
|
|
if (needs_nl) emit_newline();
|
|
return needs_nl;
|
|
}
|
|
|
|
/// \return the gap text ending at a given index into the string, or empty if none.
|
|
source_range_t gap_text_to(uint32_t end) const {
|
|
auto where = std::lower_bound(
|
|
gaps.begin(), gaps.end(), end,
|
|
[](source_range_t r, uint32_t end) { return r.start + r.length < end; });
|
|
if (where == gaps.end() || where->start + where->length != end) {
|
|
// Not found.
|
|
return source_range_t{0, 0};
|
|
} else {
|
|
return *where;
|
|
}
|
|
}
|
|
|
|
/// \return whether a range \p r overlaps an error range from our ast.
|
|
bool range_contained_error(source_range_t r) const {
|
|
const auto &errs = ast.extras().errors;
|
|
auto range_is_before = [](source_range_t x, source_range_t y) {
|
|
return x.start + x.length <= y.start;
|
|
};
|
|
assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
|
|
"Error ranges should be sorted");
|
|
return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
|
|
}
|
|
|
|
// Emit the gap text before a source range.
|
|
bool emit_gap_text_before(source_range_t r, gap_flags_t flags) {
|
|
assert(r.start <= source.size() && "source out of bounds");
|
|
bool added_newline = false;
|
|
|
|
// Find the gap text which ends at start.
|
|
source_range_t range = gap_text_to(r.start);
|
|
if (range.length > 0) {
|
|
// Set the indent from the beginning of this gap text.
|
|
// For example:
|
|
// begin
|
|
// cmd
|
|
// # comment
|
|
// end
|
|
// Here the comment is the gap text before the end, but we want the indent from the
|
|
// command.
|
|
if (range.start < indents.size()) current_indent = indents.at(range.start);
|
|
|
|
// If this range contained an error, append the gap text without modification.
|
|
// For example in: echo foo "
|
|
// We don't want to mess with the quote.
|
|
if (range_contained_error(range)) {
|
|
output.append(substr(range));
|
|
} else {
|
|
added_newline = emit_gap_text(range, flags);
|
|
}
|
|
}
|
|
// Always clear gap_text_mask_newline after emitting even empty gap text.
|
|
gap_text_mask_newline = false;
|
|
return added_newline;
|
|
}
|
|
|
|
/// Given a string \p input, remove unnecessary quotes, etc.
|
|
wcstring clean_text(const wcstring &input) {
|
|
// Unescape the string - this leaves special markers around if there are any
|
|
// expansions or anything. We specifically tell it to not compute backslash-escapes
|
|
// like \U or \x, because we want to leave them intact.
|
|
wcstring unescaped = input;
|
|
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
|
|
|
|
// Remove INTERNAL_SEPARATOR because that's a quote.
|
|
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
|
|
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
|
|
|
|
// If no non-"good" char is left, use the unescaped version.
|
|
// This can be extended to other characters, but giving the precise list is tough,
|
|
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
|
|
// people feel more at ease.
|
|
auto goodchars = [](wchar_t ch) {
|
|
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
|
|
};
|
|
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
|
|
!unescaped.empty()) {
|
|
return unescaped;
|
|
} else {
|
|
return input;
|
|
}
|
|
}
|
|
|
|
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
|
|
// If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such
|
|
// lines.
|
|
void emit_text(source_range_t r, gap_flags_t flags) {
|
|
emit_gap_text_before(r, flags);
|
|
current_indent = indents.at(r.start);
|
|
if (r.length > 0) {
|
|
emit_space_or_indent(flags);
|
|
output.append(clean_text(substr(r)));
|
|
}
|
|
}
|
|
|
|
template <type_t Type>
|
|
void emit_node_text(const leaf_t<Type> &node) {
|
|
source_range_t range = node.range;
|
|
|
|
// Weird special-case: a token may end in an escaped newline. Notably, the newline is
|
|
// not part of the following gap text, handle indentation here (#8197).
|
|
bool ends_with_escaped_nl = node.range.length >= 2 &&
|
|
source.at(node.range.end() - 2) == L'\\' &&
|
|
source.at(node.range.end() - 1) == L'\n';
|
|
if (ends_with_escaped_nl) {
|
|
range = {range.start, range.length - 2};
|
|
}
|
|
|
|
emit_text(range, gap_text_flags_before_node(node));
|
|
|
|
if (ends_with_escaped_nl) {
|
|
// By convention, escaped newlines are preceded with a space.
|
|
output.append(L" \\\n");
|
|
// TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
|
|
// The cases where this matters are probably very rare.
|
|
current_indent++;
|
|
emit_space_or_indent();
|
|
current_indent--;
|
|
}
|
|
}
|
|
|
|
// Emit one newline.
|
|
void emit_newline() { output.push_back(L'\n'); }
|
|
|
|
// Emit a semicolon.
|
|
void emit_semi() { output.push_back(L';'); }
|
|
|
|
// For branch and list nodes, default is to visit their children.
|
|
template <typename Node>
|
|
enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
|
|
node_visitor(*this).accept_children_of(node);
|
|
}
|
|
|
|
template <typename Node>
|
|
enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
|
|
node_visitor(*this).accept_children_of(node);
|
|
}
|
|
|
|
// Leaf nodes we just visit their text.
|
|
void visit(const keyword_base_t &node) { emit_node_text(node); }
|
|
void visit(const token_base_t &node) { emit_node_text(node); }
|
|
void visit(const argument_t &node) { emit_node_text(node); }
|
|
void visit(const variable_assignment_t &node) { emit_node_text(node); }
|
|
|
|
void visit(const semi_nl_t &node) {
|
|
// These are semicolons or newlines which are part of the ast. That means it includes e.g.
|
|
// ones terminating a job or 'if' header, but not random semis in job lists. We respect
|
|
// preferred_semi_locations to decide whether or not these should stay as newlines or
|
|
// become semicolons.
|
|
|
|
// Check if we should prefer a semicolon.
|
|
bool prefer_semi = node.range.length > 0 &&
|
|
std::binary_search(preferred_semi_locations.begin(),
|
|
preferred_semi_locations.end(), node.range.start);
|
|
emit_gap_text_before(node.range, gap_text_flags_before_node(node));
|
|
|
|
// Don't emit anything if the gap text put us on a newline (because it had a comment).
|
|
if (!at_line_start()) {
|
|
prefer_semi ? emit_semi() : emit_newline();
|
|
|
|
// If it was a semi but we emitted a newline, swallow a subsequent newline.
|
|
if (!prefer_semi && substr(node.range) == L";") {
|
|
gap_text_mask_newline = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Emit a redirection: the operator followed directly by its target.
void visit(const redirection_t &node) {
    emit_text(node.oper.range, default_flags);
    // skip_space: no space between a redirection operator and its target (#2899).
    emit_text(node.target.range, skip_space);
}
|
|
|
|
void visit(const maybe_newlines_t &node) {
|
|
// Our newlines may have comments embedded in them, example:
|
|
// cmd |
|
|
// # something
|
|
// cmd2
|
|
// Treat it as gap text.
|
|
if (node.range.length > 0) {
|
|
auto flags = gap_text_flags_before_node(node);
|
|
current_indent = indents.at(node.range.start);
|
|
bool added_newline = emit_gap_text_before(node.range, flags);
|
|
source_range_t gap_range = node.range;
|
|
if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') {
|
|
gap_range.start++;
|
|
}
|
|
emit_gap_text(gap_range, flags);
|
|
}
|
|
}
|
|
|
|
void visit(const begin_header_t &node) {
|
|
// 'begin' does not require a newline after it, but we insert one.
|
|
node_visitor(*this).accept_children_of(node);
|
|
if (!at_line_start()) {
|
|
emit_newline();
|
|
}
|
|
}
|
|
|
|
// The flags we use to parse.
|
|
static parse_tree_flags_t parse_flags() {
|
|
return parse_flag_continue_after_error | parse_flag_include_comments |
|
|
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
// \return the name of the enumerator \p role as a narrow string, e.g. "command".
// Dies on a role which is not listed here.
static const char *highlight_role_to_string(highlight_role_t role) {
// Expands to a case which returns the enumerator's own spelling.
#define ROLE_NAME_CASE(name)     \
    case highlight_role_t::name: \
        return #name;

    switch (role) {
        ROLE_NAME_CASE(normal)
        ROLE_NAME_CASE(error)
        ROLE_NAME_CASE(command)
        ROLE_NAME_CASE(keyword)
        ROLE_NAME_CASE(statement_terminator)
        ROLE_NAME_CASE(param)
        ROLE_NAME_CASE(option)
        ROLE_NAME_CASE(comment)
        ROLE_NAME_CASE(search_match)
        ROLE_NAME_CASE(operat)
        ROLE_NAME_CASE(escape)
        ROLE_NAME_CASE(quote)
        ROLE_NAME_CASE(redirection)
        ROLE_NAME_CASE(autosuggestion)
        ROLE_NAME_CASE(selection)
        ROLE_NAME_CASE(pager_progress)
        ROLE_NAME_CASE(pager_background)
        ROLE_NAME_CASE(pager_prefix)
        ROLE_NAME_CASE(pager_completion)
        ROLE_NAME_CASE(pager_description)
        ROLE_NAME_CASE(pager_secondary_background)
        ROLE_NAME_CASE(pager_secondary_prefix)
        ROLE_NAME_CASE(pager_secondary_completion)
        ROLE_NAME_CASE(pager_secondary_description)
        ROLE_NAME_CASE(pager_selected_background)
        ROLE_NAME_CASE(pager_selected_prefix)
        ROLE_NAME_CASE(pager_selected_completion)
        ROLE_NAME_CASE(pager_selected_description)
        default:
            DIE("UNKNOWN ROLE");
    }
#undef ROLE_NAME_CASE
}
|
|
|
|
// Entry point for Pygments CSV output.
|
|
// Our output is a newline-separated string.
|
|
// Each line is of the form `start,end,role`
|
|
// start and end is the half-open token range, value is a string from highlight_role_t.
|
|
// Example:
|
|
// 3,7,command
|
|
static std::string make_pygments_csv(const wcstring &src) {
|
|
const size_t len = src.size();
|
|
std::vector<highlight_spec_t> colors;
|
|
highlight_shell(src, colors, operation_context_t::globals());
|
|
assert(colors.size() == len && "Colors and src should have same size");
|
|
|
|
struct token_range_t {
|
|
unsigned long start;
|
|
unsigned long end;
|
|
highlight_role_t role;
|
|
};
|
|
|
|
std::vector<token_range_t> token_ranges;
|
|
for (size_t i = 0; i < len; i++) {
|
|
highlight_role_t role = colors.at(i).foreground;
|
|
// See if we can extend the last range.
|
|
if (!token_ranges.empty()) {
|
|
auto &last = token_ranges.back();
|
|
if (last.role == role && last.end == i) {
|
|
last.end = i + 1;
|
|
continue;
|
|
}
|
|
}
|
|
// We need a new range.
|
|
token_ranges.push_back(token_range_t{i, i + 1, role});
|
|
}
|
|
|
|
// Now render these to a string.
|
|
std::string result;
|
|
for (const auto &range : token_ranges) {
|
|
char buff[128];
|
|
snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end,
|
|
highlight_role_to_string(range.role));
|
|
result.append(buff);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// Entry point for prettification.
|
|
static wcstring prettify(const wcstring &src, bool do_indent) {
|
|
if (dump_parse_tree) {
|
|
auto ast =
|
|
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
|
|
parse_flag_show_extra_semis);
|
|
wcstring ast_dump = ast.dump(src);
|
|
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
|
|
}
|
|
|
|
pretty_printer_t printer{src, do_indent};
|
|
wcstring output = printer.prettify();
|
|
return output;
|
|
}
|
|
|
|
/// Given a string and list of colors of the same size, return the string with HTML span elements
|
|
/// for the various colors.
|
|
static const wchar_t *html_class_name_for_color(highlight_spec_t spec) {
|
|
#define P(x) L"fish_color_" #x
|
|
switch (spec.foreground) {
|
|
case highlight_role_t::normal: {
|
|
return P(normal);
|
|
}
|
|
case highlight_role_t::error: {
|
|
return P(error);
|
|
}
|
|
case highlight_role_t::command: {
|
|
return P(command);
|
|
}
|
|
case highlight_role_t::statement_terminator: {
|
|
return P(statement_terminator);
|
|
}
|
|
case highlight_role_t::param: {
|
|
return P(param);
|
|
}
|
|
case highlight_role_t::option: {
|
|
return P(option);
|
|
}
|
|
case highlight_role_t::comment: {
|
|
return P(comment);
|
|
}
|
|
case highlight_role_t::search_match: {
|
|
return P(search_match);
|
|
}
|
|
case highlight_role_t::operat: {
|
|
return P(operator);
|
|
}
|
|
case highlight_role_t::escape: {
|
|
return P(escape);
|
|
}
|
|
case highlight_role_t::quote: {
|
|
return P(quote);
|
|
}
|
|
case highlight_role_t::redirection: {
|
|
return P(redirection);
|
|
}
|
|
case highlight_role_t::autosuggestion: {
|
|
return P(autosuggestion);
|
|
}
|
|
case highlight_role_t::selection: {
|
|
return P(selection);
|
|
}
|
|
default: {
|
|
return P(other);
|
|
}
|
|
}
|
|
}
|
|
|
|
static std::string html_colorize(const wcstring &text,
|
|
const std::vector<highlight_spec_t> &colors) {
|
|
if (text.empty()) {
|
|
return "";
|
|
}
|
|
|
|
assert(colors.size() == text.size());
|
|
wcstring html = L"<pre><code>";
|
|
highlight_spec_t last_color = highlight_role_t::normal;
|
|
for (size_t i = 0; i < text.size(); i++) {
|
|
// Handle colors.
|
|
highlight_spec_t color = colors.at(i);
|
|
if (i > 0 && color != last_color) {
|
|
html.append(L"</span>");
|
|
}
|
|
if (i == 0 || color != last_color) {
|
|
append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
|
|
}
|
|
last_color = color;
|
|
|
|
// Handle text.
|
|
wchar_t wc = text.at(i);
|
|
switch (wc) {
|
|
case L'&': {
|
|
html.append(L"&");
|
|
break;
|
|
}
|
|
case L'\'': {
|
|
html.append(L"'");
|
|
break;
|
|
}
|
|
case L'"': {
|
|
html.append(L""");
|
|
break;
|
|
}
|
|
case L'<': {
|
|
html.append(L"<");
|
|
break;
|
|
}
|
|
case L'>': {
|
|
html.append(L">");
|
|
break;
|
|
}
|
|
default: {
|
|
html.push_back(wc);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
html.append(L"</span></code></pre>");
|
|
return wcs2string(html);
|
|
}
|
|
|
|
/// \return \p text converted to a narrow string, without any coloring.
static std::string no_colorize(const wcstring &text) {
    return wcs2string(text);
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
program_name = L"fish_indent";
|
|
set_main_thread();
|
|
setup_fork_guards();
|
|
// Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
|
|
// because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
|
|
//
|
|
// TODO: Auto-detect the encoding of the script. We should look for a vim style comment
|
|
// (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
|
|
// (e.g., "# -*- coding: <encoding-name> -*-").
|
|
setlocale(LC_ALL, "");
|
|
env_init();
|
|
|
|
if (auto features_var = env_stack_t::globals().get(L"fish_features")) {
|
|
for (const wcstring &s : features_var->as_list()) {
|
|
mutable_fish_features().set_from_string(s);
|
|
}
|
|
}
|
|
|
|
// Types of output we support.
|
|
enum {
|
|
output_type_plain_text,
|
|
output_type_file,
|
|
output_type_ansi,
|
|
output_type_pygments_csv,
|
|
output_type_check,
|
|
output_type_html
|
|
} output_type = output_type_plain_text;
|
|
const char *output_location = "";
|
|
bool do_indent = true;
|
|
// File path for debug output.
|
|
std::string debug_output;
|
|
|
|
const char *short_opts = "+d:hvwicD:";
|
|
const struct option long_opts[] = {{"debug", required_argument, nullptr, 'd'},
|
|
{"debug-output", required_argument, nullptr, 'o'},
|
|
{"debug-stack-frames", required_argument, nullptr, 'D'},
|
|
{"dump-parse-tree", no_argument, nullptr, 'P'},
|
|
{"no-indent", no_argument, nullptr, 'i'},
|
|
{"help", no_argument, nullptr, 'h'},
|
|
{"version", no_argument, nullptr, 'v'},
|
|
{"write", no_argument, nullptr, 'w'},
|
|
{"html", no_argument, nullptr, 1},
|
|
{"ansi", no_argument, nullptr, 2},
|
|
{"pygments", no_argument, nullptr, 3},
|
|
{"check", no_argument, nullptr, 'c'},
|
|
{}};
|
|
|
|
int opt;
|
|
while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) {
|
|
switch (opt) {
|
|
case 'P': {
|
|
dump_parse_tree = true;
|
|
break;
|
|
}
|
|
case 'h': {
|
|
print_help("fish_indent", 1);
|
|
exit(0);
|
|
}
|
|
case 'v': {
|
|
std::fwprintf(stdout, _(L"%ls, version %s\n"), program_name, get_fish_version());
|
|
exit(0);
|
|
}
|
|
case 'w': {
|
|
output_type = output_type_file;
|
|
break;
|
|
}
|
|
case 'i': {
|
|
do_indent = false;
|
|
break;
|
|
}
|
|
case 1: {
|
|
output_type = output_type_html;
|
|
break;
|
|
}
|
|
case 2: {
|
|
output_type = output_type_ansi;
|
|
break;
|
|
}
|
|
case 3: {
|
|
output_type = output_type_pygments_csv;
|
|
break;
|
|
}
|
|
case 'c': {
|
|
output_type = output_type_check;
|
|
break;
|
|
}
|
|
case 'd': {
|
|
activate_flog_categories_by_pattern(str2wcstring(optarg));
|
|
for (auto cat : get_flog_categories()) {
|
|
if (cat->enabled) {
|
|
std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case 'D': {
|
|
// TODO: Option is currently useless.
|
|
// Either remove it or make it work with FLOG.
|
|
break;
|
|
}
|
|
case 'o': {
|
|
debug_output = optarg;
|
|
break;
|
|
}
|
|
default: {
|
|
// We assume getopt_long() has already emitted a diagnostic msg.
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
argc -= optind;
|
|
argv += optind;
|
|
|
|
// Direct any debug output right away.
|
|
FILE *debug_output_file = nullptr;
|
|
if (!debug_output.empty()) {
|
|
debug_output_file = fopen(debug_output.c_str(), "w");
|
|
if (!debug_output_file) {
|
|
fprintf(stderr, "Could not open file %s\n", debug_output.c_str());
|
|
perror("fopen");
|
|
exit(-1);
|
|
}
|
|
set_cloexec(fileno(debug_output_file));
|
|
setlinebuf(debug_output_file);
|
|
set_flog_output_file(debug_output_file);
|
|
}
|
|
|
|
int retval = 0;
|
|
|
|
wcstring src;
|
|
for (int i = 0; i < argc || (argc == 0 && i == 0); i++) {
|
|
if (argc == 0 && i == 0) {
|
|
if (output_type == output_type_file) {
|
|
std::fwprintf(
|
|
stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"),
|
|
program_name);
|
|
exit(1);
|
|
}
|
|
src = read_file(stdin);
|
|
} else {
|
|
FILE *fh = fopen(argv[i], "r");
|
|
if (fh) {
|
|
src = read_file(fh);
|
|
fclose(fh);
|
|
output_location = argv[i];
|
|
} else {
|
|
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), argv[i],
|
|
std::strerror(errno));
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
if (output_type == output_type_pygments_csv) {
|
|
std::string output = make_pygments_csv(src);
|
|
fputs(output.c_str(), stdout);
|
|
continue;
|
|
}
|
|
|
|
const wcstring output_wtext = prettify(src, do_indent);
|
|
|
|
// Maybe colorize.
|
|
std::vector<highlight_spec_t> colors;
|
|
if (output_type != output_type_plain_text) {
|
|
highlight_shell(output_wtext, colors, operation_context_t::globals());
|
|
}
|
|
|
|
std::string colored_output;
|
|
switch (output_type) {
|
|
case output_type_plain_text: {
|
|
colored_output = no_colorize(output_wtext);
|
|
break;
|
|
}
|
|
case output_type_file: {
|
|
FILE *fh = fopen(output_location, "w");
|
|
if (fh) {
|
|
std::fputws(output_wtext.c_str(), fh);
|
|
fclose(fh);
|
|
} else {
|
|
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location,
|
|
std::strerror(errno));
|
|
exit(1);
|
|
}
|
|
break;
|
|
}
|
|
case output_type_ansi: {
|
|
colored_output = colorize(output_wtext, colors, env_stack_t::globals());
|
|
break;
|
|
}
|
|
case output_type_html: {
|
|
colored_output = html_colorize(output_wtext, colors);
|
|
break;
|
|
}
|
|
case output_type_pygments_csv: {
|
|
DIE("pygments_csv should have been handled above");
|
|
}
|
|
case output_type_check: {
|
|
if (output_wtext != src) {
|
|
if (argc) {
|
|
std::fwprintf(stderr, _(L"%s\n"), argv[i]);
|
|
}
|
|
retval++;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
std::fputws(str2wcstring(colored_output).c_str(), stdout);
|
|
}
|
|
return retval;
|
|
}
|