Port fish_indent

This commit is contained in:
Johannes Altmanninger 2023-12-03 16:54:52 +01:00
parent 3842d03473
commit b28521c3d5
10 changed files with 1120 additions and 1142 deletions

View file

@ -113,7 +113,6 @@ set(FISH_SRCS
src/expand.cpp
src/fallback.cpp
src/fds.cpp
src/fish_indent_common.cpp
src/fish_version.cpp
src/flog.cpp
src/highlight.cpp

View file

@ -50,6 +50,7 @@ extern "C" {
pub fn C_O_EXLOCK() -> c_int;
pub fn stdout_stream() -> *mut libc::FILE;
pub fn UVAR_FILE_SET_MTIME_HACK() -> bool;
pub fn setlinebuf(stream: *mut libc::FILE);
}
macro_rules! CVAR {

View file

@ -164,7 +164,7 @@ pub fn make_autoclose_pipes() -> Option<AutoClosePipes> {
}
/// Sets CLO_EXEC on a given fd according to the value of \p should_set.
pub fn set_cloexec(fd: RawFd, should_set: bool) -> c_int {
pub fn set_cloexec(fd: RawFd, should_set: bool /* = true */) -> c_int {
// Note we don't want to overwrite existing flags like O_NONBLOCK which may be set. So fetch the
// existing flags and modify them.
let flags = unsafe { libc::fcntl(fd, F_GETFD, 0) };

View file

@ -24,7 +24,6 @@ include_cpp! {
#include "exec.h"
#include "fallback.h"
#include "fds.h"
#include "fish_indent_common.h"
#include "flog.h"
#include "function.h"
#include "highlight.h"
@ -83,8 +82,6 @@ include_cpp! {
generate!("wgettext_ptr")
generate!("pretty_printer_t")
generate!("fd_event_signaller_t")
generate!("highlight_role_t")

File diff suppressed because it is too large Load diff

View file

@ -23,3 +23,28 @@ impl<T> IsSomeAnd for Option<T> {
}
}
}
/// Sortedness check for slice-like containers, driven by a partial comparison.
///
/// The callback compares two neighbouring elements and may decline to order
/// them by returning `None`; such pairs are accepted.
pub trait IsSorted {
    /// Element type handed to the comparison callback.
    type T;

    /// Returns `true` unless some adjacent pair compares as
    /// `Some(Ordering::Greater)`. Empty and one-element inputs are sorted.
    fn is_sorted_by(&self, pred: impl Fn(&Self::T, &Self::T) -> Option<std::cmp::Ordering>)
        -> bool;
}

impl<T> IsSorted for &[T] {
    type T = T;

    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
        // Pair every element with its successor; the first descending pair ends the scan.
        let descending = self
            .iter()
            .zip(self.iter().skip(1))
            .any(|(left, right)| matches!(pred(left, right), Some(std::cmp::Ordering::Greater)));
        !descending
    }
}

impl<T> IsSorted for Vec<T> {
    type T = T;

    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
        // Delegate to the slice implementation.
        let items: &[T] = &self[..];
        IsSorted::is_sorted_by(&items, pred)
    }
}

impl<T> IsSorted for &Vec<T> {
    type T = T;

    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
        // Delegate to the slice implementation.
        let items: &[T] = &self[..];
        IsSorted::is_sorted_by(&items, pred)
    }
}

View file

@ -3,7 +3,6 @@
#include "builtins/commandline.h"
#include "event.h"
#include "fds.h"
#include "fish_indent_common.h"
#include "highlight.h"
#include "input.h"
#include "parse_util.h"
@ -23,7 +22,6 @@ void mark_as_used(const parser_t& parser, env_stack_t& env_stack) {
highlight_spec_t{};
init_input();
make_pipes_ffi();
pretty_printer_t({}, {});
reader_change_cursor_selection_mode(cursor_selection_mode_t::exclusive);
reader_change_history({});
reader_read_ffi({}, {}, {});

View file

@ -41,7 +41,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "fds.h"
#include "ffi_baggage.h"
#include "ffi_init.rs.h"
#include "fish_indent_common.h"
#include "fish_indent.rs.h"
#include "fish_version.h"
#include "flog.h"
#include "future_feature_flags.h"
@ -52,454 +52,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "wcstringutil.h"
#include "wutil.h" // IWYU pragma: keep
static bool dump_parse_tree = false;
static int ret = 0;
// Read the entire contents of a file into a wide string.
//
// A byte sequence that is illegal in the current locale (EILSEQ) is skipped one byte at a time:
// a warning is logged and the file-level `ret` status is set to 1. Any other read error is
// reported through wperror() and ends the process with exit status 1.
static wcstring read_file(FILE *f) {
    wcstring result;
    while (true) {
        wint_t c = std::fgetwc(f);
        if (c == WEOF) {
            if (ferror(f)) {
                // Snapshot errno immediately: the stdio calls below are allowed to overwrite it,
                // and the warning must describe the fgetwc() failure.
                const int read_errno = errno;
                if (read_errno == EILSEQ) {
                    // Illegal byte sequence. Try to skip past it.
                    clearerr(f);
                    int ch = fgetc(f);  // for printing the warning, and seeks forward 1 byte.
                    FLOGF(warning, "%s (byte=%X)", std::strerror(read_errno), ch);
                    ret = 1;
                    continue;
                } else {
                    wperror(L"fgetwc");
                    exit(1);
                }
            }
            // Plain end of file.
            break;
        }
        result.push_back(static_cast<wchar_t>(c));
    }
    return result;
}
/// Return the bare enumerator name of \p role as a narrow C string, e.g. "command".
/// A role that is not listed below is treated as a fatal error.
static const char *highlight_role_to_string(highlight_role_t role) {
// Expands to a case label that returns the stringized enumerator name.
#define ROLE_NAME_CASE(name)     \
    case highlight_role_t::name: \
        return #name;

    switch (role) {
        ROLE_NAME_CASE(normal)
        ROLE_NAME_CASE(error)
        ROLE_NAME_CASE(command)
        ROLE_NAME_CASE(keyword)
        ROLE_NAME_CASE(statement_terminator)
        ROLE_NAME_CASE(param)
        ROLE_NAME_CASE(option)
        ROLE_NAME_CASE(comment)
        ROLE_NAME_CASE(search_match)
        ROLE_NAME_CASE(operat)
        ROLE_NAME_CASE(escape)
        ROLE_NAME_CASE(quote)
        ROLE_NAME_CASE(redirection)
        ROLE_NAME_CASE(autosuggestion)
        ROLE_NAME_CASE(selection)
        ROLE_NAME_CASE(pager_progress)
        ROLE_NAME_CASE(pager_background)
        ROLE_NAME_CASE(pager_prefix)
        ROLE_NAME_CASE(pager_completion)
        ROLE_NAME_CASE(pager_description)
        ROLE_NAME_CASE(pager_secondary_background)
        ROLE_NAME_CASE(pager_secondary_prefix)
        ROLE_NAME_CASE(pager_secondary_completion)
        ROLE_NAME_CASE(pager_secondary_description)
        ROLE_NAME_CASE(pager_selected_background)
        ROLE_NAME_CASE(pager_selected_prefix)
        ROLE_NAME_CASE(pager_selected_completion)
        ROLE_NAME_CASE(pager_selected_description)
        default:
            DIE("UNKNOWN ROLE");
    }

#undef ROLE_NAME_CASE
}
// Entry point for Pygments CSV output.
// Our output is a newline-separated string.
// Each line is of the form `start,end,role`
// start and end is the half-open token range, value is a string from highlight_role_t.
// Example:
// 3,7,command
static std::string make_pygments_csv(const wcstring &src) {
const size_t len = src.size();
auto colors = highlight_shell_ffi(src, *operation_context_globals(), false, {});
assert(colors->size() == len && "Colors and src should have same size");
struct token_range_t {
unsigned long start;
unsigned long end;
highlight_role_t role;
};
std::vector<token_range_t> token_ranges;
for (size_t i = 0; i < len; i++) {
highlight_role_t role = colors->at(i).foreground;
// See if we can extend the last range.
if (!token_ranges.empty()) {
auto &last = token_ranges.back();
if (last.role == role && last.end == i) {
last.end = i + 1;
continue;
}
}
// We need a new range.
token_ranges.push_back(token_range_t{i, i + 1, role});
}
// Now render these to a string.
std::string result;
for (const auto &range : token_ranges) {
char buff[128];
snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end,
highlight_role_to_string(range.role));
result.append(buff);
}
return result;
}
// Entry point for prettification: return \p src reformatted by pretty_printer_t.
// When the file-level dump_parse_tree flag is set, a dump of the parsed ast is written to stderr
// first.
static wcstring prettify(const wcstring &src, bool do_indent) {
    if (dump_parse_tree) {
        const auto dump_flags = parse_flag_leave_unterminated | parse_flag_include_comments |
                                parse_flag_show_extra_semis;
        auto tree = ast_parse(src, dump_flags);
        wcstring dumped = *tree->dump(src);
        std::fwprintf(stderr, L"%ls\n", dumped.c_str());
    }

    pretty_printer_t printer{src, do_indent};
    return printer.prettify();
}
/// Helper for html_colorize(), which wraps text in HTML span elements for the various colors:
/// return the CSS class name used for the foreground role of \p spec.
/// Roles without an explicit case map to "fish_color_other".
/// NOTE(review): highlight_role_t::keyword has no case here and therefore also yields
/// "fish_color_other" - confirm that this is intended.
static const wchar_t *html_class_name_for_color(highlight_spec_t spec) {
    switch (spec->foreground) {
        case highlight_role_t::normal:
            return L"fish_color_normal";
        case highlight_role_t::error:
            return L"fish_color_error";
        case highlight_role_t::command:
            return L"fish_color_command";
        case highlight_role_t::statement_terminator:
            return L"fish_color_statement_terminator";
        case highlight_role_t::param:
            return L"fish_color_param";
        case highlight_role_t::option:
            return L"fish_color_option";
        case highlight_role_t::comment:
            return L"fish_color_comment";
        case highlight_role_t::search_match:
            return L"fish_color_search_match";
        case highlight_role_t::operat:
            // The enumerator is spelled `operat`, the class name uses the full word.
            return L"fish_color_operator";
        case highlight_role_t::escape:
            return L"fish_color_escape";
        case highlight_role_t::quote:
            return L"fish_color_quote";
        case highlight_role_t::redirection:
            return L"fish_color_redirection";
        case highlight_role_t::autosuggestion:
            return L"fish_color_autosuggestion";
        case highlight_role_t::selection:
            return L"fish_color_selection";
        default:
            return L"fish_color_other";
    }
}
static std::string html_colorize(const wcstring &text,
const std::vector<highlight_spec_t> &colors) {
if (text.empty()) {
return "";
}
assert(colors.size() == text.size());
wcstring html = L"<pre><code>";
highlight_spec_t last_color = highlight_role_t::normal;
for (size_t i = 0; i < text.size(); i++) {
// Handle colors.
highlight_spec_t color = colors.at(i);
if (i > 0 && color != last_color) {
html.append(L"</span>");
}
if (i == 0 || color != last_color) {
append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
}
last_color = color;
// Handle text.
wchar_t wc = text.at(i);
switch (wc) {
case L'&': {
html.append(L"&amp;");
break;
}
case L'\'': {
html.append(L"&apos;");
break;
}
case L'"': {
html.append(L"&quot;");
break;
}
case L'<': {
html.append(L"&lt;");
break;
}
case L'>': {
html.append(L"&gt;");
break;
}
default: {
html.push_back(wc);
break;
}
}
}
html.append(L"</span></code></pre>");
return wcs2zstring(html);
}
/// Plain-text output: convert \p text with wcs2zstring() without adding any markup.
static std::string no_colorize(const wcstring &text) {
    std::string plain = wcs2zstring(text);
    return plain;
}
// Entry point of the fish_indent tool.
//
// NOTE(review): this span carries two signatures (`main(int argc, char *argv[])` and `main()`)
// and two consecutive return statements. It looks like the old and the new side of a diff were
// interleaved; presumably only `int main() { ... return fish_indent_main(); }` is meant to
// survive - confirm against the repository.
//
// The long body parses the command line, reads every input (stdin when no path is given),
// prettifies it and emits it in the selected output format. With --check the return value counts
// the inputs whose prettified text differs from the original.
int main(int argc, char *argv[]) {
int main() {
program_name = L"fish_indent";
rust_init();
// Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
// because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
//
// TODO: Auto-detect the encoding of the script. We should look for a vim style comment
// (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
// (e.g., "# -*- coding: <encoding-name> -*-").
setlocale(LC_ALL, "");
rust_env_init(true);
// Apply every entry of the global fish_features variable.
if (auto features_var = env_stack_t::globals().get(L"fish_features")) {
for (const wcstring &s : features_var->as_list()) {
feature_set_from_string(s.c_str());
}
}
// Types of output we support.
enum {
output_type_plain_text,
output_type_file,
output_type_ansi,
output_type_pygments_csv,
output_type_check,
output_type_html
} output_type = output_type_plain_text;
// Path of the most recently read input file; the target for -w.
const char *output_location = "";
bool do_indent = true;
// File path for debug output.
std::string debug_output;
// Note that 'o' and 'P' have no short form; they are only reachable through the long options.
const char *short_opts = "+d:hvwicD:";
const struct option long_opts[] = {{"debug", required_argument, nullptr, 'd'},
{"debug-output", required_argument, nullptr, 'o'},
{"debug-stack-frames", required_argument, nullptr, 'D'},
{"dump-parse-tree", no_argument, nullptr, 'P'},
{"no-indent", no_argument, nullptr, 'i'},
{"help", no_argument, nullptr, 'h'},
{"version", no_argument, nullptr, 'v'},
{"write", no_argument, nullptr, 'w'},
{"html", no_argument, nullptr, 1},
{"ansi", no_argument, nullptr, 2},
{"pygments", no_argument, nullptr, 3},
{"check", no_argument, nullptr, 'c'},
{}};
int opt;
while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) {
switch (opt) {
case 'P': {
dump_parse_tree = true;
break;
}
case 'h': {
unsafe_print_help("fish_indent");
exit(0);
}
case 'v': {
std::fwprintf(stdout, _(L"%ls, version %s\n"), program_name, get_fish_version());
exit(0);
}
case 'w': {
output_type = output_type_file;
break;
}
case 'i': {
do_indent = false;
break;
}
case 1: {
output_type = output_type_html;
break;
}
case 2: {
output_type = output_type_ansi;
break;
}
case 3: {
output_type = output_type_pygments_csv;
break;
}
case 'c': {
output_type = output_type_check;
break;
}
case 'd': {
activate_flog_categories_by_pattern(str2wcstring(optarg));
for (auto cat : get_flog_categories()) {
if (cat->enabled) {
std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name);
}
}
break;
}
case 'D': {
// TODO: Option is currently useless.
// Either remove it or make it work with FLOG.
break;
}
case 'o': {
debug_output = optarg;
break;
}
default: {
// We assume getopt_long() has already emitted a diagnostic msg.
exit(1);
}
}
}
// Drop the parsed options so that argv[0..argc) holds only the input paths.
argc -= optind;
argv += optind;
// Direct any debug output right away.
FILE *debug_output_file = nullptr;
if (!debug_output.empty()) {
debug_output_file = fopen(debug_output.c_str(), "w");
if (!debug_output_file) {
fprintf(stderr, "Could not open file %s\n", debug_output.c_str());
perror("fopen");
exit(-1);
}
set_cloexec(fileno(debug_output_file));
setlinebuf(debug_output_file);
set_flog_output_file(debug_output_file);
}
int retval = 0;
wcstring src;
// One iteration per input path, or exactly one iteration reading stdin when no path was given.
for (int i = 0; i < argc || (argc == 0 && i == 0); i++) {
if (argc == 0 && i == 0) {
// Writing back in place makes no sense without a file to write to.
if (output_type == output_type_file) {
std::fwprintf(
stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"),
program_name);
exit(1);
}
src = read_file(stdin);
} else {
FILE *fh = fopen(argv[i], "r");
if (fh) {
src = read_file(fh);
fclose(fh);
output_location = argv[i];
} else {
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), argv[i],
std::strerror(errno));
exit(1);
}
}
// Pygments output highlights the unmodified source and skips prettification entirely.
if (output_type == output_type_pygments_csv) {
std::string output = make_pygments_csv(src);
fputs(output.c_str(), stdout);
continue;
}
const wcstring output_wtext = prettify(src, do_indent);
// Maybe colorize.
std::vector<highlight_spec_t> colors;
maybe_t<rust::Box<HighlightSpecListFFI>> ffi_colors;
if (output_type != output_type_plain_text) {
highlight_shell(output_wtext, colors, *operation_context_globals());
ffi_colors = highlight_shell_ffi(output_wtext, *operation_context_globals(), false, {});
}
// Stays empty for the file and check output types, which print nothing to stdout.
std::string colored_output;
switch (output_type) {
case output_type_plain_text: {
colored_output = no_colorize(output_wtext);
break;
}
case output_type_file: {
// Overwrite the input file with the prettified text.
FILE *fh = fopen(output_location, "w");
if (fh) {
std::fputws(output_wtext.c_str(), fh);
fclose(fh);
} else {
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location,
std::strerror(errno));
exit(1);
}
break;
}
case output_type_ansi: {
auto ffi_colored =
colorize(output_wtext, **ffi_colors, env_stack_t::globals().get_impl_ffi());
for (uint8_t c : ffi_colored) {
colored_output.push_back(c);
}
break;
}
case output_type_html: {
colored_output = html_colorize(output_wtext, colors);
break;
}
case output_type_pygments_csv: {
DIE("pygments_csv should have been handled above");
}
case output_type_check: {
// Report (by path, when there is one) and count every input that is not already pretty.
if (output_wtext != src) {
if (argc) {
std::fwprintf(stderr, _(L"%s\n"), argv[i]);
}
retval++;
}
break;
}
}
std::fputws(str2wcstring(colored_output).c_str(), stdout);
}
return retval;
return fish_indent_main();
}

View file

@ -1,476 +0,0 @@
#include "fish_indent_common.h"
#include "ast.h"
#include "common.h"
#include "env.h"
#include "expand.h"
#include "flog.h"
#include "global_safety.h"
#include "maybe.h"
#include "operation_context.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#if INCLUDE_RUST_HEADERS
#include "fish_indent.rs.h"
#endif
using namespace ast;
// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4
/// \return whether the character at index \p idx of \p text is escaped, i.e. whether
/// count_preceding_backslashes() reports an odd number of backslashes for it.
static bool char_is_escaped(const wcstring &text, size_t idx) {
    const size_t backslashes = count_preceding_backslashes(text, idx);
    const bool odd_count = (backslashes % 2 == 1);
    return odd_count;
}
/// Construct a pretty printer for \p src.
/// \p src is stored by reference (the `source` member is a `const wcstring &`), so it has to
/// outlive the printer.
/// If \p do_indent is false, every indent level is 0.
/// The order of the initializers matters: compute_gaps() and compute_preferred_semi_locations()
/// read `source` and `ast`, which are initialized before them.
pretty_printer_t::pretty_printer_t(const wcstring &src, bool do_indent)
: source(src),
// One indent level per source character.
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
ast(ast_parse(src, parse_flags())),
// The visitor is created from *this while the remaining members are still uninitialized.
visitor(new_pretty_printer(*this)),
do_indent(do_indent),
gaps(compute_gaps()),
preferred_semi_locations(compute_preferred_semi_locations()) {
assert(indents.size() == source.size() && "indents and source should be same length");
}
/// \return the gap text flags for the gap text that comes *before* \p node.
/// The result is either default_flags or default_flags with allow_escaped_newlines added.
pretty_printer_t::gap_flags_t pretty_printer_t::gap_text_flags_before_node(const node_t &node) {
    const gap_flags_t plain = default_flags;
    const gap_flags_t with_escaped_newlines = plain | allow_escaped_newlines;

    const auto kind = node.typ();

    // Leaf nodes that can be part of a long command may be preceded by escaped newlines.
    if (kind == type_t::argument || kind == type_t::redirection ||
        kind == type_t::variable_assignment) {
        return with_escaped_newlines;
    }
    if (kind != type_t::token_base) return plain;

    const auto token = node.token_type();

    // The same goes for &&, || and pipes.
    if (token == parse_token_type_t::andand || token == parse_token_type_t::oror ||
        token == parse_token_type_t::pipe) {
        return with_escaped_newlines;
    }
    if (token != parse_token_type_t::string) return plain;

    // A command that follows a variable assignment may be preceded by an escaped newline, since
    // both can be long (#7955). Climb from the string token through its decorated statement and
    // statement to the node that owns the variable assignments.
    auto ancestor = node.parent();
    if (ancestor->typ() != type_t::decorated_statement) return plain;
    ancestor = ancestor->parent();
    assert(ancestor->typ() == type_t::statement);
    ancestor = ancestor->parent();

    bool has_variables = false;
    if (auto *pipeline = ancestor->try_as_job_pipeline()) {
        has_variables = !pipeline->variables().empty();
    } else if (auto *continuation = ancestor->try_as_job_continuation()) {
        has_variables = !continuation->variables().empty();
    } else if (auto *negation = ancestor->try_as_not_statement()) {
        has_variables = !negation->variables().empty();
    }
    return has_variables ? with_escaped_newlines : plain;
}
/// \return whether the output generated so far ends in an unescaped space.
/// Escaped newlines (backslash + newline) at the end of the output are skipped first, so that in
/// a pipeline continued on the next line with `|`, the pipe still "sees" the space after the
/// first command. An unescaped trailing newline yields false.
/// TODO: this is too tricky, we should factor this better.
bool pretty_printer_t::has_preceding_space() const {
    long idx = static_cast<long>(output.size()) - 1;
    while (idx >= 0 && output.at(idx) == L'\n') {
        // idx is an index into `output`, so the backslashes have to be counted there as well.
        // (This used to count in `source`, where the same index refers to unrelated text and may
        // even lie past the end.) The callers visible in this file only ask when the output does
        // not end in a newline, so they are unaffected.
        const size_t backslashes = count_preceding_backslashes(output, idx);
        if (backslashes % 2 == 0) {
            // Not escaped.
            return false;
        }
        // Step over the newline and the backslashes in front of it.
        idx -= static_cast<long>(1 + backslashes);
    }
    return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
}
/// Entry point: prettify the source and return the result.
/// The ast is visited, the gap text after the last token is emitted, and all trailing newlines
/// are collapsed into exactly one. The `output` member is moved from.
wcstring pretty_printer_t::prettify() {
    output.clear();
    visitor->visit(*ast->top());

    // Trailing gap text: whatever ends at the very end of the source.
    source_range_t end_of_source{(uint32_t)source.size(), 0};
    emit_gap_text_before(end_of_source, default_flags);

    // Strip every trailing newline, then add a single one back.
    while (!output.empty() && output.back() == L'\n') {
        output.pop_back();
    }
    emit_newline();

    return std::move(output);
}
std::vector<source_range_t> pretty_printer_t::compute_gaps() const {
auto range_compare = [](source_range_t r1, source_range_t r2) {
if (r1.start != r2.start) return r1.start < r2.start;
return r1.length < r2.length;
};
// Collect the token ranges into a list.
std::vector<source_range_t> tok_ranges;
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
if (node->category() == category_t::leaf) {
auto r = node->source_range();
if (r.length > 0) tok_ranges.push_back(r);
}
}
// Place a zero length range at end to aid in our inverting.
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
// Our tokens should be sorted.
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
// For each range, add a gap range between the previous range and this range.
std::vector<source_range_t> gaps;
uint32_t prev_end = 0;
for (source_range_t tok_range : tok_ranges) {
assert(tok_range.start >= prev_end && "Token range should not overlap or be out of order");
if (tok_range.start >= prev_end) {
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
}
prev_end = tok_range.start + tok_range.length;
}
return gaps;
}
/// Called for a begin header: make sure the output is at the start of a line, emitting a newline
/// if it is not.
void pretty_printer_t::visit_begin_header() {
    if (at_line_start()) return;
    emit_newline();
}
/// Emit a maybe_newlines_t node. \p node_ must point to a maybe_newlines_t.
/// Nodes with an empty source range produce no output.
void pretty_printer_t::visit_maybe_newlines(const void *node_) {
const auto &node = *static_cast<const maybe_newlines_t *>(node_);
// Our newlines may have comments embedded in them, example:
// cmd |
// # something
// cmd2
// Treat it as gap text.
if (node.range().length > 0) {
auto flags = gap_text_flags_before_node(*node.ptr());
// Use the indent recorded for the first character of the range.
current_indent = indents.at(node.range().start);
// First the gap text that ends where this node starts...
bool added_newline = emit_gap_text_before(node.range(), flags);
source_range_t gap_range = node.range();
// ...then the node's own text. If the preceding gap text already ended with a newline, skip
// a leading newline of this range.
if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') {
// NOTE(review): only `start` is advanced, `length` stays as it was, so the range passed
// on reaches one character past the node - confirm that this is intended.
gap_range.start++;
}
emit_gap_text(gap_range, flags);
}
}
void pretty_printer_t::visit_redirection(const void *node_) {
const auto &node = *static_cast<const redirection_t *>(node_);
// No space between a redirection operator and its target (#2899).
emit_text(node.oper().range(), default_flags);
emit_text(node.target().range(), skip_space);
}
/// Emit a semicolon or newline that is part of the ast. \p node_ must point to a node_t.
/// Such nodes terminate e.g. a job or an 'if' header; random semicolons in job lists are not
/// part of the ast and never get here. preferred_semi_locations decides whether the terminator
/// is written as a semicolon or as a newline.
void pretty_printer_t::visit_semi_nl(const void *node_) {
    const auto &node = *static_cast<const node_t *>(node_);
    auto range = node.source_range();

    // A semicolon is preferred if this node has source and its offset was marked in advance.
    bool prefer_semi = false;
    if (range.length > 0) {
        prefer_semi = std::binary_search(preferred_semi_locations.begin(),
                                         preferred_semi_locations.end(), range.start);
    }

    emit_gap_text_before(range, gap_text_flags_before_node(*node.ptr()));

    // Don't emit anything if the gap text put us on a newline (because it had a comment).
    if (at_line_start()) return;

    if (prefer_semi) {
        emit_semi();
        return;
    }

    emit_newline();
    // The source had a semicolon but a newline was emitted: swallow a subsequent newline.
    if (substr(range) == L";") {
        gap_text_mask_newline = true;
    }
}
void pretty_printer_t::emit_node_text(const void *node_) {
const auto &node = *static_cast<const node_t *>(node_);
source_range_t range = node.source_range();
// Weird special-case: a token may end in an escaped newline. Notably, the newline is
// not part of the following gap text, handle indentation here (#8197).
bool ends_with_escaped_nl = range.length >= 2 && source.at(range.end() - 2) == L'\\' &&
source.at(range.end() - 1) == L'\n';
if (ends_with_escaped_nl) {
range = {range.start, range.length - 2};
}
emit_text(range, gap_text_flags_before_node(node));
if (ends_with_escaped_nl) {
// By convention, escaped newlines are preceded with a space.
output.append(L" \\\n");
// TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
// The cases where this matters are probably very rare.
current_indent++;
emit_space_or_indent();
current_indent--;
}
}
/// Emit the range \p r of the original text: first the gap text in front of it, then - unless
/// the range is empty - a separating space or indent followed by the cleaned-up text.
/// current_indent is set to the indent recorded for the start of \p r.
void pretty_printer_t::emit_text(source_range_t r, gap_flags_t flags) {
    emit_gap_text_before(r, flags);
    current_indent = indents.at(r.start);

    const bool has_text = r.length > 0;
    if (!has_text) return;

    emit_space_or_indent(flags);
    output.append(clean_text(substr(r)));
}
/// Given a string \p input, remove unnecessary quotes, etc.
/// The unquoted form is returned only if it is non-empty and consists solely of alphanumerics,
/// '_', '-' and '/'; in every other case \p input is returned unchanged.
wcstring pretty_printer_t::clean_text(const wcstring &input) {
    // Unescape without computing backslash escapes like \U or \x, so that those stay intact.
    // Expansions and the like leave special markers behind.
    wcstring stripped = *unescape_string(input.c_str(), input.size(),
                                         UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES,
                                         STRING_STYLE_SCRIPT);

    // INTERNAL_SEPARATOR stands for a quote; drop all of them.
    auto is_separator = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
    stripped.erase(std::remove_if(stripped.begin(), stripped.end(), is_separator),
                   stripped.end());

    if (stripped.empty()) return input;

    // The list of "good" characters is deliberately conservative. It could be extended, but the
    // precise list is tough to give, can change over time (see "^", "%" and "?", in some cases
    // "{}") and it just makes people feel more at ease.
    auto is_plain = [](wchar_t ch) {
        return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
    };
    const bool only_plain = std::all_of(stripped.begin(), stripped.end(), is_plain);
    if (only_plain) return stripped;
    return input;
}
/// Emit the gap text that ends where the range \p r starts, if there is any.
/// Gap text containing an error is copied verbatim; everything else goes through
/// emit_gap_text() with \p flags.
/// \return the result of emit_gap_text(), or false if it was not called.
/// gap_text_mask_newline is cleared on every call.
bool pretty_printer_t::emit_gap_text_before(source_range_t r, gap_flags_t flags) {
assert(r.start <= source.size() && "source out of bounds");
bool added_newline = false;
// Find the gap text which ends at start.
source_range_t range = gap_text_to(r.start);
if (range.length > 0) {
// Set the indent from the beginning of this gap text.
// For example:
// begin
// cmd
// # comment
// end
// Here the comment is the gap text before the end, but we want the indent from the
// command.
// The bounds check is needed because a gap may start at the very end of the source, where
// no indent is recorded.
if (range.start < indents.size()) current_indent = indents.at(range.start);
// If this range contained an error, append the gap text without modification.
// For example in: echo foo "
// We don't want to mess with the quote.
if (range_contained_error(range)) {
output.append(substr(range));
} else {
added_newline = emit_gap_text(range, flags);
}
}
// Always clear gap_text_mask_newline after emitting even empty gap text.
gap_text_mask_newline = false;
return added_newline;
}
/// \return whether the ast has an error range that neither ends at or before the start of \p r
/// nor starts at or after its end. The error ranges are expected to be sorted.
bool pretty_printer_t::range_contained_error(source_range_t r) const {
    // "Entirely before": lhs ends no later than rhs starts.
    auto ends_before = [](source_range_t lhs, source_range_t rhs) {
        return lhs.start + lhs.length <= rhs.start;
    };

    const auto &error_ranges = ast->extras()->errors();
    assert(std::is_sorted(error_ranges.begin(), error_ranges.end(), ends_before) &&
           "Error ranges should be sorted");

    const bool found =
        std::binary_search(error_ranges.begin(), error_ranges.end(), r, ends_before);
    return found;
}
/// \return the gap range that ends exactly at offset \p end, or the empty range {0, 0} if no
/// gap ends there.
source_range_t pretty_printer_t::gap_text_to(uint32_t end) const {
    // Binary search for the first gap that does not end before the requested offset.
    auto ends_before = [](source_range_t gap, uint32_t offset) {
        return gap.start + gap.length < offset;
    };
    auto candidate = std::lower_bound(gaps.begin(), gaps.end(), end, ends_before);

    if (candidate != gaps.end() && candidate->start + candidate->length == end) {
        return *candidate;
    }
    // Not found.
    return source_range_t{0, 0};
}
/// Emit the gap text in \p range: comments and newlines from the original source.
/// Gap text made up only of spaces produces no output. An escaped newline is kept if \p flags
/// has allow_escaped_newlines or if it comes before the first comment. Comments are always
/// followed by a newline; semicolons are dropped.
/// \return true if the gap text ended with a comment, i.e. a newline was appended right after
/// that final comment; false otherwise.
bool pretty_printer_t::emit_gap_text(source_range_t range, gap_flags_t flags) {
wcstring gap_text = substr(range);
// Common case: if we are only spaces, do nothing.
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;
// Look to see if there is an escaped newline.
// Emit it if either we allow it, or it comes before the first comment.
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
// text - we already know it has no semantic significance.
size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl != wcstring::npos) {
size_t comment_idx = gap_text.find(L'#');
if ((flags & allow_escaped_newlines) ||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
// Emit a space before the escaped newline.
if (!at_line_start() && !has_preceding_space()) {
output.append(L" ");
}
output.append(L"\\\n");
// Indent the continuation line and any leading comments (#7252).
// Use the indentation level of the next newline.
// (escaped_nl is the offset of the backslash within the gap text, so +1 is the newline.)
current_indent = indents.at(range.start + escaped_nl + 1);
emit_space_or_indent();
}
}
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
// always emit one.
bool needs_nl = false;
auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
while (auto tok = tokenizer->next()) {
wcstring tok_text = *tokenizer->text_of(*tok);
if (needs_nl) {
// The previous token was a comment: terminate its line. If this token is the newline that
// followed the comment in the source, it has just been accounted for.
emit_newline();
needs_nl = false;
if (tok_text == L"\n") continue;
} else if (gap_text_mask_newline) {
// We only respect mask_newline the first time through the loop.
gap_text_mask_newline = false;
if (tok_text == L"\n") continue;
}
if (tok->type_ == token_type_t::comment) {
emit_space_or_indent();
output.append(tok_text);
needs_nl = true;
} else if (tok->type_ == token_type_t::end) {
// This may be either a newline or semicolon.
// Semicolons found here are not part of the ast and can simply be removed.
// Newlines are preserved unless mask_newline is set.
if (tok_text == L"\n") {
emit_newline();
}
} else {
// Anything else means that the gaps were computed incorrectly; report and abort.
fprintf(stderr,
"Gap text should only have comments and newlines - instead found token "
"type %d with text: %ls\n",
(int)tok->type_, tok_text.c_str());
DIE("Gap text should only have comments and newlines");
}
}
// A comment at the very end of the gap text still needs its newline.
if (needs_nl) emit_newline();
return needs_nl;
}
/// Emit a space or an indent as necessary, depending on the previous output.
/// At the start of a line, SPACES_PER_INDENT spaces are written per level of current_indent.
/// Elsewhere a single space is written, unless \p flags has skip_space or the output already
/// ends in a space.
void pretty_printer_t::emit_space_or_indent(gap_flags_t flags) {
    if (at_line_start()) {
        output.append(SPACES_PER_INDENT * current_indent, L' ');
        return;
    }
    if (flags & skip_space) return;
    if (!has_preceding_space()) {
        output.push_back(L' ');
    }
}
/// Return the sorted source offsets of the semi_nl nodes which should be emitted as semicolons
/// rather than newlines. Only nodes that are semicolons in the input are ever included.
/// The same offset may be recorded more than once; the result is not deduplicated.
std::vector<uint32_t> pretty_printer_t::compute_preferred_semi_locations() const {
std::vector<uint32_t> result;
// Record the offset of n if it has source and that source is a semicolon.
auto mark_semi_from_input = [&](const semi_nl_t &n) {
if (n.ptr()->has_source() && substr(n.range()) == L";") {
result.push_back(n.range().start);
}
};
// andor_job_lists get semis if the input uses semis.
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
// See if we have a condition and an andor_job_list.
const semi_nl_t *condition = nullptr;
const andor_job_list_t *andors = nullptr;
if (const auto *ifc = node->try_as_if_clause()) {
if (ifc->condition().has_semi_nl()) {
condition = &ifc->condition().semi_nl();
}
andors = &ifc->andor_tail();
} else if (const auto *wc = node->try_as_while_header()) {
if (wc->condition().has_semi_nl()) {
condition = &wc->condition().semi_nl();
}
andors = &wc->andor_tail();
}
// If there is no and-or tail then we always use a newline.
if (andors && andors->count() > 0) {
if (condition) mark_semi_from_input(*condition);
// Mark all but last of the andor list.
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
mark_semi_from_input(andors->at(i)->job().semi_nl());
}
}
}
// `x ; and y` gets semis if it has them already, and they are on the same line.
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
if (const auto *job_list = node->try_as_job_list()) {
const semi_nl_t *prev_job_semi_nl = nullptr;
for (size_t i = 0; i < job_list->count(); i++) {
const job_conjunction_t &job = *job_list->at(i);
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
const semi_nl_t *prev = prev_job_semi_nl;
prev_job_semi_nl = job.has_semi_nl() ? &job.semi_nl() : nullptr;
// Is this an 'and' or 'or' job?
if (!job.has_decorator()) continue;
// Now see if we want to mark 'prev' as allowing a semi.
// Only a previous semi_nl that is a semicolon in the source qualifies.
if (!prev || substr(prev->range()) != L";") continue;
// Is there a newline between them?
assert(prev->range().start <= job.decorator().range().start &&
"Ranges out of order");
// Search from the previous semicolon up to the end of this job's decorator.
auto start = source.begin() + prev->range().start;
auto end = source.begin() + job.decorator().range().end();
if (std::find(start, end, L'\n') == end) {
// We're going to allow the previous semi_nl to be a semi.
result.push_back(prev->range().start);
}
}
}
}
// Sorted because visit_semi_nl() looks offsets up with std::binary_search.
std::sort(result.begin(), result.end());
return result;
}

View file

@ -1,160 +0,0 @@
#ifndef FISH_INDENT_STAGING_H
#define FISH_INDENT_STAGING_H
#include "ast.h"
#include "common.h"
#include "cxx.h"
struct PrettyPrinter;
// Reformats fish source code: normalizes spacing and indentation while keeping comments.
// The printer is constructed for one source string; prettify() produces the output.
struct pretty_printer_t {
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
// than necessary and would probably benefit from a more layered approach where we identify
// certain runs, weight line breaks, have a cost model, etc.
pretty_printer_t(const wcstring &src, bool do_indent);
// Original source.
// This is a reference: the string passed to the constructor has to outlive the printer.
const wcstring &source;
// The indents of our string.
// This has the same length as 'source' and describes the indentation level.
const std::vector<int> indents;
// The parsed ast.
rust::Box<Ast> ast;
// The visitor which walks the ast and calls back into the emit_* and visit_* members below.
rust::Box<PrettyPrinter> visitor;
// The prettifier output.
wcstring output;
// The indent of the source range which we are currently emitting.
int current_indent{0};
// Whether to indent, or just insert spaces.
const bool do_indent;
// Whether the next gap text should hide the first newline.
bool gap_text_mask_newline{false};
// The "gaps": a sorted set of ranges between tokens.
// These contain whitespace, comments, semicolons, and other lexical elements which are not
// present in the ast.
const std::vector<source_range_t> gaps;
// The sorted set of source offsets of semi_nl_t which should be set as semis, not newlines.
// This is computed ahead of time for convenience.
const std::vector<uint32_t> preferred_semi_locations;
// Flags we support.
using gap_flags_t = uint32_t;
enum {
default_flags = 0,
// Whether to allow line splitting via escaped newlines.
// For example, in argument lists:
//
// echo a \
// b
//
// If this is not set, then split-lines will be joined.
allow_escaped_newlines = 1 << 0,
// Whether to skip the space that would otherwise be emitted before this token.
// This is used when emitting semis:
// echo a; echo b;
// No space required between 'a' and ';', or 'b' and ';'.
skip_space = 1 << 1,
};
#if INCLUDE_RUST_HEADERS
// \return gap text flags for the gap text that comes *before* a given node type.
static gap_flags_t gap_text_flags_before_node(const ast::node_t &node);
#endif
// \return whether we are at the start of a new line.
// Empty output counts as the start of a line.
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
// \return whether we have a space before the output.
// This ignores escaped spaces and escaped newlines.
bool has_preceding_space() const;
// Entry point. Prettify our source code and return it.
wcstring prettify();
// \return a substring of source.
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
// Return the gap ranges from our ast.
std::vector<source_range_t> compute_gaps() const;
// Return sorted list of semi-preferring semi_nl nodes.
std::vector<uint32_t> compute_preferred_semi_locations() const;
// Emit a space or indent as necessary, depending on the previous output.
void emit_space_or_indent(gap_flags_t flags = default_flags);
// Emit "gap text:" newlines and comments from the original source.
// Gap text may be a few things:
//
// 1. Just a space is common. We will trim the spaces to be empty.
//
// Here the gap text is the comment, followed by the newline:
//
// echo abc # arg
// echo def
//
// 2. It may also be an escaped newline:
// Here the gap text is a space, backslash, newline, space.
//
// echo \
// hi
//
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
//
// begin | stuff
//
// We do not handle errors here - instead our caller does.
bool emit_gap_text(source_range_t range, gap_flags_t flags);
/// \return the gap text ending at a given index into the string, or empty if none.
source_range_t gap_text_to(uint32_t end) const;
/// \return whether a range \p r overlaps an error range from our ast.
bool range_contained_error(source_range_t r) const;
// Emit the gap text before a source range.
bool emit_gap_text_before(source_range_t r, gap_flags_t flags);
/// Given a string \p input, remove unnecessary quotes, etc.
wcstring clean_text(const wcstring &input);
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
// If \p flags has allow_escaped_newlines, then permit escaped newlines; otherwise collapse such
// lines.
void emit_text(source_range_t r, gap_flags_t flags);
// Emit the source text of an ast node; the pointer is cast to a node_t by the implementation.
void emit_node_text(const void *node);
// Emit one newline.
void emit_newline() { output.push_back(L'\n'); }
// Emit a semicolon.
void emit_semi() { output.push_back(L';'); }
// Visitor callbacks. Each one receives its ast node as an untyped pointer.
void visit_semi_nl(const void *node_);
void visit_redirection(const void *node_);
void visit_maybe_newlines(const void *node_);
void visit_begin_header();
// The flags we use to parse.
static parse_tree_flags_t parse_flags() {
return parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
}
};
#endif // FISH_INDENT_STAGING_H