Port fish_indent

2024-12-26 21:03:12 +00:00 · 2023-12-03 16:54:52 +01:00 · 2023-12-03 16:54:52 +01:00 · b28521c3d5
commit b28521c3d5
parent 3842d03473
10 changed files with 1120 additions and 1142 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -113,7 +113,6 @@ set(FISH_SRCS
    src/expand.cpp
    src/fallback.cpp
    src/fds.cpp
-    src/fish_indent_common.cpp
    src/fish_version.cpp
    src/flog.cpp
    src/highlight.cpp
--- a/fish-rust/src/compat.rs
+++ b/fish-rust/src/compat.rs
@ -50,6 +50,7 @@ extern "C" {
    pub fn C_O_EXLOCK() -> c_int;
    pub fn stdout_stream() -> *mut libc::FILE;
    pub fn UVAR_FILE_SET_MTIME_HACK() -> bool;
+    pub fn setlinebuf(stream: *mut libc::FILE);
 }

 macro_rules! CVAR {
--- a/fish-rust/src/fds.rs
+++ b/fish-rust/src/fds.rs
@ -164,7 +164,7 @@ pub fn make_autoclose_pipes() -> Option<AutoClosePipes> {
 }

 /// Sets CLO_EXEC on a given fd according to the value of \p should_set.
-pub fn set_cloexec(fd: RawFd, should_set: bool) -> c_int {
+pub fn set_cloexec(fd: RawFd, should_set: bool /* = true */) -> c_int {
    // Note we don't want to overwrite existing flags like O_NONBLOCK which may be set. So fetch the
    // existing flags and modify them.
    let flags = unsafe { libc::fcntl(fd, F_GETFD, 0) };
--- a/fish-rust/src/ffi.rs
+++ b/fish-rust/src/ffi.rs
@ -24,7 +24,6 @@ include_cpp! {
    #include "exec.h"
    #include "fallback.h"
    #include "fds.h"
-    #include "fish_indent_common.h"
    #include "flog.h"
    #include "function.h"
    #include "highlight.h"
@ -83,8 +82,6 @@ include_cpp! {

    generate!("wgettext_ptr")

-    generate!("pretty_printer_t")
-
    generate!("fd_event_signaller_t")

    generate!("highlight_role_t")
--- a/fish-rust/src/fish_indent.rs
+++ b/fish-rust/src/fish_indent.rs
--- a/fish-rust/src/future.rs
+++ b/fish-rust/src/future.rs
@ -23,3 +23,28 @@ impl<T> IsSomeAnd for Option<T> {
        }
    }
 }
+
+/// Comparator-driven sortedness check for slice-like containers.
+///
+/// Implemented below for `&[T]`, `Vec<T>` and `&Vec<T>`.
+pub trait IsSorted {
+    /// Element type handed to the comparator.
+    type T;
+    /// Returns `true` if `pred` reports `Some(Less)` or `Some(Equal)` for every pair of adjacent
+    /// elements, taken in order. Sequences with fewer than two elements are always sorted.
+    ///
+    /// A pair for which `pred` returns `None` (the elements are incomparable) makes the whole
+    /// sequence count as unsorted; this is the same rule the standard library applies to
+    /// `PartialOrd`-based sortedness checks.
+    fn is_sorted_by(&self, pred: impl Fn(&Self::T, &Self::T) -> Option<std::cmp::Ordering>)
+        -> bool;
+}
+impl<T> IsSorted for &[T] {
+    type T = T;
+    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
+        // `matches!` rejects `None` as well as `Some(Greater)`, so an incomparable pair can
+        // never be reported as sorted.
+        self.windows(2)
+            .all(|w| matches!(pred(&w[0], &w[1]), Some(order) if order.is_le()))
+    }
+}
+impl<T> IsSorted for Vec<T> {
+    type T = T;
+    /// Delegates to the `&[T]` implementation.
+    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
+        IsSorted::is_sorted_by(&self.as_slice(), pred)
+    }
+}
+impl<T> IsSorted for &Vec<T> {
+    type T = T;
+    /// Delegates to the `&[T]` implementation.
+    fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option<std::cmp::Ordering>) -> bool {
+        IsSorted::is_sorted_by(&self.as_slice(), pred)
+    }
+}
--- a/src/ffi_baggage.h
+++ b/src/ffi_baggage.h
@ -3,7 +3,6 @@
 #include "builtins/commandline.h"
 #include "event.h"
 #include "fds.h"
-#include "fish_indent_common.h"
 #include "highlight.h"
 #include "input.h"
 #include "parse_util.h"
@ -23,7 +22,6 @@ void mark_as_used(const parser_t& parser, env_stack_t& env_stack) {
    highlight_spec_t{};
    init_input();
    make_pipes_ffi();
-    pretty_printer_t({}, {});
    reader_change_cursor_selection_mode(cursor_selection_mode_t::exclusive);
    reader_change_history({});
    reader_read_ffi({}, {}, {});
--- a/src/fish_indent.cpp
+++ b/src/fish_indent.cpp
@ -41,7 +41,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include "fds.h"
 #include "ffi_baggage.h"
 #include "ffi_init.rs.h"
-#include "fish_indent_common.h"
+#include "fish_indent.rs.h"
 #include "fish_version.h"
 #include "flog.h"
 #include "future_feature_flags.h"
@ -52,454 +52,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include "wcstringutil.h"
 #include "wutil.h"  // IWYU pragma: keep

-static bool dump_parse_tree = false;
-static int ret = 0;
-
-// Read the entire contents of a file into the specified string.
-static wcstring read_file(FILE *f) {
-    wcstring result;
-    while (true) {
-        wint_t c = std::fgetwc(f);
-
-        if (c == WEOF) {
-            if (ferror(f)) {
-                if (errno == EILSEQ) {
-                    // Illegal byte sequence. Try to skip past it.
-                    clearerr(f);
-                    int ch = fgetc(f);  // for printing the warning, and seeks forward 1 byte.
-                    FLOGF(warning, "%s (byte=%X)", std::strerror(errno), ch);
-                    ret = 1;
-                    continue;
-                } else {
-                    wperror(L"fgetwc");
-                    exit(1);
-                }
-            }
-            break;
-        }
-        result.push_back(static_cast<wchar_t>(c));
-    }
-    return result;
-}
-
-static const char *highlight_role_to_string(highlight_role_t role) {
-#define TEST_ROLE(x)          \
-    case highlight_role_t::x: \
-        return #x;
-    switch (role) {
-        TEST_ROLE(normal)
-        TEST_ROLE(error)
-        TEST_ROLE(command)
-        TEST_ROLE(keyword)
-        TEST_ROLE(statement_terminator)
-        TEST_ROLE(param)
-        TEST_ROLE(option)
-        TEST_ROLE(comment)
-        TEST_ROLE(search_match)
-        TEST_ROLE(operat)
-        TEST_ROLE(escape)
-        TEST_ROLE(quote)
-        TEST_ROLE(redirection)
-        TEST_ROLE(autosuggestion)
-        TEST_ROLE(selection)
-        TEST_ROLE(pager_progress)
-        TEST_ROLE(pager_background)
-        TEST_ROLE(pager_prefix)
-        TEST_ROLE(pager_completion)
-        TEST_ROLE(pager_description)
-        TEST_ROLE(pager_secondary_background)
-        TEST_ROLE(pager_secondary_prefix)
-        TEST_ROLE(pager_secondary_completion)
-        TEST_ROLE(pager_secondary_description)
-        TEST_ROLE(pager_selected_background)
-        TEST_ROLE(pager_selected_prefix)
-        TEST_ROLE(pager_selected_completion)
-        TEST_ROLE(pager_selected_description)
-        default:
-            DIE("UNKNOWN ROLE");
-    }
-#undef TEST_ROLE
-}
-
-// Entry point for Pygments CSV output.
-// Our output is a newline-separated string.
-// Each line is of the form `start,end,role`
-// start and end is the half-open token range, value is a string from highlight_role_t.
-// Example:
-// 3,7,command
-static std::string make_pygments_csv(const wcstring &src) {
-    const size_t len = src.size();
-    auto colors = highlight_shell_ffi(src, *operation_context_globals(), false, {});
-    assert(colors->size() == len && "Colors and src should have same size");
-
-    struct token_range_t {
-        unsigned long start;
-        unsigned long end;
-        highlight_role_t role;
-    };
-
-    std::vector<token_range_t> token_ranges;
-    for (size_t i = 0; i < len; i++) {
-        highlight_role_t role = colors->at(i).foreground;
-        // See if we can extend the last range.
-        if (!token_ranges.empty()) {
-            auto &last = token_ranges.back();
-            if (last.role == role && last.end == i) {
-                last.end = i + 1;
-                continue;
-            }
-        }
-        // We need a new range.
-        token_ranges.push_back(token_range_t{i, i + 1, role});
-    }
-
-    // Now render these to a string.
-    std::string result;
-    for (const auto &range : token_ranges) {
-        char buff[128];
-        snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end,
-                 highlight_role_to_string(range.role));
-        result.append(buff);
-    }
-    return result;
-}
-
-// Entry point for prettification.
-static wcstring prettify(const wcstring &src, bool do_indent) {
-    if (dump_parse_tree) {
-        auto ast = ast_parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
-                                      parse_flag_show_extra_semis);
-        wcstring ast_dump = *ast->dump(src);
-        std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
-    }
-
-    pretty_printer_t printer{src, do_indent};
-    wcstring output = printer.prettify();
-    return output;
-}
-
-/// Given a string and list of colors of the same size, return the string with HTML span elements
-/// for the various colors.
-static const wchar_t *html_class_name_for_color(highlight_spec_t spec) {
-#define P(x) L"fish_color_" #x
-    switch (spec->foreground) {
-        case highlight_role_t::normal: {
-            return P(normal);
-        }
-        case highlight_role_t::error: {
-            return P(error);
-        }
-        case highlight_role_t::command: {
-            return P(command);
-        }
-        case highlight_role_t::statement_terminator: {
-            return P(statement_terminator);
-        }
-        case highlight_role_t::param: {
-            return P(param);
-        }
-        case highlight_role_t::option: {
-            return P(option);
-        }
-        case highlight_role_t::comment: {
-            return P(comment);
-        }
-        case highlight_role_t::search_match: {
-            return P(search_match);
-        }
-        case highlight_role_t::operat: {
-            return P(operator);
-        }
-        case highlight_role_t::escape: {
-            return P(escape);
-        }
-        case highlight_role_t::quote: {
-            return P(quote);
-        }
-        case highlight_role_t::redirection: {
-            return P(redirection);
-        }
-        case highlight_role_t::autosuggestion: {
-            return P(autosuggestion);
-        }
-        case highlight_role_t::selection: {
-            return P(selection);
-        }
-        default: {
-            return P(other);
-        }
-    }
-}
-
-static std::string html_colorize(const wcstring &text,
-                                 const std::vector<highlight_spec_t> &colors) {
-    if (text.empty()) {
-        return "";
-    }
-
-    assert(colors.size() == text.size());
-    wcstring html = L"<pre><code>";
-    highlight_spec_t last_color = highlight_role_t::normal;
-    for (size_t i = 0; i < text.size(); i++) {
-        // Handle colors.
-        highlight_spec_t color = colors.at(i);
-        if (i > 0 && color != last_color) {
-            html.append(L"</span>");
-        }
-        if (i == 0 || color != last_color) {
-            append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
-        }
-        last_color = color;
-
-        // Handle text.
-        wchar_t wc = text.at(i);
-        switch (wc) {
-            case L'&': {
-                html.append(L"&amp;");
-                break;
-            }
-            case L'\'': {
-                html.append(L"&apos;");
-                break;
-            }
-            case L'"': {
-                html.append(L"&quot;");
-                break;
-            }
-            case L'<': {
-                html.append(L"&lt;");
-                break;
-            }
-            case L'>': {
-                html.append(L"&gt;");
-                break;
-            }
-            default: {
-                html.push_back(wc);
-                break;
-            }
-        }
-    }
-    html.append(L"</span></code></pre>");
-    return wcs2zstring(html);
-}
-
-static std::string no_colorize(const wcstring &text) { return wcs2zstring(text); }
-
-int main(int argc, char *argv[]) {
+int main() {
    program_name = L"fish_indent";
-    rust_init();
-    // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
-    // because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
-    //
-    // TODO: Auto-detect the encoding of the script. We should look for a vim style comment
-    // (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
-    // (e.g., "# -*- coding: <encoding-name> -*-").
-    setlocale(LC_ALL, "");
-    rust_env_init(true);
-
-    if (auto features_var = env_stack_t::globals().get(L"fish_features")) {
-        for (const wcstring &s : features_var->as_list()) {
-            feature_set_from_string(s.c_str());
-        }
-    }
-
-    // Types of output we support.
-    enum {
-        output_type_plain_text,
-        output_type_file,
-        output_type_ansi,
-        output_type_pygments_csv,
-        output_type_check,
-        output_type_html
-    } output_type = output_type_plain_text;
-    const char *output_location = "";
-    bool do_indent = true;
-    // File path for debug output.
-    std::string debug_output;
-
-    const char *short_opts = "+d:hvwicD:";
-    const struct option long_opts[] = {{"debug", required_argument, nullptr, 'd'},
-                                       {"debug-output", required_argument, nullptr, 'o'},
-                                       {"debug-stack-frames", required_argument, nullptr, 'D'},
-                                       {"dump-parse-tree", no_argument, nullptr, 'P'},
-                                       {"no-indent", no_argument, nullptr, 'i'},
-                                       {"help", no_argument, nullptr, 'h'},
-                                       {"version", no_argument, nullptr, 'v'},
-                                       {"write", no_argument, nullptr, 'w'},
-                                       {"html", no_argument, nullptr, 1},
-                                       {"ansi", no_argument, nullptr, 2},
-                                       {"pygments", no_argument, nullptr, 3},
-                                       {"check", no_argument, nullptr, 'c'},
-                                       {}};
-
-    int opt;
-    while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) {
-        switch (opt) {
-            case 'P': {
-                dump_parse_tree = true;
-                break;
-            }
-            case 'h': {
-                unsafe_print_help("fish_indent");
-                exit(0);
-            }
-            case 'v': {
-                std::fwprintf(stdout, _(L"%ls, version %s\n"), program_name, get_fish_version());
-                exit(0);
-            }
-            case 'w': {
-                output_type = output_type_file;
-                break;
-            }
-            case 'i': {
-                do_indent = false;
-                break;
-            }
-            case 1: {
-                output_type = output_type_html;
-                break;
-            }
-            case 2: {
-                output_type = output_type_ansi;
-                break;
-            }
-            case 3: {
-                output_type = output_type_pygments_csv;
-                break;
-            }
-            case 'c': {
-                output_type = output_type_check;
-                break;
-            }
-            case 'd': {
-                activate_flog_categories_by_pattern(str2wcstring(optarg));
-                for (auto cat : get_flog_categories()) {
-                    if (cat->enabled) {
-                        std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name);
-                    }
-                }
-                break;
-            }
-            case 'D': {
-                // TODO: Option is currently useless.
-                // Either remove it or make it work with FLOG.
-                break;
-            }
-            case 'o': {
-                debug_output = optarg;
-                break;
-            }
-            default: {
-                // We assume getopt_long() has already emitted a diagnostic msg.
-                exit(1);
-            }
-        }
-    }
-
-    argc -= optind;
-    argv += optind;
-
-    // Direct any debug output right away.
-    FILE *debug_output_file = nullptr;
-    if (!debug_output.empty()) {
-        debug_output_file = fopen(debug_output.c_str(), "w");
-        if (!debug_output_file) {
-            fprintf(stderr, "Could not open file %s\n", debug_output.c_str());
-            perror("fopen");
-            exit(-1);
-        }
-        set_cloexec(fileno(debug_output_file));
-        setlinebuf(debug_output_file);
-        set_flog_output_file(debug_output_file);
-    }
-
-    int retval = 0;
-
-    wcstring src;
-    for (int i = 0; i < argc || (argc == 0 && i == 0); i++) {
-        if (argc == 0 && i == 0) {
-            if (output_type == output_type_file) {
-                std::fwprintf(
-                    stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"),
-                    program_name);
-                exit(1);
-            }
-            src = read_file(stdin);
-        } else {
-            FILE *fh = fopen(argv[i], "r");
-            if (fh) {
-                src = read_file(fh);
-                fclose(fh);
-                output_location = argv[i];
-            } else {
-                std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), argv[i],
-                              std::strerror(errno));
-                exit(1);
-            }
-        }
-
-        if (output_type == output_type_pygments_csv) {
-            std::string output = make_pygments_csv(src);
-            fputs(output.c_str(), stdout);
-            continue;
-        }
-
-        const wcstring output_wtext = prettify(src, do_indent);
-
-        // Maybe colorize.
-        std::vector<highlight_spec_t> colors;
-        maybe_t<rust::Box<HighlightSpecListFFI>> ffi_colors;
-        if (output_type != output_type_plain_text) {
-            highlight_shell(output_wtext, colors, *operation_context_globals());
-            ffi_colors = highlight_shell_ffi(output_wtext, *operation_context_globals(), false, {});
-        }
-
-        std::string colored_output;
-        switch (output_type) {
-            case output_type_plain_text: {
-                colored_output = no_colorize(output_wtext);
-                break;
-            }
-            case output_type_file: {
-                FILE *fh = fopen(output_location, "w");
-                if (fh) {
-                    std::fputws(output_wtext.c_str(), fh);
-                    fclose(fh);
-                } else {
-                    std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location,
-                                  std::strerror(errno));
-                    exit(1);
-                }
-                break;
-            }
-            case output_type_ansi: {
-                auto ffi_colored =
-                    colorize(output_wtext, **ffi_colors, env_stack_t::globals().get_impl_ffi());
-                for (uint8_t c : ffi_colored) {
-                    colored_output.push_back(c);
-                }
-                break;
-            }
-            case output_type_html: {
-                colored_output = html_colorize(output_wtext, colors);
-                break;
-            }
-            case output_type_pygments_csv: {
-                DIE("pygments_csv should have been handled above");
-            }
-            case output_type_check: {
-                if (output_wtext != src) {
-                    if (argc) {
-                        std::fwprintf(stderr, _(L"%s\n"), argv[i]);
-                    }
-                    retval++;
-                }
-                break;
-            }
-        }
-
-        std::fputws(str2wcstring(colored_output).c_str(), stdout);
-    }
-    return retval;
+    return fish_indent_main();
 }
--- a/src/fish_indent_common.cpp
+++ b/src/fish_indent_common.cpp
@ -1,476 +0,0 @@
-#include "fish_indent_common.h"
-
-#include "ast.h"
-#include "common.h"
-#include "env.h"
-#include "expand.h"
-#include "flog.h"
-#include "global_safety.h"
-#include "maybe.h"
-#include "operation_context.h"
-#include "parse_constants.h"
-#include "parse_util.h"
-#include "tokenizer.h"
-#include "wcstringutil.h"
-#if INCLUDE_RUST_HEADERS
-#include "fish_indent.rs.h"
-#endif
-
-using namespace ast;
-
-// The number of spaces per indent isn't supposed to be configurable.
-// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
-#define SPACES_PER_INDENT 4
-
-/// \return whether a character at a given index is escaped.
-/// A character is escaped if it has an odd number of backslashes.
-static bool char_is_escaped(const wcstring &text, size_t idx) {
-    return count_preceding_backslashes(text, idx) % 2 == 1;
-}
-
-pretty_printer_t::pretty_printer_t(const wcstring &src, bool do_indent)
-    : source(src),
-      indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
-      ast(ast_parse(src, parse_flags())),
-      visitor(new_pretty_printer(*this)),
-      do_indent(do_indent),
-      gaps(compute_gaps()),
-      preferred_semi_locations(compute_preferred_semi_locations()) {
-    assert(indents.size() == source.size() && "indents and source should be same length");
-}
-
-pretty_printer_t::gap_flags_t pretty_printer_t::gap_text_flags_before_node(const node_t &node) {
-    gap_flags_t result = default_flags;
-    switch (node.typ()) {
-        // Allow escaped newlines before leaf nodes that can be part of a long command.
-        case type_t::argument:
-        case type_t::redirection:
-        case type_t::variable_assignment:
-            result |= allow_escaped_newlines;
-            break;
-
-        case type_t::token_base:
-            // Allow escaped newlines before && and ||, and also pipes.
-            switch (node.token_type()) {
-                case parse_token_type_t::andand:
-                case parse_token_type_t::oror:
-                case parse_token_type_t::pipe:
-                    result |= allow_escaped_newlines;
-                    break;
-                case parse_token_type_t::string: {
-                    // Allow escaped newlines before commands that follow a variable assignment
-                    // since both can be long (#7955).
-                    auto p = node.parent();
-                    if (p->typ() != type_t::decorated_statement) break;
-                    p = p->parent();
-                    assert(p->typ() == type_t::statement);
-                    p = p->parent();
-                    if (auto *job = p->try_as_job_pipeline()) {
-                        if (!job->variables().empty()) result |= allow_escaped_newlines;
-                    } else if (auto *job_cnt = p->try_as_job_continuation()) {
-                        if (!job_cnt->variables().empty()) result |= allow_escaped_newlines;
-                    } else if (auto *not_stmt = p->try_as_not_statement()) {
-                        if (!not_stmt->variables().empty()) result |= allow_escaped_newlines;
-                    }
-                    break;
-                }
-                default:
-                    break;
-            }
-            break;
-
-        default:
-            break;
-    }
-    return result;
-}
-
-bool pretty_printer_t::has_preceding_space() const {
-    long idx = static_cast<long>(output.size()) - 1;
-    // Skip escaped newlines.
-    // This is historical. Example:
-    //
-    // cmd1 \
-        // | cmd2
-    //
-    // we want the pipe to "see" the space after cmd1.
-    // TODO: this is too tricky, we should factor this better.
-    while (idx >= 0 && output.at(idx) == L'\n') {
-        size_t backslashes = count_preceding_backslashes(source, idx);
-        if (backslashes % 2 == 0) {
-            // Not escaped.
-            return false;
-        }
-        idx -= (1 + backslashes);
-    }
-    return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
-}
-
-wcstring pretty_printer_t::prettify() {
-    output = wcstring{};
-    visitor->visit(*ast->top());
-
-    // Trailing gap text.
-    emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
-
-    // Replace all trailing newlines with just a single one.
-    while (!output.empty() && at_line_start()) {
-        output.pop_back();
-    }
-    emit_newline();
-
-    wcstring result = std::move(output);
-    return result;
-}
-
-std::vector<source_range_t> pretty_printer_t::compute_gaps() const {
-    auto range_compare = [](source_range_t r1, source_range_t r2) {
-        if (r1.start != r2.start) return r1.start < r2.start;
-        return r1.length < r2.length;
-    };
-    // Collect the token ranges into a list.
-    std::vector<source_range_t> tok_ranges;
-    for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
-        auto node = ast_traversal->next();
-        if (!node->has_value()) break;
-        if (node->category() == category_t::leaf) {
-            auto r = node->source_range();
-            if (r.length > 0) tok_ranges.push_back(r);
-        }
-    }
-    // Place a zero length range at end to aid in our inverting.
-    tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
-
-    // Our tokens should be sorted.
-    assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
-
-    // For each range, add a gap range between the previous range and this range.
-    std::vector<source_range_t> gaps;
-    uint32_t prev_end = 0;
-    for (source_range_t tok_range : tok_ranges) {
-        assert(tok_range.start >= prev_end && "Token range should not overlap or be out of order");
-        if (tok_range.start >= prev_end) {
-            gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
-        }
-        prev_end = tok_range.start + tok_range.length;
-    }
-    return gaps;
-}
-
-void pretty_printer_t::visit_begin_header() {
-    if (!at_line_start()) {
-        emit_newline();
-    }
-}
-
-void pretty_printer_t::visit_maybe_newlines(const void *node_) {
-    const auto &node = *static_cast<const maybe_newlines_t *>(node_);
-    // Our newlines may have comments embedded in them, example:
-    //    cmd |
-    //    # something
-    //    cmd2
-    // Treat it as gap text.
-    if (node.range().length > 0) {
-        auto flags = gap_text_flags_before_node(*node.ptr());
-        current_indent = indents.at(node.range().start);
-        bool added_newline = emit_gap_text_before(node.range(), flags);
-        source_range_t gap_range = node.range();
-        if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') {
-            gap_range.start++;
-        }
-        emit_gap_text(gap_range, flags);
-    }
-}
-
-void pretty_printer_t::visit_redirection(const void *node_) {
-    const auto &node = *static_cast<const redirection_t *>(node_);
-    // No space between a redirection operator and its target (#2899).
-    emit_text(node.oper().range(), default_flags);
-    emit_text(node.target().range(), skip_space);
-}
-
-void pretty_printer_t::visit_semi_nl(const void *node_) {
-    // These are semicolons or newlines which are part of the ast. That means it includes e.g.
-    // ones terminating a job or 'if' header, but not random semis in job lists. We respect
-    // preferred_semi_locations to decide whether or not these should stay as newlines or
-    // become semicolons.
-    const auto &node = *static_cast<const node_t *>(node_);
-    auto range = node.source_range();
-
-    // Check if we should prefer a semicolon.
-    bool prefer_semi =
-        range.length > 0 && std::binary_search(preferred_semi_locations.begin(),
-                                               preferred_semi_locations.end(), range.start);
-    emit_gap_text_before(range, gap_text_flags_before_node(*node.ptr()));
-
-    // Don't emit anything if the gap text put us on a newline (because it had a comment).
-    if (!at_line_start()) {
-        prefer_semi ? emit_semi() : emit_newline();
-
-        // If it was a semi but we emitted a newline, swallow a subsequent newline.
-        if (!prefer_semi && substr(range) == L";") {
-            gap_text_mask_newline = true;
-        }
-    }
-}
-
-void pretty_printer_t::emit_node_text(const void *node_) {
-    const auto &node = *static_cast<const node_t *>(node_);
-    source_range_t range = node.source_range();
-
-    // Weird special-case: a token may end in an escaped newline. Notably, the newline is
-    // not part of the following gap text, handle indentation here (#8197).
-    bool ends_with_escaped_nl = range.length >= 2 && source.at(range.end() - 2) == L'\\' &&
-                                source.at(range.end() - 1) == L'\n';
-    if (ends_with_escaped_nl) {
-        range = {range.start, range.length - 2};
-    }
-
-    emit_text(range, gap_text_flags_before_node(node));
-
-    if (ends_with_escaped_nl) {
-        // By convention, escaped newlines are preceded with a space.
-        output.append(L" \\\n");
-        // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
-        // The cases where this matters are probably very rare.
-        current_indent++;
-        emit_space_or_indent();
-        current_indent--;
-    }
-}
-
-void pretty_printer_t::emit_text(source_range_t r, gap_flags_t flags) {
-    emit_gap_text_before(r, flags);
-    current_indent = indents.at(r.start);
-    if (r.length > 0) {
-        emit_space_or_indent(flags);
-        output.append(clean_text(substr(r)));
-    }
-}
-
-wcstring pretty_printer_t::clean_text(const wcstring &input) {
-    // Unescape the string - this leaves special markers around if there are any
-    // expansions or anything. We specifically tell it to not compute backslash-escapes
-    // like \U or \x, because we want to leave them intact.
-    wcstring unescaped =
-        *unescape_string(input.c_str(), input.size(), UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES,
-                         STRING_STYLE_SCRIPT);
-
-    // Remove INTERNAL_SEPARATOR because that's a quote.
-    auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
-    unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
-
-    // If no non-"good" char is left, use the unescaped version.
-    // This can be extended to other characters, but giving the precise list is tough,
-    // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
-    // people feel more at ease.
-    auto goodchars = [](wchar_t ch) {
-        return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
-    };
-    if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
-        !unescaped.empty()) {
-        return unescaped;
-    } else {
-        return input;
-    }
-}
-
-bool pretty_printer_t::emit_gap_text_before(source_range_t r, gap_flags_t flags) {
-    assert(r.start <= source.size() && "source out of bounds");
-    bool added_newline = false;
-
-    // Find the gap text which ends at start.
-    source_range_t range = gap_text_to(r.start);
-    if (range.length > 0) {
-        // Set the indent from the beginning of this gap text.
-        // For example:
-        // begin
-        //    cmd
-        //    # comment
-        // end
-        // Here the comment is the gap text before the end, but we want the indent from the
-        // command.
-        if (range.start < indents.size()) current_indent = indents.at(range.start);
-
-        // If this range contained an error, append the gap text without modification.
-        // For example in: echo foo "
-        // We don't want to mess with the quote.
-        if (range_contained_error(range)) {
-            output.append(substr(range));
-        } else {
-            added_newline = emit_gap_text(range, flags);
-        }
-    }
-    // Always clear gap_text_mask_newline after emitting even empty gap text.
-    gap_text_mask_newline = false;
-    return added_newline;
-}
-
-bool pretty_printer_t::range_contained_error(source_range_t r) const {
-    const auto &errs = ast->extras()->errors();
-    auto range_is_before = [](source_range_t x, source_range_t y) {
-        return x.start + x.length <= y.start;
-    };
-    assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
-           "Error ranges should be sorted");
-    return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
-}
-
-source_range_t pretty_printer_t::gap_text_to(uint32_t end) const {
-    auto where =  // First gap whose end offset is not less than 'end'.
-        std::lower_bound(gaps.begin(), gaps.end(), end,
-                         [](source_range_t r, uint32_t end) { return r.start + r.length < end; });
-    if (where == gaps.end() || where->start + where->length != end) {
-        // Not found: no gap ends exactly at 'end', so report an empty range.
-        return source_range_t{0, 0};
-    } else {
-        return *where;  // This gap ends exactly at 'end'.
-    }
-}
-
-bool pretty_printer_t::emit_gap_text(source_range_t range, gap_flags_t flags) {
-    wcstring gap_text = substr(range);
-    // Common case: the gap is empty or consists only of spaces, so there is nothing to emit.
-    if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;
-
-    // Look to see if there is an escaped newline (a backslash immediately followed by '\n').
-    // Emit it if either the flags allow it, or it comes before the first comment.
-    // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
-    // text - we already know it has no semantic significance.
-    size_t escaped_nl = gap_text.find(L"\\\n");
-    if (escaped_nl != wcstring::npos) {
-        size_t comment_idx = gap_text.find(L'#');
-        if ((flags & allow_escaped_newlines) ||
-            (comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
-            // Emit a space before the escaped newline.
-            if (!at_line_start() && !has_preceding_space()) {
-                output.append(L" ");
-            }
-            output.append(L"\\\n");
-            // Indent the continuation line and any leading comments (#7252).
-            // Use the indentation level recorded at the newline that follows the backslash.
-            current_indent = indents.at(range.start + escaped_nl + 1);
-            emit_space_or_indent();
-        }
-    }
-
-    // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
-    // always emit one: needs_nl is set after a comment and cleared once its newline is written.
-    bool needs_nl = false;
-
-    auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
-    while (auto tok = tokenizer->next()) {
-        wcstring tok_text = *tokenizer->text_of(*tok);
-
-        if (needs_nl) {  // The previous token was a comment: terminate its line first.
-            emit_newline();
-            needs_nl = false;
-            if (tok_text == L"\n") continue;  // That newline was just emitted; do not double it.
-        } else if (gap_text_mask_newline) {
-            // We only respect gap_text_mask_newline the first time through the loop.
-            gap_text_mask_newline = false;
-            if (tok_text == L"\n") continue;  // Swallow the masked leading newline.
-        }
-
-        if (tok->type_ == token_type_t::comment) {
-            emit_space_or_indent();
-            output.append(tok_text);
-            needs_nl = true;
-        } else if (tok->type_ == token_type_t::end) {
-            // This may be either a newline or semicolon.
-            // Semicolons found here are not part of the ast and can simply be removed.
-            // Newlines are emitted as-is; a masked leading newline was already skipped above.
-            if (tok_text == L"\n") {
-                emit_newline();
-            }
-        } else {
-            fprintf(stderr,
-                    "Gap text should only have comments and newlines - instead found token "
-                    "type %d with text: %ls\n",
-                    (int)tok->type_, tok_text.c_str());
-            DIE("Gap text should only have comments and newlines");
-        }
-    }
-    if (needs_nl) emit_newline();  // The gap text ended with a comment; give it its newline.
-    return needs_nl;  // True exactly when that final newline after a trailing comment was emitted.
-}
-
-void pretty_printer_t::emit_space_or_indent(gap_flags_t flags) {
-    if (at_line_start()) {  // Fresh line: pad with spaces out to the current indent level.
-        output.append(SPACES_PER_INDENT * current_indent, L' ');
-    } else if (!(flags & skip_space) && !has_preceding_space()) {  // Mid-line: one separating space.
-        output.append(1, L' ');
-    }
-}
-
-std::vector<uint32_t> pretty_printer_t::compute_preferred_semi_locations() const {
-    std::vector<uint32_t> result;  // Start offsets of semi_nl nodes that should stay semicolons.
-    auto mark_semi_from_input = [&](const semi_nl_t &n) {  // Record n if the input wrote it as ';'.
-        if (n.ptr()->has_source() && substr(n.range()) == L";") {
-            result.push_back(n.range().start);
-        }
-    };
-
-    // First pass: andor_job_lists after an if/while condition get semis if the input uses semis.
-    for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
-        auto node = ast_traversal->next();
-        if (!node->has_value()) break;  // Traversal exhausted.
-        // See if we have a condition and an andor_job_list.
-        const semi_nl_t *condition = nullptr;
-        const andor_job_list_t *andors = nullptr;
-        if (const auto *ifc = node->try_as_if_clause()) {
-            if (ifc->condition().has_semi_nl()) {
-                condition = &ifc->condition().semi_nl();
-            }
-            andors = &ifc->andor_tail();
-        } else if (const auto *wc = node->try_as_while_header()) {
-            if (wc->condition().has_semi_nl()) {
-                condition = &wc->condition().semi_nl();
-            }
-            andors = &wc->andor_tail();
-        }
-
-        // If there is no and-or tail then we always use a newline.
-        if (andors && andors->count() > 0) {
-            if (condition) mark_semi_from_input(*condition);
-            // Mark all but the last entry of the andor list.
-            for (uint32_t i = 0; i + 1 < andors->count(); i++) {
-                mark_semi_from_input(andors->at(i)->job().semi_nl());
-            }
-        }
-    }
-
-    // Second pass: `x ; and y` gets semis if it has them already, and they are on the same line.
-    for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
-        auto node = ast_traversal->next();
-        if (!node->has_value()) break;  // Traversal exhausted.
-        if (const auto *job_list = node->try_as_job_list()) {
-            const semi_nl_t *prev_job_semi_nl = nullptr;
-            for (size_t i = 0; i < job_list->count(); i++) {
-                const job_conjunction_t &job = *job_list->at(i);
-                // Set up prev_job_semi_nl for the next iteration to make control flow easier.
-                const semi_nl_t *prev = prev_job_semi_nl;
-                prev_job_semi_nl = job.has_semi_nl() ? &job.semi_nl() : nullptr;
-
-                // Is this an 'and' or 'or' job? Only those carry a decorator.
-                if (!job.has_decorator()) continue;
-
-                // Now see if we want to mark 'prev' as allowing a semi.
-                // Did we have a previous semi_nl which was written as a semicolon?
-                if (!prev || substr(prev->range()) != L";") continue;
-
-                // Is there a newline between the previous semi and the end of this decorator?
-                assert(prev->range().start <= job.decorator().range().start &&
-                       "Ranges out of order");
-                auto start = source.begin() + prev->range().start;
-                auto end = source.begin() + job.decorator().range().end();
-                if (std::find(start, end, L'\n') == end) {
-                    // No newline found: we're going to allow the previous semi_nl to be a semi.
-                    result.push_back(prev->range().start);
-                }
-            }
-        }
-    }
-    std::sort(result.begin(), result.end());  // Return the offsets in ascending order.
-    return result;
-}
--- a/src/fish_indent_common.h
+++ b/src/fish_indent_common.h
@ -1,160 +0,0 @@
-#ifndef FISH_INDENT_STAGING_H
-#define FISH_INDENT_STAGING_H
-
-#include "ast.h"
-#include "common.h"
-#include "cxx.h"
-
-struct PrettyPrinter;
-struct pretty_printer_t {  // State and helpers used to prettify a piece of source text.
-    // Note: this got somewhat more complicated after introducing the new AST, because that AST no
-    // longer encodes detailed lexical information (e.g. every newline). This feels more complex
-    // than necessary and would probably benefit from a more layered approach where we identify
-    // certain runs, weight line breaks, have a cost model, etc.
-    pretty_printer_t(const wcstring &src, bool do_indent);
-
-    // Original source. Held by reference, so it must outlive this object.
-    const wcstring &source;
-
-    // The indents of our string.
-    // This has the same length as 'source' and describes the indentation level.
-    const std::vector<int> indents;
-
-    // The parsed ast.
-    rust::Box<Ast> ast;
-
-    rust::Box<PrettyPrinter> visitor;  // NOTE(review): presumably the Rust-side ast visitor - confirm.
-
-    // The prettifier output.
-    wcstring output;
-
-    // The indent of the source range which we are currently emitting.
-    int current_indent{0};
-
-    // Whether to indent, or just insert spaces.
-    const bool do_indent;
-
-    // Whether the next gap text should hide the first newline.
-    bool gap_text_mask_newline{false};
-
-    // The "gaps": a sorted set of ranges between tokens.
-    // These contain whitespace, comments, semicolons, and other lexical elements which are not
-    // present in the ast.
-    const std::vector<source_range_t> gaps;
-
-    // The sorted set of source offsets of semi_nl_t which should be set as semis, not newlines.
-    // This is computed ahead of time for convenience.
-    const std::vector<uint32_t> preferred_semi_locations;
-
-    // Flags we support.
-    using gap_flags_t = uint32_t;
-    enum {
-        default_flags = 0,
-
-        // Whether to allow line splitting via escaped newlines.
-        // For example, in argument lists:
-        //
-        //   echo a \
-        //   b
-        //
-        // If this is not set, then split-lines will be joined.
-        allow_escaped_newlines = 1 << 0,
-
-        // Whether to skip the space that would otherwise be emitted before this token.
-        // This is used when emitting semis:
-        //    echo a; echo b;
-        // No space required between 'a' and ';', or 'b' and ';'.
-        skip_space = 1 << 1,
-    };
-
-#if INCLUDE_RUST_HEADERS
-    // \return gap text flags for the gap text that comes *before* a given node type.
-    static gap_flags_t gap_text_flags_before_node(const ast::node_t &node);
-#endif
-
-    // \return whether we are at the start of a new line (empty output counts as a line start).
-    bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
-
-    // \return whether we have a space before the output.
-    // This ignores escaped spaces and escaped newlines.
-    bool has_preceding_space() const;
-
-    // Entry point. Prettify our source code and return it.
-    wcstring prettify();
-
-    // \return a substring of source.
-    wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
-
-    // Return the gap ranges from our ast.
-    std::vector<source_range_t> compute_gaps() const;
-
-    // Return sorted list of semi-preferring semi_nl nodes.
-    std::vector<uint32_t> compute_preferred_semi_locations() const;
-
-    // Emit a space or indent as necessary, depending on the previous output.
-    void emit_space_or_indent(gap_flags_t flags = default_flags);
-
-    // Emit "gap text:" newlines and comments from the original source.
-    // Gap text may be a few things:
-    //
-    // 1. Just a space is common. We will trim the spaces to be empty.
-    //
-    // Here the gap text is the comment, followed by the newline:
-    //
-    //    echo abc # arg
-    //    echo def
-    //
-    // 2. It may also be an escaped newline:
-    // Here the gap text is a space, backslash, newline, space.
-    //
-    //     echo \
-    //       hi
-    //
-    // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
-    //
-    //   begin | stuff
-    //
-    //  We do not handle errors here - instead our caller does.
-    bool emit_gap_text(source_range_t range, gap_flags_t flags);
-
-    /// \return the gap text ending at a given index into the string, or empty if none.
-    source_range_t gap_text_to(uint32_t end) const;
-
-    /// \return whether a range \p r overlaps an error range from our ast.
-    bool range_contained_error(source_range_t r) const;
-
-    // Emit the gap text before a source range.
-    bool emit_gap_text_before(source_range_t r, gap_flags_t flags);
-
-    /// Given a string \p input, remove unnecessary quotes, etc.
-    wcstring clean_text(const wcstring &input);
-
-    // Emit a range of original text. This indents as needed, and also inserts preceding gap text.
-    // NOTE(review): this comment used to name a parameter 'tolerate_line_splitting', which does not
-    // exist; presumably allow_escaped_newlines in \p flags now decides whether escaped newlines are kept - confirm.
-    void emit_text(source_range_t r, gap_flags_t flags);
-
-    void emit_node_text(const void *node);  // NOTE(review): type-erased pointer; presumably an ast node - confirm.
-
-    // Emit one newline.
-    void emit_newline() { output.push_back(L'\n'); }
-
-    // Emit a semicolon.
-    void emit_semi() { output.push_back(L';'); }
-
-    void visit_semi_nl(const void *node_);  // NOTE(review): node_ is type-erased; pointee type not visible here.
-
-    void visit_redirection(const void *node_);
-
-    void visit_maybe_newlines(const void *node_);
-
-    void visit_begin_header();
-
-    // The flags we use to parse.
-    static parse_tree_flags_t parse_flags() {
-        return parse_flag_continue_after_error | parse_flag_include_comments |
-               parse_flag_leave_unterminated | parse_flag_show_blank_lines;
-    }
-};
-
-#endif  // FISH_INDENT_STAGING_H