fish-shell/src/fish_indent.cpp

// The fish_indent program.
/*
Copyright (C) 2014 ridiculous_fish

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as
published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
*/
#include "config.h"  // IWYU pragma: keep

#include <errno.h>
#include <getopt.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "ast.h"
#include "common.h"
#include "env.h"
#include "expand.h"
#include "fds.h"
#include "fish_version.h"
#include "flog.h"
#include "future_feature_flags.h"
#include "global_safety.h"
#include "highlight.h"
#include "maybe.h"
#include "operation_context.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "print_help.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#include "wutil.h"  // IWYU pragma: keep

// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4

// When true, prettify() dumps the parsed AST of its input to stderr before formatting it.
static bool dump_parse_tree = false;
// Process-wide status value. read_file() sets it to 1 when it has to skip an illegal byte
// sequence. NOTE(review): presumably used as the program's exit status - confirm in main().
static int ret = 0;

// Read everything remaining in the stream \p f and return it as a wide string.
//
// If a byte sequence cannot be decoded (EILSEQ), a warning is logged, one byte is skipped, the
// global 'ret' is set to 1, and reading continues. Any other read error is reported via wperror()
// and terminates the process with exit status 1.
static wcstring read_file(FILE *f) {
    wcstring result;
    while (true) {
        wint_t c = std::fgetwc(f);
        if (c != WEOF) {
            result.push_back(static_cast<wchar_t>(c));
            continue;
        }

        // Snapshot errno right away: the calls below (clearerr, fgetc, logging) are allowed to
        // change it, and we want to report the reason fgetwc() itself failed.
        const int read_errno = errno;

        // WEOF without the error indicator set means a plain end of file.
        if (!ferror(f)) break;

        if (read_errno != EILSEQ) {
            errno = read_errno;
            wperror(L"fgetwc");
            exit(1);
        }

        // Illegal byte sequence. Try to skip past it.
        clearerr(f);
        int ch = fgetc(f);  // for printing the warning, and seeks forward 1 byte.
        FLOGF(warning, "%s (byte=%X)", std::strerror(read_errno), ch);
        ret = 1;
    }
    return result;
}

namespace {
/// Backport of C++14's std::enable_if_t: names the nested `type` of std::enable_if, which only
/// exists when \p Condition is true.
template <bool Condition, typename Result = void>
using enable_if_t = typename std::enable_if<Condition, Result>::type;

/// \return true if the character at index \p idx of \p text is escaped, meaning that it is
/// immediately preceded by an odd number of backslashes.
bool char_is_escaped(const wcstring &text, size_t idx) {
    const size_t backslash_count = count_preceding_backslashes(text, idx);
    const bool odd_count = (backslash_count % 2) != 0;
    return odd_count;
}

using namespace ast;
/// Reformats fish source text. Construct it with the source, then call prettify() to get the
/// reformatted text. The printer walks the parsed ast and re-emits each leaf's text, filling in
/// the "gaps" between leaves (whitespace, comments, stray semicolons) from the original source.
struct pretty_printer_t {
    // Note: this got somewhat more complicated after introducing the new AST, because that AST no
    // longer encodes detailed lexical information (e.g. every newline). This feels more complex
    // than necessary and would probably benefit from a more layered approach where we identify
    // certain runs, weight line breaks, have a cost model, etc.

    /// \param src the source to prettify. It is held by reference, so it must outlive the printer.
    /// \param do_indent whether to compute indentation; if false every indent level is 0.
    pretty_printer_t(const wcstring &src, bool do_indent)
        : source(src),
          indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
          ast(ast_t::parse(src, parse_flags())),
          do_indent(do_indent),
          gaps(compute_gaps()),
          preferred_semi_locations(compute_preferred_semi_locations()) {
        assert(indents.size() == source.size() && "indents and source should be same length");
    }

    // Original source.
    const wcstring &source;

    // The indents of our string.
    // This has the same length as 'source' and describes the indentation level.
    const std::vector<int> indents;

    // The parsed ast.
    const ast_t ast;

    // The prettifier output.
    wcstring output;

    // The indent of the source range which we are currently emitting.
    int current_indent{0};

    // Whether to indent, or just insert spaces.
    const bool do_indent;

    // Whether the next gap text should hide the first newline.
    bool gap_text_mask_newline{false};

    // The "gaps": a sorted set of ranges between tokens.
    // These contain whitespace, comments, semicolons, and other lexical elements which are not
    // present in the ast.
    // Note this is initialized from 'ast' and 'source', so it must be declared after them.
    const std::vector<source_range_t> gaps;

    // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
    // This is computed ahead of time for convenience.
    const std::vector<uint32_t> preferred_semi_locations;

    // Flags we support.
    using gap_flags_t = uint32_t;
    enum {
        default_flags = 0,

        // Whether to allow line splitting via escaped newlines.
        // For example, in argument lists:
        //
        //   echo a \
        //   b
        //
        // If this is not set, then split-lines will be joined.
        allow_escaped_newlines = 1 << 0,

        // If set, do NOT insert a space before this token.
        // This is used when emitting semis:
        //    echo a; echo b;
        // No space required between 'a' and ';', or 'b' and ';'.
        skip_space = 1 << 1,
    };

    // \return gap text flags for the gap text that comes *before* a given node type.
    static gap_flags_t gap_text_flags_before_node(const node_t &node) {
        gap_flags_t result = default_flags;
        switch (node.type) {
            // Allow escaped newlines before leaf nodes that can be part of a long command.
            case type_t::argument:
            case type_t::redirection:
            case type_t::variable_assignment:
                result |= allow_escaped_newlines;
                break;

            case type_t::token_base:
                // Allow escaped newlines before && and ||, and also pipes.
                switch (node.as<token_base_t>()->type) {
                    case parse_token_type_t::andand:
                    case parse_token_type_t::oror:
                    case parse_token_type_t::pipe:
                        result |= allow_escaped_newlines;
                        break;
                    case parse_token_type_t::string: {
                        // Allow escaped newlines before commands that follow a variable assignment
                        // since both can be long (#7955).
                        const node_t *p = node.parent;
                        if (p->type != type_t::decorated_statement) break;
                        p = p->parent;
                        assert(p->type == type_t::statement);
                        p = p->parent;
                        if (auto job = p->try_as<job_t>()) {
                            if (!job->variables.empty()) result |= allow_escaped_newlines;
                        } else if (auto job_cnt = p->try_as<job_continuation_t>()) {
                            if (!job_cnt->variables.empty()) result |= allow_escaped_newlines;
                        } else if (auto not_stmt = p->try_as<not_statement_t>()) {
                            if (!not_stmt->variables.empty()) result |= allow_escaped_newlines;
                        }
                        break;
                    }
                    default:
                        break;
                }
                break;

            default:
                break;
        }
        return result;
    }

    // \return whether we are at the start of a new line.
    // An empty output counts as the start of a line.
    bool at_line_start() const { return output.empty() || output.back() == L'\n'; }

    // \return whether we have a space before the output.
    // This ignores escaped spaces and escaped newlines.
    bool has_preceding_space() const {
        long idx = static_cast<long>(output.size()) - 1;
        // Skip escaped newlines.
        // This is historical. Example:
        //
        // cmd1 \
        // | cmd2
        //
        // we want the pipe to "see" the space after cmd1.
        // TODO: this is too tricky, we should factor this better.
        while (idx >= 0 && output.at(idx) == L'\n') {
            // 'idx' is an offset into 'output', so the backslashes must be counted in 'output' as
            // well (not in 'source', whose offsets are unrelated).
            size_t backslashes = count_preceding_backslashes(output, idx);
            if (backslashes % 2 == 0) {
                // Not escaped.
                return false;
            }
            idx -= (1 + backslashes);
        }
        return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
    }

    // Entry point. Prettify our source code and return it.
    // The returned text always ends in exactly one newline. 'output' is moved-from afterwards.
    wcstring prettify() {
        output = wcstring{};
        node_visitor(*this).accept(ast.top());

        // Trailing gap text.
        emit_gap_text_before(source_range_t{static_cast<uint32_t>(source.size()), 0},
                             default_flags);

        // Replace all trailing newlines with just a single one.
        while (!output.empty() && at_line_start()) {
            output.pop_back();
        }
        emit_newline();

        wcstring result = std::move(output);
        return result;
    }

    // \return a substring of source.
    wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }

    // Return the gap ranges from our ast.
    // These are the (possibly empty) ranges of source between consecutive non-empty leaf nodes,
    // plus the range before the first leaf and the range after the last one.
    std::vector<source_range_t> compute_gaps() const {
        auto range_compare = [](source_range_t r1, source_range_t r2) {
            if (r1.start != r2.start) return r1.start < r2.start;
            return r1.length < r2.length;
        };
        // Collect the token ranges into a list.
        std::vector<source_range_t> tok_ranges;
        for (const node_t &node : ast) {
            if (node.category == category_t::leaf) {
                auto r = node.source_range();
                if (r.length > 0) tok_ranges.push_back(r);
            }
        }
        // Place a zero length range at end to aid in our inverting.
        tok_ranges.push_back(source_range_t{static_cast<uint32_t>(source.size()), 0});

        // Our tokens should be sorted.
        assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));

        // For each range, add a gap range between the previous range and this range.
        std::vector<source_range_t> gaps;
        uint32_t prev_end = 0;
        for (source_range_t tok_range : tok_ranges) {
            assert(tok_range.start >= prev_end &&
                   "Token range should not overlap or be out of order");
            // Re-checked at runtime so that builds without assertions cannot underflow below.
            if (tok_range.start >= prev_end) {
                gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
            }
            prev_end = tok_range.start + tok_range.length;
        }
        return gaps;
    }

    // Return sorted list of semi-preferring semi_nl nodes.
    // Each entry is the source offset of a semi_nl_t which should be emitted as ';'.
    std::vector<uint32_t> compute_preferred_semi_locations() const {
        std::vector<uint32_t> result;
        // Record 'n' if it is present in the source and was written as a semicolon.
        auto mark_semi_from_input = [&](const optional_t<semi_nl_t> &n) {
            if (n && n->has_source() && substr(n->range) == L";") {
                result.push_back(n->range.start);
            }
        };

        // andor_job_lists get semis if the input uses semis.
        for (const auto &node : ast) {
            // See if we have a condition and an andor_job_list.
            const optional_t<semi_nl_t> *condition = nullptr;
            const andor_job_list_t *andors = nullptr;
            if (const auto *ifc = node.try_as<if_clause_t>()) {
                condition = &ifc->condition.semi_nl;
                andors = &ifc->andor_tail;
            } else if (const auto *wc = node.try_as<while_header_t>()) {
                condition = &wc->condition.semi_nl;
                andors = &wc->andor_tail;
            }

            // If there is no and-or tail then we always use a newline.
            if (andors && andors->count() > 0) {
                if (condition) mark_semi_from_input(*condition);
                // Mark all but last of the andor list.
                for (uint32_t i = 0; i + 1 < andors->count(); i++) {
                    mark_semi_from_input(andors->at(i)->job.semi_nl);
                }
            }
        }

        // `x ; and y` gets semis if it has them already, and they are on the same line.
        for (const auto &node : ast) {
            if (const auto *job_list = node.try_as<job_list_t>()) {
                const semi_nl_t *prev_job_semi_nl = nullptr;
                for (const job_conjunction_t &job : *job_list) {
                    // Set up prev_job_semi_nl for the next iteration to make control flow easier.
                    const semi_nl_t *prev = prev_job_semi_nl;
                    prev_job_semi_nl = job.semi_nl.contents.get();

                    // Is this an 'and' or 'or' job?
                    if (!job.decorator) continue;

                    // Now see if we want to mark 'prev' as allowing a semi.
                    // Did we have a previous semi_nl which was written as a semicolon?
                    if (!prev || substr(prev->range) != L";") continue;

                    // Is there a newline between them?
                    assert(prev->range.start <= job.decorator->range.start &&
                           "Ranges out of order");
                    auto start = source.begin() + prev->range.start;
                    auto end = source.begin() + job.decorator->range.end();
                    if (std::find(start, end, L'\n') == end) {
                        // We're going to allow the previous semi_nl to be a semi.
                        result.push_back(prev->range.start);
                    }
                }
            }
        }
        std::sort(result.begin(), result.end());
        return result;
    }

    // Emit a space or indent as necessary, depending on the previous output.
    // At the start of a line this emits the current indentation; otherwise it emits one space,
    // unless \p flags contains skip_space or the output already ends in a space.
    void emit_space_or_indent(gap_flags_t flags = default_flags) {
        if (at_line_start()) {
            output.append(SPACES_PER_INDENT * current_indent, L' ');
        } else if (!(flags & skip_space) && !has_preceding_space()) {
            output.append(1, L' ');
        }
    }

    // Emit "gap text:" newlines and comments from the original source.
    // Gap text may be a few things:
    //
    // 1. Just a space is common. We will trim the spaces to be empty.
    //
    // Here the gap text is the comment, followed by the newline:
    //
    //    echo abc # arg
    //    echo def
    //
    // 2. It may also be an escaped newline:
    // Here the gap text is a space, backslash, newline, space.
    //
    //     echo \
    //       hi
    //
    // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
    //
    //   begin | stuff
    //
    //  We do not handle errors here - instead our caller does.
    //
    // \return true if the gap text ended with a comment, in which case a newline was emitted
    // after that comment; false otherwise.
    bool emit_gap_text(source_range_t range, gap_flags_t flags) {
        wcstring gap_text = substr(range);
        // Common case: if we are only spaces, do nothing.
        if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;

        // Look to see if there is an escaped newline.
        // Emit it if either we allow it, or it comes before the first comment.
        // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
        // text - we already know it has no semantic significance.
        size_t escaped_nl = gap_text.find(L"\\\n");
        if (escaped_nl != wcstring::npos) {
            size_t comment_idx = gap_text.find(L'#');
            if ((flags & allow_escaped_newlines) ||
                (comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
                // Emit a space before the escaped newline.
                if (!at_line_start() && !has_preceding_space()) {
                    output.append(L" ");
                }
                output.append(L"\\\n");
                // Indent the continuation line and any leading comments (#7252).
                // Use the indentation level of the next newline.
                current_indent = indents.at(range.start + escaped_nl + 1);
                emit_space_or_indent();
            }
        }

        // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
        // always emit one.
        bool needs_nl = false;

        tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
        while (maybe_t<tok_t> tok = tokenizer.next()) {
            wcstring tok_text = tokenizer.text_of(*tok);

            if (needs_nl) {
                // The previous token was a comment: terminate its line. If this token is itself
                // that newline, it has now been accounted for.
                emit_newline();
                needs_nl = false;
                if (tok_text == L"\n") continue;
            } else if (gap_text_mask_newline) {
                // We only respect mask_newline the first time through the loop.
                gap_text_mask_newline = false;
                if (tok_text == L"\n") continue;
            }

            if (tok->type == token_type_t::comment) {
                emit_space_or_indent();
                output.append(tok_text);
                needs_nl = true;
            } else if (tok->type == token_type_t::end) {
                // This may be either a newline or semicolon.
                // Semicolons found here are not part of the ast and can simply be removed.
                // Newlines are preserved unless mask_newline is set.
                if (tok_text == L"\n") {
                    emit_newline();
                }
            } else {
                fprintf(stderr,
                        "Gap text should only have comments and newlines - instead found token "
                        "type %d with text: %ls\n",
                        static_cast<int>(tok->type), tok_text.c_str());
                DIE("Gap text should only have comments and newlines");
            }
        }
        if (needs_nl) emit_newline();
        return needs_nl;
    }

    /// \return the gap text ending at a given index into the string, or empty if none.
    /// "Empty" here is the range {0, 0}.
    source_range_t gap_text_to(uint32_t end) const {
        // 'gaps' is sorted, so binary search for the first gap whose end is not before 'end'.
        auto where = std::lower_bound(
            gaps.begin(), gaps.end(), end,
            [](source_range_t r, uint32_t end) { return r.start + r.length < end; });
        if (where == gaps.end() || where->start + where->length != end) {
            // Not found.
            return source_range_t{0, 0};
        } else {
            return *where;
        }
    }

    /// \return whether a range \p r overlaps an error range from our ast.
    bool range_contained_error(source_range_t r) const {
        const auto &errs = ast.extras().errors;
        // Two ranges compare "equivalent" under this ordering exactly when neither lies entirely
        // before the other, so binary_search finds any error range not disjoint from 'r'.
        auto range_is_before = [](source_range_t x, source_range_t y) {
            return x.start + x.length <= y.start;
        };
        assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
               "Error ranges should be sorted");
        return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
    }

    // Emit the gap text before a source range.
    // \return the result of emit_gap_text() for that gap, or false if there was no gap text or
    // it was copied verbatim because it contained an error.
    bool emit_gap_text_before(source_range_t r, gap_flags_t flags) {
        assert(r.start <= source.size() && "source out of bounds");
        bool added_newline = false;

        // Find the gap text which ends at start.
        source_range_t range = gap_text_to(r.start);
        if (range.length > 0) {
            // Set the indent from the beginning of this gap text.
            // For example:
            // begin
            //    cmd
            //    # comment
            // end
            // Here the comment is the gap text before the end, but we want the indent from the
            // command.
            if (range.start < indents.size()) current_indent = indents.at(range.start);

            // If this range contained an error, append the gap text without modification.
            // For example in: echo foo "
            // We don't want to mess with the quote.
            if (range_contained_error(range)) {
                output.append(substr(range));
            } else {
                added_newline = emit_gap_text(range, flags);
            }
        }
        // Always clear gap_text_mask_newline after emitting even empty gap text.
        gap_text_mask_newline = false;
        return added_newline;
    }

    /// Given a string \p input, remove unnecessary quotes, etc.
    /// \return the unquoted text if it is non-empty and consists only of "good" characters,
    /// otherwise \p input unchanged.
    wcstring clean_text(const wcstring &input) {
        // Unescape the string - this leaves special markers around if there are any
        // expansions or anything. We specifically tell it to not compute backslash-escapes
        // like \U or \x, because we want to leave them intact.
        wcstring unescaped = input;
        unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);

        // Remove INTERNAL_SEPARATOR because that's a quote.
        auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
        unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());

        // If no non-"good" char is left, use the unescaped version.
        // This can be extended to other characters, but giving the precise list is tough,
        // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
        // people feel more at ease.
        auto goodchars = [](wchar_t ch) {
            return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
        };
        if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
            !unescaped.empty()) {
            return unescaped;
        } else {
            return input;
        }
    }

    // Emit a range of original text. This indents as needed, and also inserts preceding gap text.
    // \p flags is applied both to the preceding gap text (e.g. allow_escaped_newlines decides
    // whether escaped newlines are kept or collapsed) and to the spacing before the text itself
    // (skip_space).
    void emit_text(source_range_t r, gap_flags_t flags) {
        emit_gap_text_before(r, flags);
        current_indent = indents.at(r.start);
        if (r.length > 0) {
            emit_space_or_indent(flags);
            output.append(clean_text(substr(r)));
        }
    }

    // Emit the text of a leaf node, preceded by its gap text.
    template <type_t Type>
    void emit_node_text(const leaf_t<Type> &node) {
        source_range_t range = node.range;

        // Weird special-case: a token may end in an escaped newline. Notably, the newline is
        // not part of the following gap text, handle indentation here (#8197).
        bool ends_with_escaped_nl = node.range.length >= 2 &&
                                    source.at(node.range.end() - 2) == L'\\' &&
                                    source.at(node.range.end() - 1) == L'\n';
        if (ends_with_escaped_nl) {
            // Emit the token without its trailing backslash-newline; it is re-added below.
            range = {range.start, range.length - 2};
        }

        emit_text(range, gap_text_flags_before_node(node));

        if (ends_with_escaped_nl) {
            // By convention, escaped newlines are preceded with a space.
            output.append(L" \\\n");
            // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
            // The cases where this matters are probably very rare.
            current_indent++;
            emit_space_or_indent();
            current_indent--;
        }
    }

    // Emit one newline.
    void emit_newline() { output.push_back(L'\n'); }

    // Emit a semicolon.
    void emit_semi() { output.push_back(L';'); }

    // For branch and list nodes, default is to visit their children.
    template <typename Node>
    enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
        node_visitor(*this).accept_children_of(node);
    }

    template <typename Node>
    enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
        node_visitor(*this).accept_children_of(node);
    }

    // Leaf nodes we just visit their text.
    void visit(const keyword_base_t &node) { emit_node_text(node); }
    void visit(const token_base_t &node) { emit_node_text(node); }
    void visit(const argument_t &node) { emit_node_text(node); }
    void visit(const variable_assignment_t &node) { emit_node_text(node); }

    void visit(const semi_nl_t &node) {
        // These are semicolons or newlines which are part of the ast. That means it includes e.g.
        // ones terminating a job or 'if' header, but not random semis in job lists. We respect
        // preferred_semi_locations to decide whether or not these should stay as newlines or
        // become semicolons.

        // Check if we should prefer a semicolon.
        bool prefer_semi = node.range.length > 0 &&
                           std::binary_search(preferred_semi_locations.begin(),
                                              preferred_semi_locations.end(), node.range.start);
        emit_gap_text_before(node.range, gap_text_flags_before_node(node));

        // Don't emit anything if the gap text put us on a newline (because it had a comment).
        if (!at_line_start()) {
            prefer_semi ? emit_semi() : emit_newline();

            // If it was a semi but we emitted a newline, swallow a subsequent newline.
            if (!prefer_semi && substr(node.range) == L";") {
                gap_text_mask_newline = true;
            }
        }
    }

    void visit(const redirection_t &node) {
        // No space between a redirection operator and its target (#2899).
        emit_text(node.oper.range, default_flags);
        emit_text(node.target.range, skip_space);
    }

    void visit(const maybe_newlines_t &node) {
        // Our newlines may have comments embedded in them, example:
        //    cmd |
        //    # something
        //    cmd2
        // Treat it as gap text.
        if (node.range.length > 0) {
            auto flags = gap_text_flags_before_node(node);
            current_indent = indents.at(node.range.start);
            bool added_newline = emit_gap_text_before(node.range, flags);
            source_range_t gap_range = node.range;
            if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') {
                // The preceding gap text already emitted this newline, so skip it. Shrink the
                // length along with advancing the start so that the range still ends where the
                // node ends, rather than spilling one character into whatever follows.
                gap_range.start++;
                gap_range.length--;
            }
            emit_gap_text(gap_range, flags);
        }
    }

    void visit(const begin_header_t &node) {
        // 'begin' does not require a newline after it, but we insert one.
        node_visitor(*this).accept_children_of(node);
        if (!at_line_start()) {
            emit_newline();
        }
    }

    // The flags we use to parse.
    static parse_tree_flags_t parse_flags() {
        return parse_flag_continue_after_error | parse_flag_include_comments |
               parse_flag_leave_unterminated | parse_flag_show_blank_lines;
    }
};
}  // namespace

/// \return the name of \p role as a narrow string; the name is spelled exactly like the
/// highlight_role_t enumerator. Calls DIE() for a role which is not listed here.
static const char *highlight_role_to_string(highlight_role_t role) {
    switch (role) {
        case highlight_role_t::normal:
            return "normal";
        case highlight_role_t::error:
            return "error";
        case highlight_role_t::command:
            return "command";
        case highlight_role_t::keyword:
            return "keyword";
        case highlight_role_t::statement_terminator:
            return "statement_terminator";
        case highlight_role_t::param:
            return "param";
        case highlight_role_t::option:
            return "option";
        case highlight_role_t::comment:
            return "comment";
        case highlight_role_t::search_match:
            return "search_match";
        case highlight_role_t::operat:
            return "operat";
        case highlight_role_t::escape:
            return "escape";
        case highlight_role_t::quote:
            return "quote";
        case highlight_role_t::redirection:
            return "redirection";
        case highlight_role_t::autosuggestion:
            return "autosuggestion";
        case highlight_role_t::selection:
            return "selection";
        case highlight_role_t::pager_progress:
            return "pager_progress";
        case highlight_role_t::pager_background:
            return "pager_background";
        case highlight_role_t::pager_prefix:
            return "pager_prefix";
        case highlight_role_t::pager_completion:
            return "pager_completion";
        case highlight_role_t::pager_description:
            return "pager_description";
        case highlight_role_t::pager_secondary_background:
            return "pager_secondary_background";
        case highlight_role_t::pager_secondary_prefix:
            return "pager_secondary_prefix";
        case highlight_role_t::pager_secondary_completion:
            return "pager_secondary_completion";
        case highlight_role_t::pager_secondary_description:
            return "pager_secondary_description";
        case highlight_role_t::pager_selected_background:
            return "pager_selected_background";
        case highlight_role_t::pager_selected_prefix:
            return "pager_selected_prefix";
        case highlight_role_t::pager_selected_completion:
            return "pager_selected_completion";
        case highlight_role_t::pager_selected_description:
            return "pager_selected_description";
        default:
            DIE("UNKNOWN ROLE");
    }
}

// Entry point for Pygments CSV output.
// Our output is a newline-separated string.
// Each line is of the form `start,end,role`
// start and end is the half-open token range, value is a string from highlight_role_t.
// Example:
// 3,7,command
static std::string make_pygments_csv(const wcstring &src) {
    const size_t len = src.size();
    std::vector<highlight_spec_t> colors;
    highlight_shell(src, colors, operation_context_t::globals());
    assert(colors.size() == len && "Colors and src should have same size");

    struct token_range_t {
        unsigned long start;
        unsigned long end;
        highlight_role_t role;
    };

    std::vector<token_range_t> token_ranges;
    for (size_t i = 0; i < len; i++) {
        highlight_role_t role = colors.at(i).foreground;
        // See if we can extend the last range.
        if (!token_ranges.empty()) {
            auto &last = token_ranges.back();
            if (last.role == role && last.end == i) {
                last.end = i + 1;
                continue;
            }
        }
        // We need a new range.
        token_ranges.push_back(token_range_t{i, i + 1, role});
    }

    // Now render these to a string.
    std::string result;
    for (const auto &range : token_ranges) {
        char buff[128];
        snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end,
                 highlight_role_to_string(range.role));
        result.append(buff);
    }
    return result;
}

// Entry point for prettification.
// \return the prettified form of \p src; \p do_indent selects whether lines are indented.
// If the global dump_parse_tree is set, a dump of the parsed ast is first written to stderr.
static wcstring prettify(const wcstring &src, bool do_indent) {
    if (dump_parse_tree) {
        // Note the dump uses its own set of parse flags, distinct from the ones the printer uses.
        const auto dump_flags = parse_flag_leave_unterminated | parse_flag_include_comments |
                                parse_flag_show_extra_semis;
        auto dumped_ast = ast::ast_t::parse(src, dump_flags);
        wcstring dump_text = dumped_ast.dump(src);
        std::fwprintf(stderr, L"%ls\n", dump_text.c_str());
    }

    pretty_printer_t printer{src, do_indent};
    return printer.prettify();
}

/// \return the HTML/CSS class name used for the foreground role of \p spec. The name is
/// "fish_color_" followed by the role's name (with "operator" for the operat role). Roles without
/// a dedicated class map to "fish_color_other".
static const wchar_t *html_class_name_for_color(highlight_spec_t spec) {
#define P(x) L"fish_color_" #x
    switch (spec.foreground) {
        case highlight_role_t::normal: {
            return P(normal);
        }
        case highlight_role_t::error: {
            return P(error);
        }
        case highlight_role_t::command: {
            return P(command);
        }
        // Keywords are a distinct role (see highlight_role_to_string), so give them their own
        // class instead of lumping them into "other".
        case highlight_role_t::keyword: {
            return P(keyword);
        }
        case highlight_role_t::statement_terminator: {
            return P(statement_terminator);
        }
        case highlight_role_t::param: {
            return P(param);
        }
        case highlight_role_t::option: {
            return P(option);
        }
        case highlight_role_t::comment: {
            return P(comment);
        }
        case highlight_role_t::search_match: {
            return P(search_match);
        }
        case highlight_role_t::operat: {
            return P(operator);
        }
        case highlight_role_t::escape: {
            return P(escape);
        }
        case highlight_role_t::quote: {
            return P(quote);
        }
        case highlight_role_t::redirection: {
            return P(redirection);
        }
        case highlight_role_t::autosuggestion: {
            return P(autosuggestion);
        }
        case highlight_role_t::selection: {
            return P(selection);
        }
        default: {
            return P(other);
        }
    }
// Keep the one-letter helper macro from leaking into the rest of the file.
#undef P
}

static std::string html_colorize(const wcstring &text,
                                 const std::vector<highlight_spec_t> &colors) {
    if (text.empty()) {
        return "";
    }

    assert(colors.size() == text.size());
    wcstring html = L"<pre><code>";
    highlight_spec_t last_color = highlight_role_t::normal;
    for (size_t i = 0; i < text.size(); i++) {
        // Handle colors.
        highlight_spec_t color = colors.at(i);
        if (i > 0 && color != last_color) {
            html.append(L"</span>");
        }
        if (i == 0 || color != last_color) {
            append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
        }
        last_color = color;

        // Handle text.
        wchar_t wc = text.at(i);
        switch (wc) {
            case L'&': {
                html.append(L"&amp;");
                break;
            }
            case L'\'': {
                html.append(L"&apos;");
                break;
            }
            case L'"': {
                html.append(L"&quot;");
                break;
            }
            case L'<': {
                html.append(L"&lt;");
                break;
            }
            case L'>': {
                html.append(L"&gt;");
                break;
            }
            default: {
                html.push_back(wc);
                break;
            }
        }
    }
    html.append(L"</span></code></pre>");
    return wcs2string(html);
}

/// Return \p text converted with wcs2string(), with no color markup added.
static std::string no_colorize(const wcstring &text) {
    std::string plain = wcs2string(text);
    return plain;
}

/// Entry point of fish_indent.
///
/// Parses the command line options, then processes each file named on the command line, or
/// standard input if no file is given. Each input is reformatted with prettify() and emitted
/// according to the selected output type: plain text, ANSI-colored text or HTML on stdout,
/// written back to the input file (-w), or merely compared against the input (--check). With
/// --pygments the input is not reformatted; the result of make_pygments_csv() is printed instead.
///
/// \return 0 normally. In --check mode, the number of inputs whose formatted text differs from
/// the original, capped at 255. Option errors and files that cannot be opened terminate the
/// process with status 1 via exit().
int main(int argc, char *argv[]) {
    program_name = L"fish_indent";
    set_main_thread();
    setup_fork_guards();
    // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
    // because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
    //
    // TODO: Auto-detect the encoding of the script. We should look for a vim style comment
    // (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
    // (e.g., "# -*- coding: <encoding-name> -*-").
    setlocale(LC_ALL, "");
    env_init();

    // Apply any feature flags listed in the global fish_features variable.
    if (auto features_var = env_stack_t::globals().get(L"fish_features")) {
        for (const wcstring &s : features_var->as_list()) {
            mutable_fish_features().set_from_string(s);
        }
    }

    // Types of output we support.
    enum {
        output_type_plain_text,
        output_type_file,
        output_type_ansi,
        output_type_pygments_csv,
        output_type_check,
        output_type_html
    } output_type = output_type_plain_text;
    // Path of the file being processed; only set when reading from a named file.
    const char *output_location = "";
    bool do_indent = true;
    // File path for debug output.
    std::string debug_output;

    // Long-only options use small integers (1, 2, 3) or letters absent from short_opts
    // ('o', 'P') as their getopt_long() return value.
    const char *short_opts = "+d:hvwicD:";
    const struct option long_opts[] = {{"debug", required_argument, nullptr, 'd'},
                                       {"debug-output", required_argument, nullptr, 'o'},
                                       {"debug-stack-frames", required_argument, nullptr, 'D'},
                                       {"dump-parse-tree", no_argument, nullptr, 'P'},
                                       {"no-indent", no_argument, nullptr, 'i'},
                                       {"help", no_argument, nullptr, 'h'},
                                       {"version", no_argument, nullptr, 'v'},
                                       {"write", no_argument, nullptr, 'w'},
                                       {"html", no_argument, nullptr, 1},
                                       {"ansi", no_argument, nullptr, 2},
                                       {"pygments", no_argument, nullptr, 3},
                                       {"check", no_argument, nullptr, 'c'},
                                       {}};

    int opt;
    while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) {
        switch (opt) {
            case 'P': {
                dump_parse_tree = true;
                break;
            }
            case 'h': {
                print_help("fish_indent", 1);
                exit(0);
            }
            case 'v': {
                std::fwprintf(stdout, _(L"%ls, version %s\n"), program_name, get_fish_version());
                exit(0);
            }
            case 'w': {
                output_type = output_type_file;
                break;
            }
            case 'i': {
                do_indent = false;
                break;
            }
            case 1: {
                output_type = output_type_html;
                break;
            }
            case 2: {
                output_type = output_type_ansi;
                break;
            }
            case 3: {
                output_type = output_type_pygments_csv;
                break;
            }
            case 'c': {
                output_type = output_type_check;
                break;
            }
            case 'd': {
                // Enable the requested FLOG categories and report which ones are now active.
                activate_flog_categories_by_pattern(str2wcstring(optarg));
                for (auto cat : get_flog_categories()) {
                    if (cat->enabled) {
                        std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name);
                    }
                }
                break;
            }
            case 'D': {
                // TODO: Option is currently useless.
                // Either remove it or make it work with FLOG.
                break;
            }
            case 'o': {
                debug_output = optarg;
                break;
            }
            default: {
                // We assume getopt_long() has already emitted a diagnostic msg.
                exit(1);
            }
        }
    }

    // Skip past the options so that argv[0..argc) are the file operands.
    argc -= optind;
    argv += optind;

    // Direct any debug output right away.
    FILE *debug_output_file = nullptr;
    if (!debug_output.empty()) {
        debug_output_file = fopen(debug_output.c_str(), "w");
        if (!debug_output_file) {
            fprintf(stderr, "Could not open file %s\n", debug_output.c_str());
            perror("fopen");
            exit(-1);
        }
        set_cloexec(fileno(debug_output_file));
        setlinebuf(debug_output_file);
        set_flog_output_file(debug_output_file);
    }

    // Number of inputs that failed the --check comparison.
    int retval = 0;

    wcstring src;
    // Iterate once per file operand, or exactly once (reading stdin) if there are none.
    for (int i = 0; i < argc || (argc == 0 && i == 0); i++) {
        if (argc == 0 && i == 0) {
            if (output_type == output_type_file) {
                // -w rewrites a file in place, which is meaningless for stdin.
                std::fwprintf(
                    stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"),
                    program_name);
                exit(1);
            }
            src = read_file(stdin);
        } else {
            FILE *fh = fopen(argv[i], "r");
            if (fh) {
                src = read_file(fh);
                fclose(fh);
                output_location = argv[i];
            } else {
                std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), argv[i],
                              std::strerror(errno));
                exit(1);
            }
        }

        // Pygments output is produced from the unformatted source and bypasses everything below.
        if (output_type == output_type_pygments_csv) {
            std::string output = make_pygments_csv(src);
            fputs(output.c_str(), stdout);
            continue;
        }

        const wcstring output_wtext = prettify(src, do_indent);

        // Maybe colorize. Only the ANSI and HTML outputs consume the colors, so don't spend time
        // highlighting for the other output types.
        std::vector<highlight_spec_t> colors;
        if (output_type == output_type_ansi || output_type == output_type_html) {
            highlight_shell(output_wtext, colors, operation_context_t::globals());
        }

        // Text to print on stdout; stays empty for the output types that print nothing there.
        std::string colored_output;
        switch (output_type) {
            case output_type_plain_text: {
                colored_output = no_colorize(output_wtext);
                break;
            }
            case output_type_file: {
                FILE *fh = fopen(output_location, "w");
                if (fh) {
                    std::fputws(output_wtext.c_str(), fh);
                    fclose(fh);
                } else {
                    std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location,
                                  std::strerror(errno));
                    exit(1);
                }
                break;
            }
            case output_type_ansi: {
                colored_output = colorize(output_wtext, colors, env_stack_t::globals());
                break;
            }
            case output_type_html: {
                colored_output = html_colorize(output_wtext, colors);
                break;
            }
            case output_type_pygments_csv: {
                DIE("pygments_csv should have been handled above");
            }
            case output_type_check: {
                if (output_wtext != src) {
                    // Name the offending file, if the input came from one.
                    if (argc) {
                        std::fwprintf(stderr, _(L"%s\n"), argv[i]);
                    }
                    retval++;
                }
                break;
            }
        }

        std::fputws(str2wcstring(colored_output).c_str(), stdout);
    }

    // Only the low 8 bits of an exit status reach the parent process, so a failure count that is
    // a multiple of 256 would otherwise be reported as success. Saturate at 255 instead.
    return std::min(retval, 255);
}