fish-shell/src/fish_indent.cpp
Fabian Homborg b25f72f391 Teach fish_indent to remove useless quotes
This tries to see if quotes guard some expansion from happening. If it
detects a "weird" character it'll leave the quotes in place, even in
some cases where it might not trigger.

So

    for i in 'c' 'color'

turns into

    for i in c color

The rationale here is that these quotes are useless, wasting
space (and line length), but more importantly that they are
superstitions. They don't do anything, but look like they do.

The counter argument is that they can be kept in case of later
changes, or that they make the intent clear - "this is supposed to be
a string we pass".
2020-03-09 19:46:43 +01:00

696 lines
24 KiB
C++

// The fish_indent program.
/*
Copyright (C) 2014 ridiculous_fish
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "config.h" // IWYU pragma: keep
#include <errno.h>
#include <getopt.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <wctype.h>
#include <cstring>
#include <cwchar>
#include <memory>
#include <stack>
#include <string>
#include <tuple>
#include <vector>
#include "color.h"
#include "common.h"
#include "env.h"
#include "expand.h"
#include "fish_version.h"
#include "flog.h"
#include "highlight.h"
#include "operation_context.h"
#include "output.h"
#include "parse_constants.h"
#include "print_help.h"
#include "tnode.h"
#include "wutil.h" // IWYU pragma: keep
// Number of spaces written for each indent level when indentation is enabled.
#define SPACES_PER_INDENT 4
// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc.
using indent_t = unsigned int;
// When true, parse tree details are written to stderr while prettifying. Set by the
// --dump-parse-tree option in main().
static bool dump_parse_tree = false;
// Set to 1 by read_file() when it has to skip an illegal byte sequence in its input.
static int ret = 0;
// Read the entire contents of a file into a wide string and return it.
//
// Bytes that do not form a valid multibyte sequence (EILSEQ) are skipped one at a time: a warning
// naming the offending byte is logged and the file-level `ret` flag is set to 1. Any other read
// error is reported with wperror() and terminates the program with status 1.
static wcstring read_file(FILE *f) {
    wcstring result;
    while (true) {
        const wint_t c = std::fgetwc(f);
        if (c != WEOF) {
            result.push_back(static_cast<wchar_t>(c));
            continue;
        }

        // WEOF is returned both at end of file and on a read error; only the latter needs work.
        if (!ferror(f)) break;

        // Snapshot errno immediately: the calls below may overwrite it, and the warning must
        // describe the fgetwc() failure rather than whatever happened afterwards.
        const int read_errno = errno;
        if (read_errno != EILSEQ) {
            wperror(L"fgetwc");
            exit(1);
        }

        // Illegal byte sequence. Try to skip past it.
        clearerr(f);
        int ch = fgetc(f);  // for printing the warning, and seeks forward 1 byte.
        FLOGF(warning, "%s (byte=%X)", std::strerror(read_errno), ch);
        ret = 1;
    }
    return result;
}
// Holds the state needed while turning a parse tree back into formatted source text.
struct prettifier_t {
    // The text being formatted.
    const wcstring &source;

    // The formatted text accumulated so far.
    wcstring output;

    // If false, no indentation is written at the start of a line.
    const bool do_indent;

    // True while nothing has been written yet on the current output line.
    bool has_new_line = true;

    // True if the most recent end token was a semicolon that may still need to be emitted.
    bool last_was_semicolon = false;

    // True if a continuation newline must be written before the next piece of whitespace.
    bool needs_continuation_newline = false;

    // Extra indent levels applied because the current line continues the previous one.
    uint32_t line_continuation_indent = 0;

    prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {}

    void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent,
                       parse_token_type_t parent_type);

    // If the node was preceded by an escaped newline, reproduce it as " \" plus a continuation
    // newline.
    void maybe_prepend_escaped_newline(const parse_node_t &node) {
        if (!node.has_preceding_escaped_newline()) return;
        output.append(L" \\");
        append_newline(true);
    }

    // End the current output line. A continuation line gets one extra level of indent.
    void append_newline(bool is_continuation = false) {
        output.push_back('\n');
        has_new_line = true;
        needs_continuation_newline = false;
        if (is_continuation) {
            line_continuation_indent = 1;
        } else {
            line_continuation_indent = 0;
        }
    }

    // Write the separator that goes before the next token: the indent if we are at the start of a
    // line (and indenting is enabled), otherwise a single space.
    void append_whitespace(indent_t node_indent) {
        if (needs_continuation_newline) append_newline(true);

        if (has_new_line) {
            if (do_indent) {
                const auto levels = node_indent + line_continuation_indent;
                output.append(levels * SPACES_PER_INDENT, L' ');
            }
            return;
        }
        output.push_back(L' ');
    }
};
// Debugging aid: print one parse tree node to stderr. The line shows the node's source offset and
// length, its indent, keyword and token type, and its source text bracketed by the characters
// found immediately before and after it in the source.
static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) {
    // Render a neighbouring character for display; anything below space is shown as \cX.
    const auto show_char = [](wchar_t c) -> wcstring {
        wcstring shown;
        if (c < L' ') {
            shown.push_back(L'\\');
            shown.push_back(L'c');
            shown.push_back(static_cast<wchar_t>(c + '@'));
        } else {
            shown.push_back(c);
        }
        return shown;
    };

    // Both neighbours default to a space when the node has no source or sits at an edge.
    wchar_t before = L' ';
    wchar_t after = L' ';
    wcstring node_text;

    const bool has_range = node.source_start != SOURCE_OFFSET_INVALID &&
                           node.source_length != SOURCE_OFFSET_INVALID;
    if (has_range) {
        const int end_idx = node.source_start + node.source_length;
        if (static_cast<size_t>(end_idx) < source.size()) {
            after = source[node.source_start + node.source_length];
        }
        if (node.source_start > 0) {
            before = source[node.source_start - 1];
        }
        node_text = source.substr(node.source_start, node.source_length);
    }

    const wcstring before_str = show_char(before);
    const wcstring after_str = show_char(after);
    std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n",
                  node.source_start, node.source_length, node_indent,
                  keyword_description(node.keyword), token_type_description(node.type),
                  before_str.c_str(), node_text.c_str(), after_str.c_str());
}
void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx,
indent_t node_indent, parse_token_type_t parent_type) {
// Use an explicit stack to avoid stack overflow.
struct pending_node_t {
node_offset_t index;
indent_t indent;
parse_token_type_t parent_type;
};
std::stack<pending_node_t> pending_node_stack;
pending_node_stack.push({node_idx, node_indent, parent_type});
while (!pending_node_stack.empty()) {
pending_node_t args = pending_node_stack.top();
pending_node_stack.pop();
auto node_idx = args.index;
auto node_indent = args.indent;
auto parent_type = args.parent_type;
const parse_node_t &node = tree.at(node_idx);
const parse_token_type_t node_type = node.type;
const parse_token_type_t prev_node_type =
node_idx > 0 ? tree.at(node_idx - 1).type : token_type_invalid;
// Increment the indent if we are either a root job_list, or root case_item_list, or in an
// if or while header (#1665).
const bool is_root_job_list =
node_type == symbol_job_list && parent_type != symbol_job_list;
const bool is_root_case_list =
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
const bool is_if_while_header =
(node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) &&
(parent_type == symbol_if_clause || parent_type == symbol_while_header);
if (is_root_job_list || is_root_case_list || is_if_while_header) {
node_indent += 1;
}
if (dump_parse_tree) dump_node(node_indent, node, source);
// Prepend any escaped newline.
maybe_prepend_escaped_newline(node);
// handle comments, which come before the text
if (node.has_comments()) {
auto comment_nodes = tree.comment_nodes_for_node(node);
for (const auto &comment : comment_nodes) {
maybe_prepend_escaped_newline(*comment.node());
append_whitespace(node_indent);
auto source_range = comment.source_range();
output.append(source, source_range->start, source_range->length);
needs_continuation_newline = true;
}
}
if (node_type == parse_token_type_end) {
// For historical reasons, semicolon also get "TOK_END".
// We need to distinguish between them, because otherwise `a;;;;` gets extra lines
// instead of the semicolons. Semicolons are just ignored, unless they are followed by a
// command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a
// newline.
last_was_semicolon = false;
if (node.get_source(source) == L"\n") {
append_newline();
} else if (!has_new_line) {
// The semicolon is only useful if we haven't just had a newline.
last_was_semicolon = true;
}
} else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) ||
node_type == parse_special_type_parse_error) {
if (last_was_semicolon) {
// We keep the semicolon for `; and` and `; or`,
// others we turn into newlines.
if (node.keyword != parse_keyword_and && node.keyword != parse_keyword_or) {
append_newline();
} else {
output.push_back(L';');
}
last_was_semicolon = false;
}
if (node.has_source()) {
// Some type representing a particular token.
if (prev_node_type != parse_token_type_redirection) {
append_whitespace(node_indent);
}
wcstring unescaped{source, node.source_start, node.source_length};
// Unescape the string - this leaves special markers around if there are any expansions or anything.
// TODO: This also already computes backslash-escapes like \u or \x.
// We probably don't want that - if someone picked `\x20` to spell space, they have a reason.
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL);
// Remove INTERNAL_SEPARATOR because that's a quote.
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote),
unescaped.end());
// If no non-alphanumeric char is left, use the unescaped version.
// This can be extended to other characters, but giving the precise list is tough,
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes people feel more at ease.
if (std::find_if_not(unescaped.begin(), unescaped.end(), fish_iswalnum) == unescaped.end() && !unescaped.empty()) {
output.append(unescaped);
} else {
output.append(source, node.source_start, node.source_length);
}
has_new_line = false;
}
}
// Put all children in stack in reversed order
// This way they will be processed in correct order.
for (node_offset_t idx = node.child_count; idx > 0; idx--) {
// Note: We pass our type to our child, which becomes its parent node type.
// Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe
// because we won't execute this call in that case since node.child_count should be
// zero.
pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type});
}
}
}
// Return the name of a highlight role as a narrow string, spelled exactly like its enumerator.
// An unrecognized role is a fatal error.
static const char *highlight_role_to_string(highlight_role_t role) {
    switch (role) {
        case highlight_role_t::normal:
            return "normal";
        case highlight_role_t::error:
            return "error";
        case highlight_role_t::command:
            return "command";
        case highlight_role_t::statement_terminator:
            return "statement_terminator";
        case highlight_role_t::param:
            return "param";
        case highlight_role_t::comment:
            return "comment";
        case highlight_role_t::match:
            return "match";
        case highlight_role_t::search_match:
            return "search_match";
        case highlight_role_t::operat:
            return "operat";
        case highlight_role_t::escape:
            return "escape";
        case highlight_role_t::quote:
            return "quote";
        case highlight_role_t::redirection:
            return "redirection";
        case highlight_role_t::autosuggestion:
            return "autosuggestion";
        case highlight_role_t::selection:
            return "selection";
        case highlight_role_t::pager_progress:
            return "pager_progress";
        case highlight_role_t::pager_background:
            return "pager_background";
        case highlight_role_t::pager_prefix:
            return "pager_prefix";
        case highlight_role_t::pager_completion:
            return "pager_completion";
        case highlight_role_t::pager_description:
            return "pager_description";
        case highlight_role_t::pager_secondary_background:
            return "pager_secondary_background";
        case highlight_role_t::pager_secondary_prefix:
            return "pager_secondary_prefix";
        case highlight_role_t::pager_secondary_completion:
            return "pager_secondary_completion";
        case highlight_role_t::pager_secondary_description:
            return "pager_secondary_description";
        case highlight_role_t::pager_selected_background:
            return "pager_selected_background";
        case highlight_role_t::pager_selected_prefix:
            return "pager_selected_prefix";
        case highlight_role_t::pager_selected_completion:
            return "pager_selected_completion";
        case highlight_role_t::pager_selected_description:
            return "pager_selected_description";
        default:
            DIE("UNKNOWN ROLE");
    }
}
// Entry point for Pygments CSV output.
// Our output is a newline-separated string.
// Each line is of the form `start,end,role`
// start and end is the half-open token range, value is a string from highlight_role_t.
// Example:
// 3,7,command
static std::string make_pygments_csv(const wcstring &src) {
const size_t len = src.size();
std::vector<highlight_spec_t> colors;
highlight_shell_no_io(src, colors, src.size(), operation_context_t::globals());
assert(colors.size() == len && "Colors and src should have same size");
struct token_range_t {
unsigned long start;
unsigned long end;
highlight_role_t role;
};
std::vector<token_range_t> token_ranges;
for (size_t i = 0; i < len; i++) {
highlight_role_t role = colors.at(i).foreground;
// See if we can extend the last range.
if (!token_ranges.empty()) {
auto &last = token_ranges.back();
if (last.role == role && last.end == i) {
last.end = i + 1;
continue;
}
}
// We need a new range.
token_ranges.push_back(token_range_t{i, i + 1, role});
}
// Now render these to a string.
std::string result;
for (const auto &range : token_ranges) {
char buff[128];
snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end,
highlight_role_to_string(range.role));
result.append(buff);
}
return result;
}
// Entry point for prettification: parse `src` and return it re-formatted, indenting lines only if
// `do_indent` is set. If the parser reports failure, the input is returned unchanged.
static wcstring prettify(const wcstring &src, bool do_indent) {
    int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments |
                       parse_flag_leave_unterminated | parse_flag_show_blank_lines);

    parse_node_tree_t parse_tree;
    const bool parsed = parse_tree_from_string(src, parse_flags, &parse_tree, nullptr);
    if (!parsed) {
        return src;  // we return the original string on failure
    }

    if (dump_parse_tree) {
        std::fwprintf(stderr, L"%ls\n", parse_dump_tree(parse_tree, src).c_str());
    }

    // A parse failure may leave a forest of disconnected trees, so every node without a parent
    // is formatted as a root, and so is every parse error node.
    prettifier_t prettifier{src, do_indent};
    for (node_offset_t idx = 0; idx < parse_tree.size(); idx++) {
        const parse_node_t &candidate = parse_tree.at(idx);
        const bool is_root = candidate.parent == NODE_OFFSET_INVALID ||
                             candidate.type == parse_special_type_parse_error;
        if (!is_root) continue;
        prettifier.prettify_node(parse_tree, idx, 0, symbol_job_list);
    }
    return std::move(prettifier.output);
}
/// Return the CSS class name used in HTML output for the foreground role of the given highlight
/// spec. Every name has the form "fish_color_<role>"; roles without a dedicated class map to
/// "fish_color_other".
static const wchar_t *html_class_name_for_color(highlight_spec_t spec) {
// Builds the wide literal L"fish_color_<name>" from a bare name. It is undefined again at the end
// of the function so that it does not leak into the rest of the file.
#define FISH_COLOR_CLASS(name) L"fish_color_" #name
    switch (spec.foreground) {
        case highlight_role_t::normal:
            return FISH_COLOR_CLASS(normal);
        case highlight_role_t::error:
            return FISH_COLOR_CLASS(error);
        case highlight_role_t::command:
            return FISH_COLOR_CLASS(command);
        case highlight_role_t::statement_terminator:
            return FISH_COLOR_CLASS(statement_terminator);
        case highlight_role_t::param:
            return FISH_COLOR_CLASS(param);
        case highlight_role_t::comment:
            return FISH_COLOR_CLASS(comment);
        case highlight_role_t::match:
            return FISH_COLOR_CLASS(match);
        case highlight_role_t::search_match:
            return FISH_COLOR_CLASS(search_match);
        case highlight_role_t::operat:
            // The enumerator is spelled `operat`, but the class name uses the full word.
            return FISH_COLOR_CLASS(operator);
        case highlight_role_t::escape:
            return FISH_COLOR_CLASS(escape);
        case highlight_role_t::quote:
            return FISH_COLOR_CLASS(quote);
        case highlight_role_t::redirection:
            return FISH_COLOR_CLASS(redirection);
        case highlight_role_t::autosuggestion:
            return FISH_COLOR_CLASS(autosuggestion);
        case highlight_role_t::selection:
            return FISH_COLOR_CLASS(selection);
        default:
            return FISH_COLOR_CLASS(other);
    }
#undef FISH_COLOR_CLASS
}
/// Given a string and a list of colors of the same size, return the string as HTML: the text is
/// wrapped in <pre><code>, each run of equally colored characters is enclosed in a span whose
/// class names the color, and the characters & ' " < > are replaced by entities. An empty string
/// yields an empty result.
static std::string html_colorize(const wcstring &text,
                                 const std::vector<highlight_spec_t> &colors) {
    if (text.empty()) {
        return "";
    }
    assert(colors.size() == text.size());

    // The entity that replaces a character, or nullptr if the character is copied as is.
    const auto entity_for = [](wchar_t wc) -> const wchar_t * {
        switch (wc) {
            case L'&':
                return L"&amp;";
            case L'\'':
                return L"&apos;";
            case L'"':
                return L"&quot;";
            case L'<':
                return L"&lt;";
            case L'>':
                return L"&gt;";
            default:
                return nullptr;
        }
    };

    wcstring html = L"<pre><code>";
    highlight_spec_t last_color = highlight_role_t::normal;
    for (size_t i = 0; i < text.size(); i++) {
        // Open a span at the first character and at every color change, closing the previous
        // span first if there is one.
        highlight_spec_t color = colors.at(i);
        const bool starts_span = i == 0 || color != last_color;
        if (starts_span) {
            if (i > 0) html.append(L"</span>");
            append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
        }
        last_color = color;

        // Emit the character itself, escaped if necessary.
        const wchar_t wc = text.at(i);
        if (const wchar_t *entity = entity_for(wc)) {
            html.append(entity);
        } else {
            html.push_back(wc);
        }
    }
    // The text is not empty, so a span is always open here.
    html.append(L"</span></code></pre>");
    return wcs2string(html);
}
// Plain text output: convert the text to a narrow string without adding any color markup.
static std::string no_colorize(const wcstring &text) {
    std::string plain = wcs2string(text);
    return plain;
}
int main(int argc, char *argv[]) {
program_name = L"fish_indent";
set_main_thread();
setup_fork_guards();
// Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
// because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
//
// TODO: Auto-detect the encoding of the script. We should look for a vim style comment
// (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
// (e.g., "# -*- coding: <encoding-name> -*-").
setlocale(LC_ALL, "");
env_init();
// Types of output we support.
enum {
output_type_plain_text,
output_type_file,
output_type_ansi,
output_type_pygments_csv,
output_type_html
} output_type = output_type_plain_text;
const char *output_location = "";
bool do_indent = true;
const char *short_opts = "+d:hvwiD:";
const struct option long_opts[] = {{"debug-level", required_argument, nullptr, 'd'},
{"debug-stack-frames", required_argument, nullptr, 'D'},
{"dump-parse-tree", no_argument, nullptr, 'P'},
{"no-indent", no_argument, nullptr, 'i'},
{"help", no_argument, nullptr, 'h'},
{"version", no_argument, nullptr, 'v'},
{"write", no_argument, nullptr, 'w'},
{"html", no_argument, nullptr, 1},
{"ansi", no_argument, nullptr, 2},
{"pygments", no_argument, nullptr, 3},
{nullptr, 0, nullptr, 0}};
int opt;
while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) {
switch (opt) {
case 'P': {
dump_parse_tree = true;
break;
}
case 'h': {
print_help("fish_indent", 1);
exit(0);
break;
}
case 'v': {
std::fwprintf(stderr, _(L"%ls, version %s\n"), program_name, get_fish_version());
exit(0);
break;
}
case 'w': {
output_type = output_type_file;
break;
}
case 'i': {
do_indent = false;
break;
}
case 1: {
output_type = output_type_html;
break;
}
case 2: {
output_type = output_type_ansi;
break;
}
case 3: {
output_type = output_type_pygments_csv;
break;
}
case 'd': {
char *end;
long tmp;
errno = 0;
tmp = strtol(optarg, &end, 10);
if (tmp >= 0 && tmp <= 10 && !*end && !errno) {
debug_level = static_cast<int>(tmp);
} else {
std::fwprintf(stderr, _(L"Invalid value '%s' for debug-level flag"), optarg);
exit(1);
}
break;
}
case 'D': {
char *end;
long tmp;
errno = 0;
tmp = strtol(optarg, &end, 10);
if (tmp > 0 && tmp <= 128 && !*end && !errno) {
set_debug_stack_frames(static_cast<int>(tmp));
} else {
std::fwprintf(stderr, _(L"Invalid value '%s' for debug-stack-frames flag"),
optarg);
exit(1);
}
break;
}
default: {
// We assume getopt_long() has already emitted a diagnostic msg.
exit(1);
break;
}
}
}
argc -= optind;
argv += optind;
wcstring src;
for (int i = 0; i < argc || (argc == 0 && i == 0); i++) {
if (argc == 0 && i == 0) {
if (output_type == output_type_file) {
std::fwprintf(
stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"),
program_name);
exit(1);
}
src = read_file(stdin);
} else {
FILE *fh = fopen(argv[i], "r");
if (fh) {
src = read_file(fh);
fclose(fh);
output_location = argv[i];
} else {
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), *argv,
std::strerror(errno));
exit(1);
}
}
if (output_type == output_type_pygments_csv) {
std::string output = make_pygments_csv(src);
fputs(output.c_str(), stdout);
continue;
}
const wcstring output_wtext = prettify(src, do_indent);
// Maybe colorize.
std::vector<highlight_spec_t> colors;
if (output_type != output_type_plain_text) {
highlight_shell_no_io(output_wtext, colors, output_wtext.size(),
operation_context_t::globals());
}
std::string colored_output;
switch (output_type) {
case output_type_plain_text: {
colored_output = no_colorize(output_wtext);
break;
}
case output_type_file: {
FILE *fh = fopen(output_location, "w");
if (fh) {
std::fputws(output_wtext.c_str(), fh);
fclose(fh);
} else {
std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location,
std::strerror(errno));
exit(1);
}
break;
}
case output_type_ansi: {
colored_output = colorize(output_wtext, colors);
break;
}
case output_type_html: {
colored_output = html_colorize(output_wtext, colors);
break;
}
case output_type_pygments_csv: {
DIE("pygments_csv should have been handled above");
break;
}
}
std::fputws(str2wcstring(colored_output).c_str(), stdout);
}
return 0;
}