Rewrite of fish_indent

Changes fish_indent to leverage the new parse tree. Also supports colorizing output via the --html and --ansi flags.
2024-12-25 12:23:09 +00:00 · 2014-12-23 15:29:42 -08:00 · 2014-12-23 15:29:42 -08:00 · 8ba0ab2172
commit 8ba0ab2172
parent 96589920d5
3 changed files with 273 additions and 273 deletions
--- a/Makefile.in
+++ b/Makefile.in
@ -93,8 +93,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o		\
 	builtin_test.o parse_tree.o parse_productions.o parse_execution.o \
 	pager.o utf8.o fish_version.o wcstringutil.o

-FISH_INDENT_OBJS := fish_indent.o print_help.o common.o	\
-parser_keywords.o wutil.o tokenizer.o fish_version.o
+FISH_INDENT_OBJS := fish_indent.o print_help.o $(FISH_OBJS) 

 #
 # Additional files used by builtin.o
--- a/fish_indent.cpp
+++ b/fish_indent.cpp
@ -1,5 +1,5 @@
 /*
-Copyright (C) 2005-2008 Axel Liljencrantz
+Copyright (C) 2014 ridiculous_fish

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License version 2 as
@ -15,7 +15,6 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

-
 /** \file fish_indent.cpp
  The fish_indent proegram.
 */
@ -25,33 +24,31 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include <stdlib.h>
 #include <stdio.h>
 #include <wchar.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
+#include <vector>
+#include <map>
 #ifdef HAVE_GETOPT_H
 #include <getopt.h>
 #endif
-#include <locale.h>

-#include "fallback.h"
-#include "util.h"
-#include "common.h"
 #include "wutil.h"
-#include "tokenizer.h"
+#include "common.h"
+#include "output.h"
+#include "screen.h"
+#include "env.h"
+#include "input.h"
+#include "parse_tree.h"
 #include "print_help.h"
-#include "parser_keywords.h"
 #include "fish_version.h"

-/**
-   The string describing the single-character options accepted by the main fish binary
-*/
-#define GETOPT_STRING "hvi"
+#define SPACES_PER_INDENT 4

-/**
-   Read the entire contents of a file into the specified string
- */
-static void read_file(FILE *f, wcstring &b)
+/* An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc. */
+typedef unsigned int indent_t;
+
+/* Read the entire contents of a file into the specified string */
+static wcstring read_file(FILE *f)
 {
+    wcstring result;
    while (1)
    {
        wint_t c = fgetwc(f);
@ -62,266 +59,255 @@ static void read_file(FILE *f, wcstring &b)
                wperror(L"fgetwc");
                exit(1);
            }
-
            break;
        }
-        b.push_back((wchar_t)c);
+        result.push_back((wchar_t)c);
    }
+    return result;
 }

-/**
-   Insert the specified number of tabs into the output buffer
- */
-static void insert_tabs(wcstring &out, int indent)
+/* Append whitespace as necessary. If we have a newline, append the appropriate indent. Otherwise, append a space. */
+static void append_whitespace(indent_t node_indent, bool do_indent, bool has_new_line, wcstring *out_result)
 {
-    if (indent > 0)
-        out.append((size_t)indent, L'\t');
-
-}
-
-/**
-   Indent the specified input
- */
-static int indent(wcstring &out, const wcstring &in, int flags)
-{
-    int res=0;
-    int is_command = 1;
-    int indent = 0;
-    int do_indent = 1;
-    int prev_type = 0;
-    int prev_prev_type = 0;
-
-    tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
-    for (; tok_has_next(&tok); tok_next(&tok))
+    if (! has_new_line)
    {
-        int type = tok_last_type(&tok);
-        const wchar_t *last = tok_last(&tok);
+        out_result->push_back(L' ');
+    }
+    else if (do_indent)
+    {
+        out_result->append(node_indent * SPACES_PER_INDENT, L' ');
+    }
+}

-        switch (type)
-        {
-            case TOK_STRING:
-            {
-                if (is_command)
-                {
-                    int next_indent = indent;
-                    is_command = 0;
-
-                    wcstring unesc;
-                    unescape_string(last, &unesc, UNESCAPE_SPECIAL);
-
-                    if (parser_keywords_is_block(unesc))
-                    {
-                        next_indent++;
-                    }
-                    else if (unesc == L"else")
-                    {
-                        indent--;
-                    }
-                    /* case should have the same indent level as switch*/
-                    else if (unesc == L"case")
-                    {
-                        indent--;
-                    }
-                    else if (unesc == L"end")
-                    {
-                        indent--;
-                        next_indent--;
-                    }
-
-
-                    if (do_indent && flags && prev_type != TOK_PIPE)
-                    {
-                        insert_tabs(out, indent);
-                    }
-
-                    append_format(out, L"%ls", last);
-
-                    indent = next_indent;
-
-                }
-                else
-                {
-                    if (prev_type != TOK_REDIRECT_FD)
-                        out.append(L" ");
-                    out.append(last);
-                }
-
-                break;
-            }
-
-            case TOK_END:
-            {
-                if (prev_type != TOK_END || prev_prev_type != TOK_END)
-                    out.append(L"\n");
-                do_indent = 1;
-                is_command = 1;
-                break;
-            }
-
-            case TOK_PIPE:
-            {
-                out.append(L" ");
-                if (last[0] == '2' && !last[1])
-                {
-                    out.append(L"^");
-                }
-                else if (last[0] != '1' || last[1])
-                {
-                    out.append(last);
-                    out.append(L">");
-                }
-                out.append(L" | ");
-                is_command = 1;
-                break;
-            }
-
-            case TOK_REDIRECT_OUT:
-            {
-                out.append(L" ");
-                if (wcscmp(last, L"2") == 0)
-                {
-                    out.append(L"^");
-                }
-                else
-                {
-                    if (wcscmp(last, L"1") != 0)
-                        out.append(last);
-                    out.append(L"> ");
-                }
-                break;
-            }
-
-            case TOK_REDIRECT_APPEND:
-            {
-                out.append(L" ");
-                if (wcscmp(last, L"2") == 0)
-                {
-                    out.append(L"^^");
-                }
-                else
-                {
-                    if (wcscmp(last, L"1") != 0)
-                        out.append(last);
-                    out.append(L">> ");
-                }
-                break;
-            }
-
-            case TOK_REDIRECT_IN:
-            {
-                out.append(L" ");
-                if (wcscmp(last, L"0") != 0)
-                    out.append(last);
-                out.append(L"< ");
-                break;
-            }
-
-            case TOK_REDIRECT_FD:
-            {
-                out.append(L" ");
-                if (wcscmp(last, L"1") != 0)
-                    out.append(last);
-                out.append(L">& ");
-                break;
-            }
-
-            case TOK_BACKGROUND:
-            {
-                out.append(L"&\n");
-                do_indent = 1;
-                is_command = 1;
-                break;
-            }
-
-            case TOK_COMMENT:
-            {
-                if (do_indent && flags)
-                {
-                    insert_tabs(out, indent);
-                }
-
-                append_format(out, L"%ls", last);
-                do_indent = 1;
-                break;
-            }
-
-            default:
-            {
-                debug(0, L"Unknown token '%ls'", last);
-                exit(1);
-            }
-        }
-
-        prev_prev_type = prev_type;
-        prev_type = type;
+static void prettify_node_recursive(const wcstring &source, const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent, parse_token_type_t parent_type, bool *has_new_line, wcstring *out_result, bool do_indent)
+{
+    const parse_node_t &node = tree.at(node_idx);
+    const parse_token_type_t node_type = node.type;

+    /* Increment the indent if we are either a root job_list, or root case_item_list */
+    const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list);
+    const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list);
+    if (is_root_job_list || is_root_case_item_list)
+    {
+        node_indent += 1;
    }

-    return res;
+    /* Handle comments, which come before the text */
+    if (node.has_comments())
+    {
+        const parse_node_tree_t::parse_node_list_t comment_nodes = tree.comment_nodes_for_node(node);
+        for (size_t i=0; i < comment_nodes.size(); i++)
+        {
+            const parse_node_t &comment_node = *comment_nodes.at(i);
+            append_whitespace(node_indent, do_indent, *has_new_line, out_result);
+            out_result->append(source, comment_node.source_start, comment_node.source_length);
+        }
+    }
+
+    if (node_type == parse_token_type_end)
+    {
+        /* Newline */
+        out_result->push_back(L'\n');
+        *has_new_line = true;
+    }
+    else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) || node_type == parse_special_type_parse_error)
+    {
+        if (node.has_source())
+        {
+            /* Some type representing a particular token */
+            append_whitespace(node_indent, do_indent, *has_new_line, out_result);
+            out_result->append(source, node.source_start, node.source_length);
+            *has_new_line = false;
+        }
+    }
+
+    /* Recurse to all our children */
+    for (node_offset_t idx = 0; idx < node.child_count; idx++)
+    {
+        /* Note we pass our type to our child, which becomes its parent node type */
+        prettify_node_recursive(source, tree, node.child_start + idx, node_indent, node_type, has_new_line, out_result, do_indent);
+    }
 }

-/**
-   Remove any prefix and suffix newlines from the specified
-   string.
- */
-static void trim(wcstring &str)
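+/* Entry point for prettification. Parses src into a parse tree and returns the reformatted text (indented when do_indent is true). If parse_tree_from_string fails, src is returned unchanged. */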
+static wcstring prettify(const wcstring &src, bool do_indent)
 {
-    if (str.empty())
-        return;
+    parse_node_tree_t tree;
+    if (! parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments | parse_flag_leave_unterminated | parse_flag_show_blank_lines, &tree, NULL /* errors */))
+    {
+        /* We return the initial string on failure */
+        return src;
+    }

-    size_t pos = str.find_first_not_of(L" \n");
-    if (pos > 0)
-        str.erase(0, pos);
-
-    pos = str.find_last_not_of(L" \n");
-    if (pos != wcstring::npos && pos + 1 < str.length())
-        str.erase(pos + 1);
+    /* We may have a forest of disconnected trees on a parse failure. We have to handle all nodes that have no parent, and all parse errors. */
+    bool has_new_line = true;
+    wcstring result;
+    for (size_t i=0; i < tree.size(); i++)
+    {
+        const parse_node_t &node = tree.at(i);
+        if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error)
+        {
+            /* A root node */
+            prettify_node_recursive(src, tree, i, 0, symbol_job_list, &has_new_line, &result, do_indent);
+        }
+    }
+    return result;
 }


-/**
-   The main mathod. Run the program.
- */
-int main(int argc, char **argv)
+// Helpers for output_set_writer. output_receiver is the buffer that collects the characters;
+// write_to_output_receiver is the writer callback that appends each character to it and returns 0.
+static std::string output_receiver;
+static int write_to_output_receiver(char c)
+{
+    output_receiver.push_back(c);
+    return 0;
+}
+
+/* Given a string and list of colors of the same size, return the string with ANSI escape sequences representing the colors.
+   Temporarily installs write_to_output_receiver as the output writer so that the output of write_color and writech
+   is collected in output_receiver (assuming both route through the installed writer - confirm in output.cpp).
+   Requires colors.size() == text.size() and an empty output_receiver on entry; both are asserted. */
+static std::string ansi_colorize(const wcstring &text, const std::vector<highlight_spec_t> &colors)
+{
+    assert(colors.size() == text.size());
+    assert(output_receiver.empty());
+
+    /* Remember the current writer so it can be restored after the loop */
+    int (*saved)(char) = output_get_writer();
+    output_set_writer(write_to_output_receiver);
+
+    /* A color is written only when it differs from the previous character's color; the first character is compared against highlight_spec_normal */
+    highlight_spec_t last_color = highlight_spec_normal;
+    for (size_t i=0; i < text.size(); i++)
+    {
+        highlight_spec_t color = colors.at(i);
+        if (color != last_color)
+        {
+            /* NOTE(review): the false/true arguments presumably select a non-background color lookup and a foreground write - confirm against highlight_get_color and write_color */
+            write_color(highlight_get_color(color, false), true);
+            last_color = color;
+        }
+        writech(text.at(i));
+    }
+
+    /* Restore the previous writer, then move the collected output into the result; the swap leaves output_receiver empty again. Note that no color is written after the last character. */
+    output_set_writer(saved);
+    std::string result;
+    result.swap(output_receiver);
+    return result;
+}
+
+/* Given a highlight spec, return the class name used for it in HTML output: L"fish_color_" followed by the name of the spec's primary component (spec & HIGHLIGHT_SPEC_PRIMARY_MASK), or L"fish_color_other" for any value not listed below. The returned pointer refers to a string literal. */
+static const wchar_t *html_class_name_for_color(highlight_spec_t spec)
+{
+    #define P(x) L"fish_color_"  #x
+    switch (spec & HIGHLIGHT_SPEC_PRIMARY_MASK)
+    {
+        case highlight_spec_normal: return P(normal);
+        case highlight_spec_error: return P(error);
+        case highlight_spec_command: return P(command);
+        case highlight_spec_statement_terminator: return P(statement_terminator);
+        case highlight_spec_param: return P(param);
+        case highlight_spec_comment: return P(comment);
+        case highlight_spec_match: return P(match);
+        case highlight_spec_search_match: return P(search_match);
+        case highlight_spec_operator: return P(operator);
+        case highlight_spec_escape: return P(escape);
+        case highlight_spec_quote: return P(quote);
+        case highlight_spec_redirection: return P(redirection);
+        case highlight_spec_autosuggestion: return P(autosuggestion);
+        case highlight_spec_selection: return P(selection);
+
+        default: return P(other);
+    }
+}
+
+/* Given a string and list of colors of the same size, return the string with HTML span elements for the various colors.
+   Empty text yields an empty string. Otherwise the result is a single <pre> element in which each run of equally
+   colored characters is wrapped in <span class="...">, using the class name from html_class_name_for_color.
+   The characters & ' " < > are replaced by their HTML entities. The wide result is converted with wcs2string. */
+static std::string html_colorize(const wcstring &text, const std::vector<highlight_spec_t> &colors)
+{
+    if (text.empty())
+    {
+        return "";
+    }
+
+    assert(colors.size() == text.size());
+    wcstring html = L"<pre>";
+    highlight_spec_t last_color = highlight_spec_normal;
+    for (size_t i=0; i < text.size(); i++)
+    {
+        /* Handle colors: close the previous span on a color change, and open a new span for the first character and after every change */
+        highlight_spec_t color = colors.at(i);
+        if (i > 0 && color != last_color)
+        {
+            html.append(L"</span>");
+        }
+        if (i == 0 || color != last_color)
+        {
+            append_format(html, L"<span class=\"%ls\">", html_class_name_for_color(color));
+        }
+        last_color = color;
+
+        /* Handle text: escape the characters that are special in HTML, copy everything else unchanged */
+        wchar_t wc = text.at(i);
+        switch (wc)
+        {
+            case L'&':
+                html.append(L"&amp;");
+                break;
+            case L'\'':
+                html.append(L"&apos;");
+                break;
+            case L'"':
+                html.append(L"&quot;");
+                break;
+            case L'<':
+                html.append(L"&lt;");
+                break;
+            case L'>':
+                html.append(L"&gt;");
+                break;
+            default:
+                html.push_back(wc);
+                break;
+        }
+    }
+    /* A span is always open here because text is non-empty, so close it together with the <pre> */
+    html.append(L"</span></pre>");
+    return wcs2string(html);
+}
+
+/* Plain-text output: return text converted with wcs2string, with no color markup added. */
+static std::string no_colorize(const wcstring &text)
+{
+    return wcs2string(text);
+}
+
+int main(int argc, char *argv[])
 {
-    int do_indent=1;
    set_main_thread();
    setup_fork_guards();

    wsetlocale(LC_ALL, L"");
    program_name=L"fish_indent";

+    env_init();
+    input_init();
+
+    /* Types of output we support */
+    enum
+    {
+        output_type_plain_text,
+        output_type_ansi,
+        output_type_html
+    } output_type = output_type_plain_text;
+
+    /* Whether to indent (true) or just reformat to one job per line (false) */
+    bool do_indent = true;
+
    while (1)
    {
-        static struct option
-                long_options[] =
+        const struct option long_options[] =
        {
-            {
-                "no-indent", no_argument, 0, 'i'
-            }
-            ,
-            {
-                "help", no_argument, 0, 'h'
-            }
-            ,
-            {
-                "version", no_argument, 0, 'v'
-            }
-            ,
-            {
-                0, 0, 0, 0
-            }
-        }
-        ;
+            { "no-indent", no_argument, 0, 'i' },
+            { "help", no_argument, 0, 'h' },
+            { "version", no_argument, 0, 'v' },
+            { "html", no_argument, 0, 1 },
+            { "ansi", no_argument, 0, 2 },
+            { 0, 0, 0, 0 }
+        };

        int opt_index = 0;
-
-        int opt = getopt_long(argc,
-                              argv,
-                              GETOPT_STRING,
-                              long_options,
-                              &opt_index);
-
+        int opt = getopt_long(argc, argv, "hvi", long_options, &opt_index);
        if (opt == -1)
            break;

@ -342,10 +328,7 @@ int main(int argc, char **argv)

            case 'v':
            {
-                fwprintf(stderr,
-                         _(L"%ls, version %s\n"),
-                         program_name,
-                         get_fish_version());
+                fwprintf(stderr, _(L"%ls, version %s\n"), program_name, get_fish_version());
                exit(0);
                assert(0 && "Unreachable code reached");
                break;
@ -353,39 +336,55 @@ int main(int argc, char **argv)

            case 'i':
            {
-                do_indent = 0;
+                do_indent = false;
                break;
            }

+            case 1:
+            {
+                output_type = output_type_html;
+                break;
+            }
+
+            case 2:
+            {
+                output_type = output_type_ansi;
+                break;
+            }

            case '?':
            {
                exit(1);
            }
-
        }
    }

-    wcstring sb_in, sb_out;
-    read_file(stdin, sb_in);
+    const wcstring src = read_file(stdin);
+    const wcstring output_wtext = prettify(src, do_indent);

-    wutil_init();
-
-    if (!indent(sb_out, sb_in, do_indent))
+    /* Maybe colorize */
+    std::vector<highlight_spec_t> colors;
+    if (output_type != output_type_plain_text)
    {
-        trim(sb_out);
-        fwprintf(stdout, L"%ls", sb_out.c_str());
-    }
-    else
-    {
-        /*
-          Indenting failed - print original input
-        */
-        fwprintf(stdout, L"%ls", sb_in.c_str());
+        highlight_shell_no_io(output_wtext, colors, output_wtext.size(), NULL, env_vars_snapshot_t::current());
    }

+    std::string colored_output;
+    switch (output_type)
+    {
+        case output_type_plain_text:
+            colored_output = no_colorize(output_wtext);
+            break;

-    wutil_destroy();
+        case output_type_ansi:
+            colored_output = ansi_colorize(output_wtext, colors);
+            break;

+        case output_type_html:
+            colored_output = html_colorize(output_wtext, colors);
+            break;
+    }
+
+    fputs(colored_output.c_str(), stdout);
    return 0;
 }
--- a/parse_constants.h
+++ b/parse_constants.h
@ -74,7 +74,9 @@ enum parse_token_type_t
    LAST_TERMINAL_TYPE = parse_token_type_terminate,

    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
-    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
+
+    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
+    LAST_PARSE_TOKEN_TYPE = parse_token_type_end
 } __packed;

 /* These must be maintained in sorted order (except for none, which isn't a keyword). This enables us to do binary search. */