Merge branch 'pygments'

This adds support for a fish_indent driven pygments syntax highlighter
to the sphinx docs build.
This commit is contained in:
ridiculousfish 2019-04-08 20:14:12 -07:00
commit ef88e959ac
9 changed files with 332 additions and 8 deletions

View file

@ -133,9 +133,6 @@ ADD_DEFINITIONS(-D_REENTRANT)
# Set up PCRE2
INCLUDE(cmake/PCRE2.cmake)
# Define a function to link dependencies.
FUNCTION(FISH_LINK_DEPS target)
TARGET_LINK_LIBRARIES(${target} fishlib)
@ -162,6 +159,9 @@ ADD_EXECUTABLE(fish_key_reader
src/fish_key_reader.cpp src/print_help.cpp)
FISH_LINK_DEPS(fish_key_reader)
# Set up the docs.
INCLUDE(cmake/Docs.cmake)
# A helper for running tests.
ADD_EXECUTABLE(fish_test_helper src/fish_test_helper.cpp)

View file

@ -13,13 +13,17 @@ SET(SPHINX_CACHE_DIR "${SPHINX_ROOT_DIR}/doctrees")
SET(SPHINX_HTML_DIR "${SPHINX_ROOT_DIR}/html")
SET(SPHINX_MANPAGE_DIR "${SPHINX_ROOT_DIR}/man")
# sphinx-docs uses fish_indent for highlighting.
# Prepend the output dir of fish_indent to PATH so the pygments lexer
# can shell out to the freshly built binary. ($$PATH escapes to $PATH
# in the generated build rule.)
ADD_CUSTOM_TARGET(sphinx-docs
    env PATH="$<TARGET_FILE_DIR:fish_indent>:$$PATH"
    ${SPHINX_EXECUTABLE}
    -q -b html
    -c "${SPHINX_SRC_DIR}"
    -d "${SPHINX_CACHE_DIR}"
    "${SPHINX_SRC_DIR}"
    "${SPHINX_HTML_DIR}"
    DEPENDS sphinx_doc_src/fish_indent_lexer.py fish_indent
    COMMENT "Building HTML documentation with Sphinx")
ADD_CUSTOM_TARGET(sphinx-manpages

View file

@ -0,0 +1,79 @@
/* Syntax-highlighting styles for the fish Sphinx docs, layered over the
   'nature' theme (see the @import below and html_theme in conf.py).
   NOTE(review): the class names (.c, .k, .s2, ...) are standard Pygments
   short token names; each rule's trailing comment gives the full token. */
@import "nature.css";
.highlight .hll { background-color: #ffffcc }
.highlight { background: #f8f8f8; }
.highlight .c { color: #8f5902; } /* Comment */
.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */
.highlight .g { color: #000000 } /* Generic */
.highlight .k { color: #204a87; font-weight: bold } /* Keyword */
.highlight .l { color: #000000 } /* Literal */
.highlight .n { color: #000000 } /* Name */
.highlight .o { color: #00a6b2; } /* Operator */
.highlight .x { color: #000000 } /* Other */
.highlight .p { color: #00afff; } /* Punctuation */
.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #8f5902; font-style: italic } /* Comment.Preproc */
.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */
.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */
.highlight .gd { color: #a40000 } /* Generic.Deleted */
.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */
.highlight .gr { color: #ef2929 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
.highlight .go { color: #000000; font-style: italic } /* Generic.Output */
.highlight .gp { color: #8f5902 } /* Generic.Prompt */
.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */
.highlight .kc { color: #204a87; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #204a87; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #204a87; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #204a87; font-weight: bold } /* Keyword.Pseudo */
.highlight .kr { color: #204a87; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #204a87; font-weight: bold } /* Keyword.Type */
.highlight .ld { color: #000000 } /* Literal.Date */
.highlight .m { color: #0000cf; font-weight: bold } /* Literal.Number */
.highlight .s { color: #4e9a06 } /* Literal.String */
.highlight .na { color: #c4a000 } /* Name.Attribute */
.highlight .nb { color: #204a87 } /* Name.Builtin */
.highlight .nc { color: #000000 } /* Name.Class */
.highlight .no { color: #00afff } /* Name.Constant */
.highlight .nd { color: #5c35cc; font-weight: bold } /* Name.Decorator */
.highlight .ni { color: #ce5c00 } /* Name.Entity */
.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #005fd7 } /* Name.Function */
.highlight .nl { color: #f57900 } /* Name.Label */
.highlight .nn { color: #000000 } /* Name.Namespace */
.highlight .nx { color: #000000 } /* Name.Other */
.highlight .py { color: #000000 } /* Name.Property */
.highlight .nt { color: #204a87; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #000000 } /* Name.Variable */
.highlight .ow { color: #204a87; font-weight: bold } /* Operator.Word */
.highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */
.highlight .mb { color: #0000cf; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000cf; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000cf; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000cf; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000cf; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */
.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */
.highlight .sc { color: #4e9a06 } /* Literal.String.Char */
.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */
.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */
.highlight .s2 { color: #999900 } /* Literal.String.Double */
.highlight .se { color: #00a6b2 } /* Literal.String.Escape */
.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */
.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */
.highlight .sx { color: #4e9a06 } /* Literal.String.Other */
.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */
.highlight .s1 { color: #999900 } /* Literal.String.Single */
.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */
.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #000000 } /* Name.Function.Magic */
.highlight .vc { color: #000000 } /* Name.Variable.Class */
.highlight .vg { color: #000000 } /* Name.Variable.Global */
.highlight .vi { color: #000000 } /* Name.Variable.Instance */
.highlight .vm { color: #000000 } /* Name.Variable.Magic */
.highlight .il { color: #0000cf; font-weight: bold } /* Literal.Number.Integer.Long */

View file

@ -1,4 +1,4 @@
.. highlight:: fish-docs-samples
Commands
============

View file

@ -8,6 +8,7 @@
import glob
import os.path
import pygments
# -- Helper functions --------------------------------------------------------
@ -15,6 +16,16 @@ def strip_ext(path):
""" Remove the extension from a path. """
return os.path.splitext(path)[0]
# -- Load our Pygments lexer -------------------------------------------------
def setup(app):
    """Sphinx extension hook: register the fish_indent-driven Pygments lexer.

    Loads FishIndentLexer from fish_indent_lexer.py (next to this conf.py)
    and installs it under the 'fish-docs-samples' highlight language.
    """
    from sphinx.highlighting import lexers
    this_dir = os.path.dirname(os.path.realpath(__file__))
    fish_indent_lexer = pygments.lexers.load_lexer_from_file(
        os.path.join(this_dir, 'fish_indent_lexer.py'),
        lexername='FishIndentLexer')
    lexers['fish-docs-samples'] = fish_indent_lexer
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
@ -82,7 +93,8 @@ pygments_style = None
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# !!! If you change this you also need to update the @import at the top
# of _static/fish-syntax-style.css
html_theme = 'nature'
# Theme options are theme-specific and customize the look and feel of a theme

View file

@ -0,0 +1,133 @@
# This is a plugin for pygments that shells out to fish_indent.
# Example of how to use this:
# env PATH="/dir/containing/fish/indent/:$PATH" pygmentize -f terminal256 -l /path/to/fish_indent_lexer.py:FishIndentLexer -x ~/test.fish
import os
from pygments.lexer import Lexer
from pygments.token import (
Keyword,
Name,
Comment,
String,
Error,
Number,
Operator,
Other,
Generic,
Whitespace,
String,
Text,
Punctuation,
)
import re
import subprocess
# The token type representing output to the console.
OUTPUT_TOKEN = Text
# A fallback token type.
DEFAULT = Text
# Mapping from fish token types to Pygments types.
ROLE_TO_TOKEN = {
"normal": Name.Variable,
"error": Generic.Error,
"command": Name.Function,
"statement_terminator": Punctuation,
"param": Name.Constant,
"comment": Comment,
"match": DEFAULT,
"search_match": DEFAULT,
"operat": Operator,
"escape": String.Escape,
"quote": String.Single, # note, may be changed to double dynamically
"redirection": Punctuation, # ?
"autosuggestion": Other, # in practice won't be generated
"selection": DEFAULT,
"pager_progress": DEFAULT,
"pager_background": DEFAULT,
"pager_prefix": DEFAULT,
"pager_completion": DEFAULT,
"pager_description": DEFAULT,
"pager_secondary_background": DEFAULT,
"pager_secondary_prefix": DEFAULT,
"pager_secondary_completion": DEFAULT,
"pager_secondary_description": DEFAULT,
"pager_selected_background": DEFAULT,
"pager_selected_prefix": DEFAULT,
"pager_selected_completion": DEFAULT,
"pager_selected_description": DEFAULT,
}
def token_for_text_and_role(text, role):
    """Return the pygments token for some input text and a fish role.

    This applies any special cases of ROLE_TO_TOKEN.
    """
    if text.isspace():
        # Here fish will return 'normal' or 'statement_terminator' for newline.
        return Text.Whitespace
    elif role == "quote":
        # Check for single or double.
        return String.Single if text.startswith("'") else String.Double
    else:
        # Fall back to DEFAULT for roles not in the map (e.g. ones added by a
        # newer fish_indent) instead of raising KeyError.
        return ROLE_TO_TOKEN.get(role, DEFAULT)
def tokenize_fish_command(code, offset):
    """Tokenize some fish code, offset in a parent string, by shelling
    out to fish_indent.

    fish_indent will output a list of csv lines: start,end,type.
    This function returns a list of (start, tok, value) tuples, as
    Pygments expects.
    """
    proc = subprocess.Popen(
        ["fish_indent", "--pygments"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, _ = proc.communicate(code)
    result = []
    for line in stdout.splitlines():
        # Skip blank lines rather than crashing on the 3-way unpack below.
        if not line:
            continue
        start, end, role = line.split(",")
        start, end = int(start), int(end)
        # Slice the original text back out so indexes stay aligned with it.
        value = code[start:end]
        tok = token_for_text_and_role(value, role)
        result.append((start + offset, tok, value))
    return result
class FishIndentLexer(Lexer):
    """Pygments lexer for fish, delegating tokenization to fish_indent."""

    name = "FishIndentLexer"
    aliases = ["fish", "fish-docs-samples"]
    filenames = ["*.fish"]

    def get_tokens_unprocessed(self, input_text):
        """Return a list of (start, tok, value) tuples.

        start is the index into the string
        tok is the token type (as above)
        value is the string contents of the token
        """
        has_prompt = any(line.startswith(">") for line in input_text.splitlines())
        if not has_prompt:
            # No prompt anywhere: the whole input is fish code.
            return tokenize_fish_command(input_text, 0)
        # At least one prompt line is present.
        # A regexp keeps the string indexes consistent with the input.
        tokens = []
        for match in re.finditer(r"^(>\s*)?(.*\n?)", input_text, re.MULTILINE):
            prompt = match.group(1)
            rest = match.group(2)
            if prompt:
                # Prompt line; highlight the command via fish syntax.
                tokens.append((match.start(1), Generic.Prompt, prompt))
                tokens.extend(tokenize_fish_command(rest, match.start(2)))
            else:
                # Non-prompt line representing output from a command.
                tokens.append((match.start(2), OUTPUT_TOKEN, rest))
        return tokens

View file

@ -1,4 +1,4 @@
.. highlight:: fish-docs-samples
.. _intro:
Introduction

View file

@ -1,4 +1,4 @@
.. highlight:: fish-docs-samples
Tutorial
========

View file

@ -243,6 +243,86 @@ void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t no
}
}
// Map a highlight_role_t to its enumerator name as a C string, e.g. "command".
// These strings form the third CSV field consumed by fish_indent_lexer.py.
static const char *highlight_role_to_string(highlight_role_t role) {
#define TEST_ROLE(x) \
    case highlight_role_t::x: \
        return #x;
    switch (role) {
        TEST_ROLE(normal)
        TEST_ROLE(error)
        TEST_ROLE(command)
        TEST_ROLE(statement_terminator)
        TEST_ROLE(param)
        TEST_ROLE(comment)
        TEST_ROLE(match)
        TEST_ROLE(search_match)
        TEST_ROLE(operat)
        TEST_ROLE(escape)
        TEST_ROLE(quote)
        TEST_ROLE(redirection)
        TEST_ROLE(autosuggestion)
        TEST_ROLE(selection)
        TEST_ROLE(pager_progress)
        TEST_ROLE(pager_background)
        TEST_ROLE(pager_prefix)
        TEST_ROLE(pager_completion)
        TEST_ROLE(pager_description)
        TEST_ROLE(pager_secondary_background)
        TEST_ROLE(pager_secondary_prefix)
        TEST_ROLE(pager_secondary_completion)
        TEST_ROLE(pager_secondary_description)
        TEST_ROLE(pager_selected_background)
        TEST_ROLE(pager_selected_prefix)
        TEST_ROLE(pager_selected_completion)
        TEST_ROLE(pager_selected_description)
    }
#undef TEST_ROLE
    // Every enumerator is handled above; this fallback avoids undefined
    // behavior (falling off a non-void function) if an out-of-range value
    // ever reaches us, and silences -Wreturn-type.
    return "unknown";
}
// Entry point for Pygments CSV output.
// Our output is a newline-separated string.
// Each line is of the form `start,end,role`
// start and end is the half-open token range, value is a string from highlight_role_t.
// Example:
// 3,7,command
static std::string make_pygments_csv(const wcstring &src) {
    const size_t len = src.size();
    std::vector<highlight_spec_t> colors;
    highlight_shell_no_io(src, colors, src.size(), nullptr, env_stack_t::globals());
    assert(colors.size() == len && "Colors and src should have same size");

    // A maximal run of characters sharing one highlight role.
    struct token_range_t {
        unsigned long start;
        unsigned long end;
        highlight_role_t role;
    };

    // Walk the per-character roles, coalescing adjacent equal roles into runs.
    std::vector<token_range_t> token_ranges;
    for (size_t idx = 0; idx < len; idx++) {
        highlight_role_t role = colors.at(idx).foreground;
        bool extended = false;
        if (!token_ranges.empty()) {
            token_range_t &prev = token_ranges.back();
            if (prev.role == role && prev.end == idx) {
                // Same role, contiguous: grow the previous range.
                prev.end = idx + 1;
                extended = true;
            }
        }
        if (!extended) {
            token_ranges.push_back(token_range_t{idx, idx + 1, role});
        }
    }

    // Render each range as one CSV line.
    std::string result;
    for (const auto &tok : token_ranges) {
        char buff[128];
        snprintf(buff, sizeof buff, "%lu,%lu,%s\n", tok.start, tok.end,
                 highlight_role_to_string(tok.role));
        result.append(buff);
    }
    return result;
}
// Entry point for prettification.
static wcstring prettify(const wcstring &src, bool do_indent) {
parse_node_tree_t parse_tree;
@ -414,6 +494,7 @@ int main(int argc, char *argv[]) {
output_type_plain_text,
output_type_file,
output_type_ansi,
output_type_pygments_csv,
output_type_html
} output_type = output_type_plain_text;
const char *output_location = "";
@ -429,6 +510,7 @@ int main(int argc, char *argv[]) {
{"write", no_argument, NULL, 'w'},
{"html", no_argument, NULL, 1},
{"ansi", no_argument, NULL, 2},
{"pygments", no_argument, NULL, 3},
{NULL, 0, NULL, 0}};
int opt;
@ -464,6 +546,10 @@ int main(int argc, char *argv[]) {
output_type = output_type_ansi;
break;
}
case 3: {
output_type = output_type_pygments_csv;
break;
}
case 'd': {
char *end;
long tmp;
@ -528,6 +614,12 @@ int main(int argc, char *argv[]) {
exit(1);
}
if (output_type == output_type_pygments_csv) {
std::string output = make_pygments_csv(src);
fputs(output.c_str(), stdout);
return EXIT_SUCCESS;
}
const wcstring output_wtext = prettify(src, do_indent);
// Maybe colorize.
@ -564,6 +656,10 @@ int main(int argc, char *argv[]) {
colored_output = html_colorize(output_wtext, colors);
break;
}
case output_type_pygments_csv: {
DIE("pygments_csv should have been handled above");
break;
}
}
std::fputws(str2wcstring(colored_output).c_str(), stdout);