Don't indent multi-line quoted strings; do indent inside ()

On a command with multiline quoted string like begin echo "line1 line2" end we actually indent line2 which seeems misleading because the indentation changes the behavior when typed into a script. This has become more prominent since commits - a37629f86 (fish_clipboard_copy: indent multiline commands, 2024-04-13) - 611a0572b (builtins type/functions: indent interactively-defined functions, 2024-04-12) - 222673f33 (edit_command_buffer: send indented commandline to editor, 2024-04-12) which add indentation to an exported commandline. Never indent quoted strings, to make sure the rendering matches the semantics. Note that we do need to indent the opening quote which is fine because it's on the same line. While at it, indent command substitutions recursively. That feature should also be added to fish_indent's formatting mode (which is the default). Fortunately the formatting mode already works fine with quoted strings; it does not indent them. Not sure how that's done and whether indentation can use the same logic.
2024-11-14 00:47:30 +00:00 · 2024-04-28 11:46:48 +02:00 · 2024-04-28 11:46:48 +02:00 · b00899179f
commit b00899179f
parent 2f6ed61833
5 changed files with 212 additions and 18 deletions
--- a/src/ast.rs
+++ b/src/ast.rs
@ -2626,7 +2626,7 @@ struct TokenStream<'a> {
    src: &'a wstr,

    // The tokenizer to generate new tokens.
-    tok: Tokenizer,
+    tok: Tokenizer<'static>,

    /// Any comment nodes are collected here.
    /// These are only collected if parse_flag_include_comments is set.
--- a/src/parse_util.rs
+++ b/src/parse_util.rs
@ -13,11 +13,11 @@ use crate::future_feature_flags::{feature_test, FeatureFlag};
 use crate::operation_context::OperationContext;
 use crate::parse_constants::{
    parse_error_offset_source_start, ParseError, ParseErrorCode, ParseErrorList, ParseKeyword,
-    ParseTokenType, ParseTreeFlags, ParserTestErrorBits, PipelinePosition, StatementDecoration,
-    ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE1, ERROR_BRACKETED_VARIABLE_QUOTED1,
-    ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID, ERROR_NOT_STATUS,
-    ERROR_NO_VAR_NAME, INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG,
-    INVALID_PIPELINE_CMD_ERR_MSG, UNKNOWN_BUILTIN_ERR_MSG,
+    ParseTokenType, ParseTreeFlags, ParserTestErrorBits, PipelinePosition, SourceRange,
+    StatementDecoration, ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE1,
+    ERROR_BRACKETED_VARIABLE_QUOTED1, ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR,
+    ERROR_NOT_PID, ERROR_NOT_STATUS, ERROR_NO_VAR_NAME, INVALID_BREAK_ERR_MSG,
+    INVALID_CONTINUE_ERR_MSG, INVALID_PIPELINE_CMD_ERR_MSG, UNKNOWN_BUILTIN_ERR_MSG,
 };
 use crate::tokenizer::{
    comment_end, is_token_delimiter, quote_end, Tok, TokenType, Tokenizer, TOK_ACCEPT_UNFINISHED,
@ -746,6 +746,10 @@ pub fn parse_util_escape_string_with_quote(
 /// Given a string, parse it as fish code and then return the indents. The return value has the same
 /// size as the string.
 pub fn parse_util_compute_indents(src: &wstr) -> Vec<i32> {
+    compute_indents(src, 0)
+}
+
+fn compute_indents(src: &wstr, initial_indent: i32) -> Vec<i32> {
    // Make a vector the same size as the input string, which contains the indents. Initialize them
    // to 0.
    let mut indents = vec![0; src.len()];
@ -768,7 +772,7 @@ pub fn parse_util_compute_indents(src: &wstr) -> Vec<i32> {
        None,
    );
    {
-        let mut iv = IndentVisitor::new(src, &mut indents);
+        let mut iv = IndentVisitor::new(src, &mut indents, initial_indent);
        iv.visit(ast.top());
        iv.record_line_continuations_until(iv.indents.len());
        iv.indents[iv.last_leaf_end..].fill(iv.last_indent);
@ -787,7 +791,8 @@ pub fn parse_util_compute_indents(src: &wstr) -> Vec<i32> {
            idx -= 1;
            if src[idx] == '\n' {
                let empty_middle_line = src.get(idx + 1) == Some(&'\n');
-                if !empty_middle_line {
+                let is_trailing_unclosed = idx == src.len() - 1 && iv.unclosed;
+                if !empty_middle_line && !is_trailing_unclosed {
                    iv.indents[idx] = next_indent;
                }
            } else {
@ -838,6 +843,9 @@ struct IndentVisitor<'a> {
    // The last indent which we assigned.
    last_indent: i32,

+    // Whether we have an unfinished quote or command substitution.
+    unclosed: bool,
+
    // The source we are indenting.
    src: &'a wstr,

@ -852,13 +860,14 @@ struct IndentVisitor<'a> {
    line_continuations: Vec<usize>,
 }
 impl<'a> IndentVisitor<'a> {
-    fn new(src: &'a wstr, indents: &'a mut Vec<i32>) -> Self {
+    fn new(src: &'a wstr, indents: &'a mut Vec<i32>, initial_indent: i32) -> Self {
        Self {
            last_leaf_end: 0,
-            last_indent: -1,
+            last_indent: initial_indent - 1,
+            unclosed: false,
            src,
            indents,
-            indent: -1,
+            indent: initial_indent - 1,
            line_continuations: vec![],
        }
    }
@ -887,6 +896,95 @@ impl<'a> IndentVisitor<'a> {
            }
        }
    }
+
+    fn indent_leaf(&mut self, range: SourceRange) {
+        let node_src = &self.src[range.start()..range.end()];
+        // Common case optimization.
+        if node_src.contains('(') /*)*/ && !node_src.contains('\n') {
+            self.indents[range.start()..range.end()].fill(self.indent);
+            return;
+        }
+
+        let mut done = range.start();
+        let mut cursor = 0;
+        let mut is_double_quoted = false;
+        let mut was_double_quoted;
+        loop {
+            was_double_quoted = is_double_quoted;
+            let parens = match parse_util_locate_cmdsubst_range(
+                node_src,
+                &mut cursor,
+                /*accept_incomplete=*/ true,
+                Some(&mut is_double_quoted),
+                None,
+            ) {
+                MaybeParentheses::Error => break,
+                MaybeParentheses::None => {
+                    break;
+                }
+                MaybeParentheses::CommandSubstitution(parens) => parens,
+            };
+
+            let command = parens.command();
+            self.indent_string_part(done..range.start() + command.start, was_double_quoted);
+            let cmdsub_contents = &node_src[command.clone()];
+            let indents = compute_indents(cmdsub_contents, self.indent + 1);
+            self.indents[range.start() + command.start..range.start() + command.end]
+                .copy_from_slice(&indents);
+
+            done = range.start() + command.end;
+            if parens.closing().len() < 1 {
+                self.unclosed = true;
+            }
+        }
+        self.indent_string_part(done..range.end(), was_double_quoted);
+    }
+
+    fn indent_string_part(&mut self, range: Range<usize>, is_double_quoted: bool) {
+        let mut done = range.start;
+        let mut quoted = false;
+        {
+            if is_double_quoted {
+                match quote_end(self.src, range.start, '"') {
+                    Some(q_end) => {
+                        // We may be (in) a multi-line string, so don't indent.
+                        done = q_end + 1;
+                    }
+                    None => quoted = true,
+                }
+            }
+            let part = &self.src[done..range.end];
+            if !quoted {
+                let mut callback = |offset| {
+                    if !quoted {
+                        // Quote open event. Indent unquoted part, including the opening quote.
+                        self.indents[done..range.start + offset + 1].fill(self.indent);
+                        done = range.start + offset + 1;
+                    } else {
+                        // Quote close. Don't indent, in case it's a multiline string.
+                        // Mark the first line as indented but only to make tests look prettier.
+                        let first_line_length = self.src[range.start..range.start + offset]
+                            .chars()
+                            .take_while(|&c| c != '\n')
+                            .count();
+                        self.indents[range.start..range.start + first_line_length]
+                            .fill(self.indent);
+                        done = range.start + offset;
+                    }
+                    quoted = !quoted;
+                };
+                for _token in
+                    Tokenizer::with_quote_events(part, TOK_ACCEPT_UNFINISHED, &mut callback)
+                {
+                }
+            }
+        }
+        if !quoted {
+            self.indents[done..range.end].fill(self.indent);
+        } else {
+            self.unclosed = true;
+        }
+    }
 }
 impl<'a> NodeVisitor<'a> for IndentVisitor<'a> {
    // Default implementation is to just visit children.
@ -996,7 +1094,8 @@ impl<'a> NodeVisitor<'a> for IndentVisitor<'a> {
                .rev()
                .take_while(|&c| c == ' ')
                .count();
-            self.indents[range.start() - leading_spaces..range.end()].fill(self.indent);
+            self.indents[range.start() - leading_spaces..range.start()].fill(self.indent);
+            self.indent_leaf(range);
            self.last_leaf_end = range.end();
            self.last_indent = self.indent;
        }
--- a/src/tests/parse_util.rs
+++ b/src/tests/parse_util.rs
@ -230,7 +230,7 @@ fn test_indents() {
        assert_eq!(indents, expected_indents);
    }
    macro_rules! validate {
-        ( $( $(,)? $indent:literal, $text:literal )* ) => {
+        ( $( $(,)? $indent:literal, $text:literal )* $(,)?  ) => {
            let segments = vec![
                $(
                    Segment{ indent: $indent, text: $text },
@ -392,5 +392,48 @@ fn test_indents() {
            1, "\n",
            0, "\nend"
        );
+        validate!(
+            0, "if", 1,  " true",
+            1, "\n    begin",
+            2,  "\n        echo",
+            1,  "\n    end",
+            0,  "\nend",
+        );
+
+        // Quotes and command substitutions.
+        validate!(
+            0, "if", 1, " foo \"",
+            0, "\nquoted",
+        );
+        validate!(
+            0, "if", 1, " foo \"",
+            0, "\n",
+        );
+        validate!(
+            0, "echo (",
+            1, "\n", // )
+        );
+        validate!(
+            0, "echo \"$(",
+            1, "\n" // )
+        );
+        validate!(
+            0, "echo (", // )
+            1, "\necho \"",
+            0, "\n"
+        );
+        validate!(
+            0, "echo (", // )
+            1, "\necho (", // )
+            2, "\necho"
+        );
+        validate!(
+            0, "if", 1, " true",
+            1, "\n    echo \"line1",
+            0, "\nline2 ", 1, "$(",
+            2, "\n    echo line3",
+            0, "\n) line4",
+            0, "\nline5\"",
+        );
    })();
 }
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -245,7 +245,7 @@ impl Tok {
 }

 /// The tokenizer struct.
-pub struct Tokenizer {
+pub struct Tokenizer<'c> {
    /// A pointer into the original string, showing where the next token begins.
    token_cursor: usize,
    /// The start of the original string.
@ -262,9 +262,11 @@ pub struct Tokenizer {
    continue_after_error: bool,
    /// Whether to continue the previous line after the comment.
    continue_line_after_comment: bool,
+    /// Called on every quote change.
+    on_quote_toggle: Option<&'c mut dyn FnMut(usize)>,
 }

-impl Tokenizer {
+impl<'c> Tokenizer<'c> {
    /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
    /// should not be freed by the caller until after the tokenizer is destroyed.
    ///
@ -273,6 +275,20 @@ impl Tokenizer {
    /// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid
    /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens
    pub fn new(start: &wstr, flags: TokFlags) -> Self {
+        Self::new_impl(start, flags, None)
+    }
+    pub fn with_quote_events(
+        start: &wstr,
+        flags: TokFlags,
+        on_quote_toggle: &'c mut dyn FnMut(usize),
+    ) -> Self {
+        Self::new_impl(start, flags, Some(on_quote_toggle))
+    }
+    fn new_impl(
+        start: &wstr,
+        flags: TokFlags,
+        on_quote_toggle: Option<&'c mut dyn FnMut(usize)>,
+    ) -> Self {
        Tokenizer {
            token_cursor: 0,
            start: start.to_owned(),
@ -282,11 +298,12 @@ impl Tokenizer {
            show_blank_lines: flags & TOK_SHOW_BLANK_LINES,
            continue_after_error: flags & TOK_CONTINUE_AFTER_ERROR,
            continue_line_after_comment: false,
+            on_quote_toggle,
        }
    }
 }

-impl Iterator for Tokenizer {
+impl<'c> Iterator for Tokenizer<'c> {
    type Item = Tok;

    fn next(&mut self) -> Option<Self::Item> {
@ -491,7 +508,7 @@ fn iswspace_not_nl(c: char) -> bool {
    }
 }

-impl Tokenizer {
+impl<'c> Tokenizer<'c> {
    /// Returns the text of a token, as a string.
    pub fn text_of(&self, tok: &Tok) -> &wstr {
        tok.get_source(&self.start)
@ -537,7 +554,7 @@ impl Tokenizer {
    }
 }

-impl Tokenizer {
+impl<'c> Tokenizer<'c> {
    /// Read the next token as a string.
    fn read_string(&mut self) -> Tok {
        let mut mode = TOK_MODE_REGULAR_TEXT;
@ -555,11 +572,17 @@ impl Tokenizer {
            paran_offsets: &Vec<usize>,
            quote: char,
        ) -> Result<(), usize> {
+            this.on_quote_toggle
+                .as_mut()
+                .map(|cb| (cb)(this.token_cursor));
            if let Some(end) = quote_end(&this.start, this.token_cursor, quote) {
+                let mut one_past_end = end + 1;
                if this.start.char_at(end) == '$' {
+                    one_past_end = end;
                    quoted_cmdsubs.push(paran_offsets.len());
                }
                this.token_cursor = end;
+                this.on_quote_toggle.as_mut().map(|cb| (cb)(one_past_end));
                Ok(())
            } else {
                let error_loc = this.token_cursor;
--- a/tests/checks/indent.fish
+++ b/tests/checks/indent.fish
@ -496,3 +496,32 @@ end' | $fish_indent --only-unindent
 # CHECK: {{^}}    echo
 # CHECK: {{^}}  not indented properly
 # CHECK: {{^}}end
+
+
+echo 'echo (
+if true
+echo
+end
+)' | $fish_indent --only-indent
+# CHECK: {{^}}echo (
+# CHECK: {{^}}    if true
+# CHECK: {{^}}        echo
+# CHECK: {{^}}    end
+# CHECK: {{^}})
+
+echo 'echo (
+if true
+echo "
+multi
+line
+"
+end
+)' | $fish_indent --only-indent
+# CHECK: {{^}}echo (
+# CHECK: {{^}}    if true
+# CHECK: {{^}}        echo "
+# CHECK: {{^}}multi
+# CHECK: {{^}}line
+# CHECK: {{^}}"
+# CHECK: {{^}}    end
+# CHECK: {{^}})