diff --git a/CMakeLists.txt b/CMakeLists.txt index 305166c81..981d8266b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,7 +113,6 @@ set(FISH_SRCS src/expand.cpp src/fallback.cpp src/fds.cpp - src/fish_indent_common.cpp src/fish_version.cpp src/flog.cpp src/highlight.cpp diff --git a/fish-rust/src/compat.rs b/fish-rust/src/compat.rs index be529e65a..87e24e3ad 100644 --- a/fish-rust/src/compat.rs +++ b/fish-rust/src/compat.rs @@ -50,6 +50,7 @@ extern "C" { pub fn C_O_EXLOCK() -> c_int; pub fn stdout_stream() -> *mut libc::FILE; pub fn UVAR_FILE_SET_MTIME_HACK() -> bool; + pub fn setlinebuf(stream: *mut libc::FILE); } macro_rules! CVAR { diff --git a/fish-rust/src/fds.rs b/fish-rust/src/fds.rs index 6d148a8cf..742599ff7 100644 --- a/fish-rust/src/fds.rs +++ b/fish-rust/src/fds.rs @@ -164,7 +164,7 @@ pub fn make_autoclose_pipes() -> Option { } /// Sets CLO_EXEC on a given fd according to the value of \p should_set. -pub fn set_cloexec(fd: RawFd, should_set: bool) -> c_int { +pub fn set_cloexec(fd: RawFd, should_set: bool /* = true */) -> c_int { // Note we don't want to overwrite existing flags like O_NONBLOCK which may be set. So fetch the // existing flags and modify them. let flags = unsafe { libc::fcntl(fd, F_GETFD, 0) }; diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 872d522c5..70e7e644a 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -24,7 +24,6 @@ include_cpp! { #include "exec.h" #include "fallback.h" #include "fds.h" - #include "fish_indent_common.h" #include "flog.h" #include "function.h" #include "highlight.h" @@ -83,8 +82,6 @@ include_cpp! { generate!("wgettext_ptr") - generate!("pretty_printer_t") - generate!("fd_event_signaller_t") generate!("highlight_role_t") diff --git a/fish-rust/src/fish_indent.rs b/fish-rust/src/fish_indent.rs index cc655a719..70d8ef2f6 100644 --- a/fish-rust/src/fish_indent.rs +++ b/fish-rust/src/fish_indent.rs @@ -1,54 +1,700 @@ -use crate::ast::{self, Category, Node, NodeFfi, NodeVisitor, Type}; -use crate::ffi::pretty_printer_t; -use crate::parse_constants::ParseTokenType; -use std::pin::Pin; +//! The fish_indent program. -struct PrettyPrinter<'a> { - companion: Pin<&'a mut pretty_printer_t>, +use std::ffi::{CString, OsStr}; +use std::io::{stdin, Read, Write}; +use std::os::unix::ffi::OsStrExt; +use std::sync::atomic::Ordering; + +use libc::{LC_ALL, STDOUT_FILENO}; + +use crate::ast::{ + self, Ast, Category, Leaf, List, Node, NodeVisitor, SourceRangeList, Traversal, Type, +}; +use crate::builtins::shared::STATUS_CMD_ERROR; +use crate::common::{ + str2wcstring, unescape_string, wcs2string, wcs2zstring, UnescapeFlags, UnescapeStringStyle, + PROGRAM_NAME, +}; +use crate::compat::setlinebuf; +use crate::env::env_init; +use crate::env::environment::Environment; +use crate::env::EnvStack; +use crate::expand::INTERNAL_SEPARATOR; +use crate::fds::set_cloexec; +use crate::future::IsSomeAnd; +use crate::future::IsSorted; +use crate::global_safety::RelaxedAtomicBool; +use crate::highlight::{colorize, highlight_shell, HighlightRole, HighlightSpec}; +use crate::operation_context::OperationContext; +use crate::parse_constants::{ParseTokenType, ParseTreeFlags, SourceRange}; +use crate::parse_util::parse_util_compute_indents; +use crate::threads; +use crate::tokenizer::{TokenType, Tokenizer, TOK_SHOW_BLANK_LINES, TOK_SHOW_COMMENTS}; +use crate::topic_monitor::topic_monitor_init; +use crate::wchar::prelude::*; +use crate::wchar_ffi::WCharToFFI; +use crate::wcstringutil::count_preceding_backslashes; +use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; +use crate::wutil::perror; +use crate::wutil::{fish_iswalnum, write_to_fd}; +use crate::{ffi, print_help::print_help}; +use crate::{ + flog::{self, activate_flog_categories_by_pattern, set_flog_file_fd}, + future_feature_flags, +}; + +// The number of spaces per indent isn't supposed to be configurable. +// See discussion at https://github.com/fish-shell/fish-shell/pull/6790 +const SPACES_PER_INDENT: usize = 4; + +/// Note: this got somewhat more complicated after introducing the new AST, because that AST no +/// longer encodes detailed lexical information (e.g. every newline). This feels more complex +/// than necessary and would probably benefit from a more layered approach where we identify +/// certain runs, weight line breaks, have a cost model, etc. +struct PrettyPrinter<'source, 'ast> { + /// The parsed ast. + ast: Ast, + + state: PrettyPrinterState<'source, 'ast>, } -impl<'a> NodeVisitor<'a> for &mut PrettyPrinter<'a> { + +struct PrettyPrinterState<'source, 'ast> { + /// Original source. + source: &'source wstr, + + /// The indents of our string. + /// This has the same length as 'source' and describes the indentation level. + indents: Vec, + + /// The prettifier output. + output: WString, + + // The indent of the source range which we are currently emitting. + current_indent: usize, + + // Whether the next gap text should hide the first newline. + gap_text_mask_newline: bool, + + // The "gaps": a sorted set of ranges between tokens. + // These contain whitespace, comments, semicolons, and other lexical elements which are not + // present in the ast. + gaps: Vec, + + // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines. + // This is computed ahead of time for convenience. + preferred_semi_locations: Vec, + + errors: Option<&'ast SourceRangeList>, +} + +/// Flags we support. +#[derive(Copy, Clone, Default)] +struct GapFlags { + /// Whether to allow line splitting via escaped newlines. + /// For example, in argument lists: + /// + /// echo a \ + /// b + /// + /// If this is not set, then split-lines will be joined. + allow_escaped_newlines: bool, + + /// Whether to require a space before this token. + /// This is used when emitting semis: + /// echo a; echo b; + /// No space required between 'a' and ';', or 'b' and ';'. + skip_space: bool, +} + +impl<'source, 'ast> PrettyPrinter<'source, 'ast> { + fn new(source: &'source wstr, do_indent: bool) -> Self { + let mut zelf = Self { + ast: Ast::parse(source, parse_flags(), None), + state: PrettyPrinterState { + source, + indents: if do_indent + /* Whether to indent, or just insert spaces. */ + { + parse_util_compute_indents(source) + } else { + vec![0; source.len()] + }, + output: WString::default(), + current_indent: 0, + gap_text_mask_newline: false, + gaps: vec![], + preferred_semi_locations: vec![], + errors: None, + }, + }; + zelf.state.gaps = zelf.compute_gaps(); + zelf.state.preferred_semi_locations = zelf.compute_preferred_semi_locations(); + zelf + } + + // Entry point. Prettify our source code and return it. + fn prettify(&'ast mut self) -> WString { + self.state.output.clear(); + self.state.errors = Some(&self.ast.extras.errors); + self.state.visit(self.ast.top()); + + // Trailing gap text. + self.state.emit_gap_text_before( + SourceRange::new(self.state.source.len(), 0), + GapFlags::default(), + ); + + // Replace all trailing newlines with just a single one. + while !self.state.output.is_empty() && self.state.at_line_start() { + self.state.output.pop(); + } + self.state.emit_newline(); + + std::mem::replace(&mut self.state.output, WString::new()) + } + + // Return the gap ranges from our ast. + fn compute_gaps(&self) -> Vec { + let range_compare = |r1: SourceRange, r2: SourceRange| { + (r1.start(), r1.length()).cmp(&(r2.start(), r2.length())) + }; + // Collect the token ranges into a list. + let mut tok_ranges = vec![]; + for node in Traversal::new(self.ast.top()) { + if node.category() == Category::leaf { + let r = node.source_range(); + if r.length() > 0 { + tok_ranges.push(r); + } + } + } + // Place a zero length range at end to aid in our inverting. + tok_ranges.push(SourceRange::new(self.state.source.len(), 0)); + + // Our tokens should be sorted. + assert!(IsSorted::is_sorted_by(&tok_ranges, |x, y| Some( + range_compare(*x, *y) + ))); + + // For each range, add a gap range between the previous range and this range. + let mut gaps = vec![]; + let mut prev_end = 0; + for tok_range in tok_ranges { + assert!( + tok_range.start() >= prev_end, + "Token range should not overlap or be out of order" + ); + if tok_range.start() >= prev_end { + gaps.push(SourceRange::new(prev_end, tok_range.start() - prev_end)); + } + prev_end = tok_range.start() + tok_range.length(); + } + gaps + } + + // Return sorted list of semi-preferring semi_nl nodes. + fn compute_preferred_semi_locations(&self) -> Vec { + let mut result = vec![]; + let mut mark_semi_from_input = |n: &ast::SemiNl| { + let Some(range) = n.range() else { + return; + }; + if self.state.substr(range) == ";" { + result.push(range.start()); + } + }; + + // andor_job_lists get semis if the input uses semis. + for node in Traversal::new(self.ast.top()) { + // See if we have a condition and an andor_job_list. + let condition; + let andors; + if let Some(ifc) = node.as_if_clause() { + condition = ifc.condition.semi_nl.as_ref(); + andors = &ifc.andor_tail; + } else if let Some(wc) = node.as_while_header() { + condition = wc.condition.semi_nl.as_ref(); + andors = &wc.andor_tail; + } else { + continue; + } + + // If there is no and-or tail then we always use a newline. + if andors.count() > 0 { + condition.map(&mut mark_semi_from_input); + // Mark all but last of the andor list. + for andor in andors.iter().take(andors.count() - 1) { + mark_semi_from_input(andor.job.semi_nl.as_ref().unwrap()); + } + } + } + + // `x ; and y` gets semis if it has them already, and they are on the same line. + for node in Traversal::new(self.ast.top()) { + let Some(job_list) = node.as_job_list() else { + continue; + }; + let mut prev_job_semi_nl = None; + for job in job_list { + // Set up prev_job_semi_nl for the next iteration to make control flow easier. + let prev = prev_job_semi_nl; + prev_job_semi_nl = job.semi_nl.as_ref(); + + // Is this an 'and' or 'or' job? + let Some(decorator) = job.decorator.as_ref() else { + continue; + }; + + // Now see if we want to mark 'prev' as allowing a semi. + // Did we have a previous semi_nl which was a newline? + let Some(prev) = prev else { + continue; + }; + if self.state.substr(prev.range().unwrap()) != ";" { + continue; + } + + // Is there a newline between them? + let prev_start = prev.range().unwrap().start(); + let decorator_range = decorator.range().unwrap(); + assert!(prev_start <= decorator_range.start(), "Ranges out of order"); + if !self.state.source[prev_start..decorator_range.end()].contains('\n') { + // We're going to allow the previous semi_nl to be a semi. + result.push(prev_start); + } + } + } + result.sort_unstable(); + result + } +} + +impl<'source, 'ast> PrettyPrinterState<'source, 'ast> { + fn indent(&self, index: usize) -> usize { + usize::try_from(self.indents[index]).unwrap() + } + + // \return gap text flags for the gap text that comes *before* a given node type. + fn gap_text_flags_before_node(&self, node: &dyn Node) -> GapFlags { + let mut result = GapFlags::default(); + match node.typ() { + // Allow escaped newlines before leaf nodes that can be part of a long command. + Type::argument | Type::redirection | Type::variable_assignment => { + result.allow_escaped_newlines = true + } + Type::token_base => { + // Allow escaped newlines before && and ||, and also pipes. + match node.as_token().unwrap().token_type() { + ParseTokenType::andand | ParseTokenType::oror | ParseTokenType::pipe => { + result.allow_escaped_newlines = true; + } + ParseTokenType::string => { + // Allow escaped newlines before commands that follow a variable assignment + // since both can be long (#7955). + let p = node.parent().unwrap(); + if p.typ() != Type::decorated_statement { + return result; + } + let p = p.parent().unwrap(); + assert_eq!(p.typ(), Type::statement); + let p = p.parent().unwrap(); + if let Some(job) = p.as_job_pipeline() { + if !job.variables.is_empty() { + result.allow_escaped_newlines = true; + } + } else if let Some(job_cnt) = p.as_job_continuation() { + if !job_cnt.variables.is_empty() { + result.allow_escaped_newlines = true; + } + } else if let Some(not_stmt) = p.as_not_statement() { + if !not_stmt.variables.is_empty() { + result.allow_escaped_newlines = true; + } + } + } + _ => (), + } + } + _ => (), + } + result + } + + // \return whether we are at the start of a new line. + fn at_line_start(&self) -> bool { + self.output.chars().last().is_none_or(|c| c == '\n') + } + + // \return whether we have a space before the output. + // This ignores escaped spaces and escaped newlines. + fn has_preceding_space(&self) -> bool { + let mut idx = isize::try_from(self.output.len()).unwrap() - 1; + // Skip escaped newlines. + // This is historical. Example: + // + // cmd1 \ + // | cmd2 + // + // we want the pipe to "see" the space after cmd1. + // TODO: this is too tricky, we should factor this better. + while idx >= 0 && self.output.as_char_slice()[usize::try_from(idx).unwrap()] == '\n' { + let backslashes = + count_preceding_backslashes(self.source, usize::try_from(idx).unwrap()); + if backslashes % 2 == 0 { + // Not escaped. + return false; + } + idx -= 1 + isize::try_from(backslashes).unwrap(); + } + usize::try_from(idx).is_ok_and(|idx| { + self.output.as_char_slice()[idx] == ' ' && !char_is_escaped(&self.output, idx) + }) + } + + // \return a substring of source. + fn substr(&self, r: SourceRange) -> &wstr { + &self.source[r.start()..r.end()] + } + + // Emit a space or indent as necessary, depending on the previous output. + fn emit_space_or_indent(&mut self, flags: GapFlags) { + if self.at_line_start() { + self.output + .extend(std::iter::repeat(' ').take(SPACES_PER_INDENT * self.current_indent)); + } else if !flags.skip_space && !self.has_preceding_space() { + self.output.push(' '); + } + } + + // Emit "gap text:" newlines and comments from the original source. + // Gap text may be a few things: + // + // 1. Just a space is common. We will trim the spaces to be empty. + // + // Here the gap text is the comment, followed by the newline: + // + // echo abc # arg + // echo def + // + // 2. It may also be an escaped newline: + // Here the gap text is a space, backslash, newline, space. + // + // echo \ + // hi + // + // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe: + // + // begin | stuff + // + // We do not handle errors here - instead our caller does. + fn emit_gap_text(&mut self, range: SourceRange, flags: GapFlags) -> bool { + let gap_text = &self.source[range.start()..range.end()]; + // Common case: if we are only spaces, do nothing. + if !gap_text.chars().any(|c| c != ' ') { + return false; + } + + // Look to see if there is an escaped newline. + // Emit it if either we allow it, or it comes before the first comment. + // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap + // text - we already know it has no semantic significance. + if let Some(escaped_nl) = gap_text.find(L!("\\\n")) { + let comment_idx = gap_text.find(L!("#")); + if flags.allow_escaped_newlines + || comment_idx.is_some_and(|comment_idx| escaped_nl < comment_idx) + { + // Emit a space before the escaped newline. + if !self.at_line_start() && !self.has_preceding_space() { + self.output.push_str(" "); + } + self.output.push_str("\\\n"); + // Indent the continuation line and any leading comments (#7252). + // Use the indentation level of the next newline. + self.current_indent = self.indent(range.start() + escaped_nl + 1); + self.emit_space_or_indent(GapFlags::default()); + } + } + + // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we + // always emit one. + let mut needs_nl = false; + + let mut tokenizer = Tokenizer::new(gap_text, TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); + while let Some(tok) = tokenizer.next() { + let tok_text = tokenizer.text_of(&tok); + + if needs_nl { + self.emit_newline(); + needs_nl = false; + if tok_text == "\n" { + continue; + } + } else if self.gap_text_mask_newline { + // We only respect mask_newline the first time through the loop. + self.gap_text_mask_newline = false; + if tok_text == "\n" { + continue; + } + } + + if tok.type_ == TokenType::comment { + self.emit_space_or_indent(GapFlags::default()); + self.output.push_utfstr(tok_text); + needs_nl = true; + } else if tok.type_ == TokenType::end { + // This may be either a newline or semicolon. + // Semicolons found here are not part of the ast and can simply be removed. + // Newlines are preserved unless mask_newline is set. + if tok_text == "\n" { + self.emit_newline(); + } + } else { + panic!("Gap text should only have comments and newlines - instead found token type {:?} with text: {}", + tok.type_, tok_text); + } + } + if needs_nl { + self.emit_newline(); + } + needs_nl + } + + /// \return the gap text ending at a given index into the string, or empty if none. + fn gap_text_to(&self, end: usize) -> SourceRange { + match self.gaps.binary_search_by(|r| r.end().cmp(&end)) { + Ok(pos) => self.gaps[pos], + Err(_) => { + // Not found. + SourceRange::new(0, 0) + } + } + } + + /// \return whether a range \p r overlaps an error range from our ast. + fn range_contained_error(&self, r: SourceRange) -> bool { + let errs = self.errors.as_ref().unwrap(); + let range_is_before = |x: SourceRange, y: SourceRange| x.end().cmp(&y.start()); + assert!(IsSorted::is_sorted_by(errs, |&x, &y| Some( + range_is_before(x, y) + ))); + errs.partition_point(|&range| range_is_before(range, r).is_lt()) != errs.len() + } + + // Emit the gap text before a source range. + fn emit_gap_text_before(&mut self, r: SourceRange, flags: GapFlags) -> bool { + assert!(r.start() <= self.source.len(), "source out of bounds"); + let mut added_newline = false; + + // Find the gap text which ends at start. + let range = self.gap_text_to(r.start()); + if range.length() > 0 { + // Set the indent from the beginning of this gap text. + // For example: + // begin + // cmd + // # comment + // end + // Here the comment is the gap text before the end, but we want the indent from the + // command. + if range.start() < self.indents.len() { + self.current_indent = self.indent(range.start()); + } + + // If this range contained an error, append the gap text without modification. + // For example in: echo foo " + // We don't want to mess with the quote. + if self.range_contained_error(range) { + self.output + .push_utfstr(&self.source[range.start()..range.end()]); + } else { + added_newline = self.emit_gap_text(range, flags); + } + } + // Always clear gap_text_mask_newline after emitting even empty gap text. + self.gap_text_mask_newline = false; + added_newline + } + + /// Given a string \p input, remove unnecessary quotes, etc. + fn clean_text(&self, input: &wstr) -> WString { + // Unescape the string - this leaves special markers around if there are any + // expansions or anything. We specifically tell it to not compute backslash-escapes + // like \U or \x, because we want to leave them intact. + let mut unescaped = unescape_string( + input, + UnescapeStringStyle::Script(UnescapeFlags::SPECIAL | UnescapeFlags::NO_BACKSLASHES), + ) + .unwrap(); + + // Remove INTERNAL_SEPARATOR because that's a quote. + let quote = |ch| ch == INTERNAL_SEPARATOR; + unescaped.retain(|c| !quote(c)); + + // If only "good" chars are left, use the unescaped version. + // This can be extended to other characters, but giving the precise list is tough, + // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes + // people feel more at ease. + let goodchars = |ch| fish_iswalnum(ch) || matches!(ch, '_' | '-' | '/'); + if unescaped.chars().all(goodchars) && !unescaped.is_empty() { + unescaped + } else { + input.to_owned() + } + } + + // Emit a range of original text. This indents as needed, and also inserts preceding gap text. + // If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such + // lines. + fn emit_text(&mut self, r: SourceRange, flags: GapFlags) { + self.emit_gap_text_before(r, flags); + self.current_indent = self.indent(r.start()); + if r.length() > 0 { + self.emit_space_or_indent(flags); + self.output.push_utfstr(&self.clean_text(self.substr(r))); + } + } + + fn emit_node_text(&mut self, node: &dyn Node) { + // Weird special-case: a token may end in an escaped newline. Notably, the newline is + // not part of the following gap text, handle indentation here (#8197). + let mut range = node.source_range(); + let ends_with_escaped_nl = self.substr(range).ends_with("\\\n"); + if ends_with_escaped_nl { + range.length -= 2; + } + + self.emit_text(range, self.gap_text_flags_before_node(node)); + + if ends_with_escaped_nl { + // By convention, escaped newlines are preceded with a space. + self.output.push_str(" \\\n"); + // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents. + // The cases where this matters are probably very rare. + self.current_indent += 1; + self.emit_space_or_indent(GapFlags::default()); + self.current_indent -= 1; + } + } + + // Emit one newline. + fn emit_newline(&mut self) { + self.output.push('\n'); + } + + // Emit a semicolon. + fn emit_semi(&mut self) { + self.output.push(';'); + } + + fn visit_semi_nl(&mut self, node: &dyn ast::Token) { + // These are semicolons or newlines which are part of the ast. That means it includes e.g. + // ones terminating a job or 'if' header, but not random semis in job lists. We respect + // preferred_semi_locations to decide whether or not these should stay as newlines or + // become semicolons. + let range = node.source_range(); + + // Check if we should prefer a semicolon. + let prefer_semi = range.length() > 0 + && self + .preferred_semi_locations + .binary_search(&range.start()) + .is_ok(); + + self.emit_gap_text_before(range, self.gap_text_flags_before_node(node.as_node())); + + // Don't emit anything if the gap text put us on a newline (because it had a comment). + if !self.at_line_start() { + if prefer_semi { + self.emit_semi(); + } else { + self.emit_newline(); + } + + // If it was a semi but we emitted a newline, swallow a subsequent newline. + if !prefer_semi && self.substr(range) == ";" { + self.gap_text_mask_newline = true; + } + } + } + + fn visit_redirection(&mut self, node: &ast::Redirection) { + // No space between a redirection operator and its target (#2899). + self.emit_text(node.oper.range().unwrap(), GapFlags::default()); + self.emit_text( + node.target.range().unwrap(), + GapFlags { + skip_space: true, + ..Default::default() + }, + ); + } + + fn visit_maybe_newlines(&mut self, node: &ast::MaybeNewlines) { + // Our newlines may have comments embedded in them, example: + // cmd | + // # something + // cmd2 + // Treat it as gap text. + if node.range().unwrap().length() == 0 { + return; + } + let flags = self.gap_text_flags_before_node(node); + let range = node.range().unwrap(); + self.current_indent = self.indent(range.start()); + let added_newline = self.emit_gap_text_before(range, flags); + let mut gap_range = range; + if added_newline && gap_range.length() > 0 && self.source.char_at(gap_range.start()) == '\n' + { + gap_range.start += 1; + } + self.emit_gap_text(gap_range, flags); + } + + fn visit_begin_header(&mut self) { + if !self.at_line_start() { + self.emit_newline(); + } + } +} + +// The flags we use to parse. +fn parse_flags() -> ParseTreeFlags { + ParseTreeFlags::CONTINUE_AFTER_ERROR + | ParseTreeFlags::INCLUDE_COMMENTS + | ParseTreeFlags::LEAVE_UNTERMINATED + | ParseTreeFlags::SHOW_BLANK_LINES +} + +impl<'source, 'ast> NodeVisitor<'_> for PrettyPrinterState<'source, 'ast> { // Default implementation is to just visit children. - fn visit(&mut self, node: &'a dyn Node) { - let ffi_node = NodeFfi::new(node); + fn visit(&mut self, node: &'_ dyn Node) { // Leaf nodes we just visit their text. if node.as_keyword().is_some() { - self.companion - .as_mut() - .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + self.emit_node_text(node); return; } if let Some(token) = node.as_token() { if token.token_type() == ParseTokenType::end { - self.companion - .as_mut() - .visit_semi_nl((&ffi_node as *const NodeFfi<'_>).cast()); + self.visit_semi_nl(token); return; } - self.companion - .as_mut() - .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + self.emit_node_text(node); return; } match node.typ() { Type::argument | Type::variable_assignment => { - self.companion - .as_mut() - .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + self.emit_node_text(node); } Type::redirection => { - self.companion.as_mut().visit_redirection( - (node.as_redirection().unwrap() as *const ast::Redirection).cast(), - ); + self.visit_redirection(node.as_redirection().unwrap()); } Type::maybe_newlines => { - self.companion.as_mut().visit_maybe_newlines( - (node.as_maybe_newlines().unwrap() as *const ast::MaybeNewlines).cast(), - ); + self.visit_maybe_newlines(node.as_maybe_newlines().unwrap()); } Type::begin_header => { // 'begin' does not require a newline after it, but we insert one. node.accept(self, false); - self.companion.as_mut().visit_begin_header(); + self.visit_begin_header(); } _ => { // For branch and list nodes, default is to visit their children. @@ -62,31 +708,426 @@ impl<'a> NodeVisitor<'a> for &mut PrettyPrinter<'a> { } } -#[cxx::bridge] -#[allow(clippy::needless_lifetimes)] // false positive -mod fish_indent_ffi { - extern "C++" { - include!("ast.h"); - include!("fish_indent_common.h"); - type pretty_printer_t = crate::ffi::pretty_printer_t; - type Ast = crate::ast::Ast; - type NodeFfi<'a> = crate::ast::NodeFfi<'a>; +/// \return whether a character at a given index is escaped. +/// A character is escaped if it has an odd number of backslashes. +fn char_is_escaped(text: &wstr, idx: usize) -> bool { + count_preceding_backslashes(text, idx) % 2 == 1 +} + +fn fish_indent_main() -> i32 { + PROGRAM_NAME.set(L!("fish_indent")).unwrap(); + + topic_monitor_init(); + threads::init(); + // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's + // because the fish project assumes Unicode UTF-8 encoding in all of its scripts. + // + // TODO: Auto-detect the encoding of the script. We should look for a vim style comment + // (e.g., "# vim: set fileencoding=:") or an emacs style comment + // (e.g., "# -*- coding: -*-"). + { + let s = CString::new("").unwrap(); + unsafe { libc::setlocale(LC_ALL, s.as_ptr()) }; } - extern "Rust" { - type PrettyPrinter<'a>; - unsafe fn new_pretty_printer( - companion: Pin<&mut pretty_printer_t>, - ) -> Box>; - #[cxx_name = "visit"] - unsafe fn visit_ffi<'a>(self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>); + env_init(None, true, false); + + if let Some(features_var) = EnvStack::globals().get(L!("fish_features")) { + for s in features_var.as_list() { + future_feature_flags::set_from_string(s.as_utfstr()); + } + } + + // Types of output we support. + #[derive(Eq, PartialEq)] + enum OutputType { + PlainText, + File, + Ansi, + PygmentsCsv, + Check, + Html, + } + + let mut output_type = OutputType::PlainText; + let mut output_location = L!(""); + let mut do_indent = true; + // File path for debug output. + let mut debug_output = None; + + const short_opts: &wstr = L!("+d:hvwicD:"); + const long_opts: &[woption] = &[ + wopt(L!("debug"), woption_argument_t::required_argument, 'd'), + wopt( + L!("debug-output"), + woption_argument_t::required_argument, + 'o', + ), + wopt( + L!("debug-stack-frames"), + woption_argument_t::required_argument, + 'D', + ), + wopt(L!("dump-parse-tree"), woption_argument_t::no_argument, 'P'), + wopt(L!("no-indent"), woption_argument_t::no_argument, 'i'), + wopt(L!("help"), woption_argument_t::no_argument, 'h'), + wopt(L!("version"), woption_argument_t::no_argument, 'v'), + wopt(L!("write"), woption_argument_t::no_argument, 'w'), + wopt(L!("html"), woption_argument_t::no_argument, '\x01'), + wopt(L!("ansi"), woption_argument_t::no_argument, '\x02'), + wopt(L!("pygments"), woption_argument_t::no_argument, '\x03'), + wopt(L!("check"), woption_argument_t::no_argument, 'c'), + ]; + + let args: Vec = std::env::args_os() + .map(|osstr| str2wcstring(osstr.as_bytes())) + .collect(); + let mut shim_args: Vec<&wstr> = args.iter().map(|s| s.as_ref()).collect(); + let mut w = wgetopter_t::new(short_opts, long_opts, &mut shim_args); + + while let Some(c) = w.wgetopt_long() { + match c { + 'P' => DUMP_PARSE_TREE.store(true), + 'h' => print_help("fish_indent"), + 'v' => printf!( + "%s", + wgettext_fmt!( + "%s, version %s\n", + PROGRAM_NAME.get().unwrap(), + crate::BUILD_VERSION + ) + ), + 'w' => output_type = OutputType::File, + 'i' => do_indent = false, + '\x01' => output_type = OutputType::Html, + '\x02' => output_type = OutputType::Ansi, + '\x03' => output_type = OutputType::PygmentsCsv, + 'c' => output_type = OutputType::Check, + 'd' => { + ffi::activate_flog_categories_by_pattern(w.woptarg.unwrap().to_ffi()); + activate_flog_categories_by_pattern(w.woptarg.unwrap()); + for cat in flog::categories::all_categories() { + if cat.enabled.load(Ordering::Relaxed) { + printf!("Debug enabled for category: %s\n", cat.name); + } + } + } + 'D' => { + // TODO: Option is currently useless. + // Either remove it or make it work with FLOG. + } + 'o' => { + debug_output = Some(w.woptarg.unwrap()); + } + _ => return STATUS_CMD_ERROR.unwrap(), + } + } + + let args = &w.argv[w.woptind..]; + + // Direct any debug output right away. + if let Some(debug_output) = debug_output { + let file = { + let debug_output = wcs2zstring(debug_output); + let mode = CString::new("w").unwrap(); + unsafe { libc::fopen(debug_output.as_ptr(), mode.as_ptr()) } + }; + if file.is_null() { + eprintf!("Could not open file %s\n", debug_output); + perror("fopen"); + return -1; + } + let fd = unsafe { libc::fileno(file) }; + set_cloexec(fd, true); + unsafe { setlinebuf(file) }; + set_flog_file_fd(fd); + } + + let mut retval = 0; + + let mut src; + let mut i = 0; + while i < args.len() || (args.is_empty() && i == 0) { + if args.is_empty() && i == 0 { + if output_type == OutputType::File { + eprintf!( + "%s", + wgettext_fmt!( + "Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n", + PROGRAM_NAME.get().unwrap() + ) + ); + return STATUS_CMD_ERROR.unwrap(); + } + match read_file(stdin()) { + Ok(s) => src = s, + Err(()) => return STATUS_CMD_ERROR.unwrap(), + } + } else { + let arg = args[i]; + match std::fs::File::open(OsStr::from_bytes(&wcs2string(arg))) { + Ok(file) => { + match read_file(file) { + Ok(s) => src = s, + Err(()) => return STATUS_CMD_ERROR.unwrap(), + } + output_location = arg; + } + Err(err) => { + eprintf!( + "%s", + wgettext_fmt!("Opening \"%s\" failed: %s\n", arg, err.to_string()) + ); + return STATUS_CMD_ERROR.unwrap(); + } + } + } + + if output_type == OutputType::PygmentsCsv { + let output = make_pygments_csv(&src); + let _ = write_to_fd(&output, STDOUT_FILENO); + i += 1; + continue; + } + + let output_wtext = prettify(&src, do_indent); + + // Maybe colorize. + let mut colors = vec![]; + if output_type != OutputType::PlainText { + highlight_shell( + &output_wtext, + &mut colors, + &OperationContext::globals(), + false, + None, + ); + } + + let mut colored_output = vec![]; + match output_type { + OutputType::PlainText => { + colored_output = no_colorize(&output_wtext); + } + OutputType::File => { + match std::fs::File::create(OsStr::from_bytes(&wcs2string(output_location))) { + Ok(mut file) => { + let _ = file.write_all(&wcs2string(&output_wtext)); + } + Err(err) => { + eprintf!( + "%s", + wgettext_fmt!( + "Opening \"%s\" failed: %s\n", + output_location, + err.to_string() + ) + ); + return STATUS_CMD_ERROR.unwrap(); + } + } + } + OutputType::Ansi => { + colored_output = colorize( + &output_wtext, + &colors, + EnvStack::globals().as_ref().get_ref(), + ); + } + OutputType::Html => { + colored_output = html_colorize(&output_wtext, &colors); + } + OutputType::PygmentsCsv => { + unreachable!() + } + OutputType::Check => { + if output_wtext != src { + if let Some(arg) = args.get(i) { + eprintf!("%s\n", arg); + } + retval += 1; + } + } + } + + let _ = write_to_fd(&colored_output, STDOUT_FILENO); + i += 1; + } + retval +} + +static DUMP_PARSE_TREE: RelaxedAtomicBool = RelaxedAtomicBool::new(false); + +// Read the entire contents of a file into the specified string. +fn read_file(mut f: impl Read) -> Result { + let mut buf = vec![]; + f.read_to_end(&mut buf).map_err(|_| ())?; + Ok(str2wcstring(&buf)) +} + +fn highlight_role_to_string(role: HighlightRole) -> &'static wstr { + match role { + HighlightRole::normal => L!("normal"), + HighlightRole::error => L!("error"), + HighlightRole::command => L!("command"), + HighlightRole::keyword => L!("keyword"), + HighlightRole::statement_terminator => L!("statement_terminator"), + HighlightRole::param => L!("param"), + HighlightRole::option => L!("option"), + HighlightRole::comment => L!("comment"), + HighlightRole::search_match => L!("search_match"), + HighlightRole::operat => L!("operat"), + HighlightRole::escape => L!("escape"), + HighlightRole::quote => L!("quote"), + HighlightRole::redirection => L!("redirection"), + HighlightRole::autosuggestion => L!("autosuggestion"), + HighlightRole::selection => L!("selection"), + HighlightRole::pager_progress => L!("pager_progress"), + HighlightRole::pager_background => L!("pager_background"), + HighlightRole::pager_prefix => L!("pager_prefix"), + HighlightRole::pager_completion => L!("pager_completion"), + HighlightRole::pager_description => L!("pager_description"), + HighlightRole::pager_secondary_background => L!("pager_secondary_background"), + HighlightRole::pager_secondary_prefix => L!("pager_secondary_prefix"), + HighlightRole::pager_secondary_completion => L!("pager_secondary_completion"), + HighlightRole::pager_secondary_description => L!("pager_secondary_description"), + HighlightRole::pager_selected_background => L!("pager_selected_background"), + HighlightRole::pager_selected_prefix => L!("pager_selected_prefix"), + HighlightRole::pager_selected_completion => L!("pager_selected_completion"), + HighlightRole::pager_selected_description => L!("pager_selected_description"), + _ => unreachable!(), } } -fn new_pretty_printer(companion: Pin<&mut pretty_printer_t>) -> Box> { - Box::new(PrettyPrinter { companion }) +// Entry point for Pygments CSV output. +// Our output is a newline-separated string. +// Each line is of the form `start,end,role` +// start and end is the half-open token range, value is a string from highlight_role_t. +// Example: +// 3,7,command +fn make_pygments_csv(src: &wstr) -> Vec { + let mut colors = vec![]; + highlight_shell(src, &mut colors, &OperationContext::globals(), false, None); + assert_eq!( + colors.len(), + src.len(), + "Colors and src should have same size" + ); + + struct TokenRange { + start: usize, + end: usize, + role: HighlightRole, + } + + let mut token_ranges: Vec = vec![]; + for (i, color) in colors.iter().cloned().enumerate() { + let role = color.foreground; + // See if we can extend the last range. + if let Some(last) = token_ranges.last_mut() { + if last.role == role && last.end == i { + last.end = i + 1; + continue; + } + } + // We need a new range. + token_ranges.push(TokenRange { + start: i, + end: i + 1, + role, + }); + } + + // Now render these to a string. + let mut result = String::new(); + for range in token_ranges { + result += &format!( + "{},{},{}\n", + range.start, + range.end, + highlight_role_to_string(range.role) + ); + } + result.into_bytes() } -impl<'a> PrettyPrinter<'a> { - fn visit_ffi(mut self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>) { - self.visit(node.as_node()); + +// Entry point for prettification. +fn prettify(src: &wstr, do_indent: bool) -> WString { + if DUMP_PARSE_TREE.load() { + let ast = Ast::parse( + src, + ParseTreeFlags::LEAVE_UNTERMINATED + | ParseTreeFlags::INCLUDE_COMMENTS + | ParseTreeFlags::SHOW_EXTRA_SEMIS, + None, + ); + let ast_dump = ast.dump(src); + eprintf!("%s\n", ast_dump); + } + let mut printer = PrettyPrinter::new(src, do_indent); + printer.prettify() +} + +/// Given a string and list of colors of the same size, return the string with HTML span elements +/// for the various colors. +fn html_class_name_for_color(spec: HighlightSpec) -> &'static wstr { + match spec.foreground { + HighlightRole::normal => L!("fish_color_normal"), + HighlightRole::error => L!("fish_color_error"), + HighlightRole::command => L!("fish_color_command"), + HighlightRole::statement_terminator => L!("fish_color_statement_terminator"), + HighlightRole::param => L!("fish_color_param"), + HighlightRole::option => L!("fish_color_option"), + HighlightRole::comment => L!("fish_color_comment"), + HighlightRole::search_match => L!("fish_color_search_match"), + HighlightRole::operat => L!("fish_color_operator"), + HighlightRole::escape => L!("fish_color_escape"), + HighlightRole::quote => L!("fish_color_quote"), + HighlightRole::redirection => L!("fish_color_redirection"), + HighlightRole::autosuggestion => L!("fish_color_autosuggestion"), + HighlightRole::selection => L!("fish_color_selection"), + _ => L!("fish_color_other"), + } +} + +fn html_colorize(text: &wstr, colors: &[HighlightSpec]) -> Vec { + if text.is_empty() { + return vec![]; + } + + assert_eq!(colors.len(), text.len()); + let mut html = L!("
").to_owned();
+    let mut last_color = HighlightSpec::new();
+    for (i, (wc, &color)) in text.chars().zip(colors).enumerate() {
+        // Handle colors.
+        if i > 0 && color != last_color {
+            html.push_str("");
+        }
+        if i == 0 || color != last_color {
+            sprintf!(=> &mut html, "", html_class_name_for_color(color));
+        }
+        last_color = color;
+
+        // Handle text.
+        match wc {
+            '&' => html.push_str("&"),
+            '\'' => html.push_str("'"),
+            '"' => html.push_str("""),
+            '<' => html.push_str("<"),
+            '>' => html.push_str(">"),
+            _ => html.push(wc),
+        }
+    }
+    html.push_str("
"); + wcs2string(&html) +} + +fn no_colorize(text: &wstr) -> Vec { + wcs2string(text) +} + +#[cxx::bridge] +mod fish_indent_ffi { + extern "Rust" { + fn fish_indent_main() -> i32; } } diff --git a/fish-rust/src/future.rs b/fish-rust/src/future.rs index 2fcc3f3bc..41174c94b 100644 --- a/fish-rust/src/future.rs +++ b/fish-rust/src/future.rs @@ -23,3 +23,28 @@ impl IsSomeAnd for Option { } } } + +pub trait IsSorted { + type T; + fn is_sorted_by(&self, pred: impl Fn(&Self::T, &Self::T) -> Option) + -> bool; +} +impl IsSorted for &[T] { + type T = T; + fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option) -> bool { + self.windows(2) + .all(|w| pred(&w[0], &w[1]).is_none_or(|order| order.is_le())) + } +} +impl IsSorted for Vec { + type T = T; + fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option) -> bool { + IsSorted::is_sorted_by(&self.as_slice(), pred) + } +} +impl IsSorted for &Vec { + type T = T; + fn is_sorted_by(&self, pred: impl Fn(&T, &T) -> Option) -> bool { + IsSorted::is_sorted_by(&self.as_slice(), pred) + } +} diff --git a/src/ffi_baggage.h b/src/ffi_baggage.h index 2869e68af..8802d1f2b 100644 --- a/src/ffi_baggage.h +++ b/src/ffi_baggage.h @@ -3,7 +3,6 @@ #include "builtins/commandline.h" #include "event.h" #include "fds.h" -#include "fish_indent_common.h" #include "highlight.h" #include "input.h" #include "parse_util.h" @@ -23,7 +22,6 @@ void mark_as_used(const parser_t& parser, env_stack_t& env_stack) { highlight_spec_t{}; init_input(); make_pipes_ffi(); - pretty_printer_t({}, {}); reader_change_cursor_selection_mode(cursor_selection_mode_t::exclusive); reader_change_history({}); reader_read_ffi({}, {}, {}); diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index b4b8cc1bf..017e161c3 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -41,7 +41,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "fds.h" #include "ffi_baggage.h" #include "ffi_init.rs.h" -#include "fish_indent_common.h" +#include "fish_indent.rs.h" #include "fish_version.h" #include "flog.h" #include "future_feature_flags.h" @@ -52,454 +52,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep -static bool dump_parse_tree = false; -static int ret = 0; - -// Read the entire contents of a file into the specified string. -static wcstring read_file(FILE *f) { - wcstring result; - while (true) { - wint_t c = std::fgetwc(f); - - if (c == WEOF) { - if (ferror(f)) { - if (errno == EILSEQ) { - // Illegal byte sequence. Try to skip past it. - clearerr(f); - int ch = fgetc(f); // for printing the warning, and seeks forward 1 byte. - FLOGF(warning, "%s (byte=%X)", std::strerror(errno), ch); - ret = 1; - continue; - } else { - wperror(L"fgetwc"); - exit(1); - } - } - break; - } - result.push_back(static_cast(c)); - } - return result; -} - -static const char *highlight_role_to_string(highlight_role_t role) { -#define TEST_ROLE(x) \ - case highlight_role_t::x: \ - return #x; - switch (role) { - TEST_ROLE(normal) - TEST_ROLE(error) - TEST_ROLE(command) - TEST_ROLE(keyword) - TEST_ROLE(statement_terminator) - TEST_ROLE(param) - TEST_ROLE(option) - TEST_ROLE(comment) - TEST_ROLE(search_match) - TEST_ROLE(operat) - TEST_ROLE(escape) - TEST_ROLE(quote) - TEST_ROLE(redirection) - TEST_ROLE(autosuggestion) - TEST_ROLE(selection) - TEST_ROLE(pager_progress) - TEST_ROLE(pager_background) - TEST_ROLE(pager_prefix) - TEST_ROLE(pager_completion) - TEST_ROLE(pager_description) - TEST_ROLE(pager_secondary_background) - TEST_ROLE(pager_secondary_prefix) - TEST_ROLE(pager_secondary_completion) - TEST_ROLE(pager_secondary_description) - TEST_ROLE(pager_selected_background) - TEST_ROLE(pager_selected_prefix) - TEST_ROLE(pager_selected_completion) - TEST_ROLE(pager_selected_description) - default: - DIE("UNKNOWN ROLE"); - } -#undef TEST_ROLE -} - -// Entry point for Pygments CSV output. -// Our output is a newline-separated string. -// Each line is of the form `start,end,role` -// start and end is the half-open token range, value is a string from highlight_role_t. -// Example: -// 3,7,command -static std::string make_pygments_csv(const wcstring &src) { - const size_t len = src.size(); - auto colors = highlight_shell_ffi(src, *operation_context_globals(), false, {}); - assert(colors->size() == len && "Colors and src should have same size"); - - struct token_range_t { - unsigned long start; - unsigned long end; - highlight_role_t role; - }; - - std::vector token_ranges; - for (size_t i = 0; i < len; i++) { - highlight_role_t role = colors->at(i).foreground; - // See if we can extend the last range. - if (!token_ranges.empty()) { - auto &last = token_ranges.back(); - if (last.role == role && last.end == i) { - last.end = i + 1; - continue; - } - } - // We need a new range. - token_ranges.push_back(token_range_t{i, i + 1, role}); - } - - // Now render these to a string. - std::string result; - for (const auto &range : token_ranges) { - char buff[128]; - snprintf(buff, sizeof buff, "%lu,%lu,%s\n", range.start, range.end, - highlight_role_to_string(range.role)); - result.append(buff); - } - return result; -} - -// Entry point for prettification. -static wcstring prettify(const wcstring &src, bool do_indent) { - if (dump_parse_tree) { - auto ast = ast_parse(src, parse_flag_leave_unterminated | parse_flag_include_comments | - parse_flag_show_extra_semis); - wcstring ast_dump = *ast->dump(src); - std::fwprintf(stderr, L"%ls\n", ast_dump.c_str()); - } - - pretty_printer_t printer{src, do_indent}; - wcstring output = printer.prettify(); - return output; -} - -/// Given a string and list of colors of the same size, return the string with HTML span elements -/// for the various colors. -static const wchar_t *html_class_name_for_color(highlight_spec_t spec) { -#define P(x) L"fish_color_" #x - switch (spec->foreground) { - case highlight_role_t::normal: { - return P(normal); - } - case highlight_role_t::error: { - return P(error); - } - case highlight_role_t::command: { - return P(command); - } - case highlight_role_t::statement_terminator: { - return P(statement_terminator); - } - case highlight_role_t::param: { - return P(param); - } - case highlight_role_t::option: { - return P(option); - } - case highlight_role_t::comment: { - return P(comment); - } - case highlight_role_t::search_match: { - return P(search_match); - } - case highlight_role_t::operat: { - return P(operator); - } - case highlight_role_t::escape: { - return P(escape); - } - case highlight_role_t::quote: { - return P(quote); - } - case highlight_role_t::redirection: { - return P(redirection); - } - case highlight_role_t::autosuggestion: { - return P(autosuggestion); - } - case highlight_role_t::selection: { - return P(selection); - } - default: { - return P(other); - } - } -} - -static std::string html_colorize(const wcstring &text, - const std::vector &colors) { - if (text.empty()) { - return ""; - } - - assert(colors.size() == text.size()); - wcstring html = L"
";
-    highlight_spec_t last_color = highlight_role_t::normal;
-    for (size_t i = 0; i < text.size(); i++) {
-        // Handle colors.
-        highlight_spec_t color = colors.at(i);
-        if (i > 0 && color != last_color) {
-            html.append(L"");
-        }
-        if (i == 0 || color != last_color) {
-            append_format(html, L"", html_class_name_for_color(color));
-        }
-        last_color = color;
-
-        // Handle text.
-        wchar_t wc = text.at(i);
-        switch (wc) {
-            case L'&': {
-                html.append(L"&");
-                break;
-            }
-            case L'\'': {
-                html.append(L"'");
-                break;
-            }
-            case L'"': {
-                html.append(L""");
-                break;
-            }
-            case L'<': {
-                html.append(L"<");
-                break;
-            }
-            case L'>': {
-                html.append(L">");
-                break;
-            }
-            default: {
-                html.push_back(wc);
-                break;
-            }
-        }
-    }
-    html.append(L"
"); - return wcs2zstring(html); -} - -static std::string no_colorize(const wcstring &text) { return wcs2zstring(text); } - -int main(int argc, char *argv[]) { +int main() { program_name = L"fish_indent"; - rust_init(); - // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's - // because the fish project assumes Unicode UTF-8 encoding in all of its scripts. - // - // TODO: Auto-detect the encoding of the script. We should look for a vim style comment - // (e.g., "# vim: set fileencoding=:") or an emacs style comment - // (e.g., "# -*- coding: -*-"). - setlocale(LC_ALL, ""); - rust_env_init(true); - - if (auto features_var = env_stack_t::globals().get(L"fish_features")) { - for (const wcstring &s : features_var->as_list()) { - feature_set_from_string(s.c_str()); - } - } - - // Types of output we support. - enum { - output_type_plain_text, - output_type_file, - output_type_ansi, - output_type_pygments_csv, - output_type_check, - output_type_html - } output_type = output_type_plain_text; - const char *output_location = ""; - bool do_indent = true; - // File path for debug output. - std::string debug_output; - - const char *short_opts = "+d:hvwicD:"; - const struct option long_opts[] = {{"debug", required_argument, nullptr, 'd'}, - {"debug-output", required_argument, nullptr, 'o'}, - {"debug-stack-frames", required_argument, nullptr, 'D'}, - {"dump-parse-tree", no_argument, nullptr, 'P'}, - {"no-indent", no_argument, nullptr, 'i'}, - {"help", no_argument, nullptr, 'h'}, - {"version", no_argument, nullptr, 'v'}, - {"write", no_argument, nullptr, 'w'}, - {"html", no_argument, nullptr, 1}, - {"ansi", no_argument, nullptr, 2}, - {"pygments", no_argument, nullptr, 3}, - {"check", no_argument, nullptr, 'c'}, - {}}; - - int opt; - while ((opt = getopt_long(argc, argv, short_opts, long_opts, nullptr)) != -1) { - switch (opt) { - case 'P': { - dump_parse_tree = true; - break; - } - case 'h': { - unsafe_print_help("fish_indent"); - exit(0); - } - case 'v': { - std::fwprintf(stdout, _(L"%ls, version %s\n"), program_name, get_fish_version()); - exit(0); - } - case 'w': { - output_type = output_type_file; - break; - } - case 'i': { - do_indent = false; - break; - } - case 1: { - output_type = output_type_html; - break; - } - case 2: { - output_type = output_type_ansi; - break; - } - case 3: { - output_type = output_type_pygments_csv; - break; - } - case 'c': { - output_type = output_type_check; - break; - } - case 'd': { - activate_flog_categories_by_pattern(str2wcstring(optarg)); - for (auto cat : get_flog_categories()) { - if (cat->enabled) { - std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name); - } - } - break; - } - case 'D': { - // TODO: Option is currently useless. - // Either remove it or make it work with FLOG. - break; - } - case 'o': { - debug_output = optarg; - break; - } - default: { - // We assume getopt_long() has already emitted a diagnostic msg. - exit(1); - } - } - } - - argc -= optind; - argv += optind; - - // Direct any debug output right away. - FILE *debug_output_file = nullptr; - if (!debug_output.empty()) { - debug_output_file = fopen(debug_output.c_str(), "w"); - if (!debug_output_file) { - fprintf(stderr, "Could not open file %s\n", debug_output.c_str()); - perror("fopen"); - exit(-1); - } - set_cloexec(fileno(debug_output_file)); - setlinebuf(debug_output_file); - set_flog_output_file(debug_output_file); - } - - int retval = 0; - - wcstring src; - for (int i = 0; i < argc || (argc == 0 && i == 0); i++) { - if (argc == 0 && i == 0) { - if (output_type == output_type_file) { - std::fwprintf( - stderr, _(L"Expected file path to read/write for -w:\n\n $ %ls -w foo.fish\n"), - program_name); - exit(1); - } - src = read_file(stdin); - } else { - FILE *fh = fopen(argv[i], "r"); - if (fh) { - src = read_file(fh); - fclose(fh); - output_location = argv[i]; - } else { - std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), argv[i], - std::strerror(errno)); - exit(1); - } - } - - if (output_type == output_type_pygments_csv) { - std::string output = make_pygments_csv(src); - fputs(output.c_str(), stdout); - continue; - } - - const wcstring output_wtext = prettify(src, do_indent); - - // Maybe colorize. - std::vector colors; - maybe_t> ffi_colors; - if (output_type != output_type_plain_text) { - highlight_shell(output_wtext, colors, *operation_context_globals()); - ffi_colors = highlight_shell_ffi(output_wtext, *operation_context_globals(), false, {}); - } - - std::string colored_output; - switch (output_type) { - case output_type_plain_text: { - colored_output = no_colorize(output_wtext); - break; - } - case output_type_file: { - FILE *fh = fopen(output_location, "w"); - if (fh) { - std::fputws(output_wtext.c_str(), fh); - fclose(fh); - } else { - std::fwprintf(stderr, _(L"Opening \"%s\" failed: %s\n"), output_location, - std::strerror(errno)); - exit(1); - } - break; - } - case output_type_ansi: { - auto ffi_colored = - colorize(output_wtext, **ffi_colors, env_stack_t::globals().get_impl_ffi()); - for (uint8_t c : ffi_colored) { - colored_output.push_back(c); - } - break; - } - case output_type_html: { - colored_output = html_colorize(output_wtext, colors); - break; - } - case output_type_pygments_csv: { - DIE("pygments_csv should have been handled above"); - } - case output_type_check: { - if (output_wtext != src) { - if (argc) { - std::fwprintf(stderr, _(L"%s\n"), argv[i]); - } - retval++; - } - break; - } - } - - std::fputws(str2wcstring(colored_output).c_str(), stdout); - } - return retval; + return fish_indent_main(); } diff --git a/src/fish_indent_common.cpp b/src/fish_indent_common.cpp deleted file mode 100644 index 0cea07aab..000000000 --- a/src/fish_indent_common.cpp +++ /dev/null @@ -1,476 +0,0 @@ -#include "fish_indent_common.h" - -#include "ast.h" -#include "common.h" -#include "env.h" -#include "expand.h" -#include "flog.h" -#include "global_safety.h" -#include "maybe.h" -#include "operation_context.h" -#include "parse_constants.h" -#include "parse_util.h" -#include "tokenizer.h" -#include "wcstringutil.h" -#if INCLUDE_RUST_HEADERS -#include "fish_indent.rs.h" -#endif - -using namespace ast; - -// The number of spaces per indent isn't supposed to be configurable. -// See discussion at https://github.com/fish-shell/fish-shell/pull/6790 -#define SPACES_PER_INDENT 4 - -/// \return whether a character at a given index is escaped. -/// A character is escaped if it has an odd number of backslashes. -static bool char_is_escaped(const wcstring &text, size_t idx) { - return count_preceding_backslashes(text, idx) % 2 == 1; -} - -pretty_printer_t::pretty_printer_t(const wcstring &src, bool do_indent) - : source(src), - indents(do_indent ? parse_util_compute_indents(source) : std::vector(src.size(), 0)), - ast(ast_parse(src, parse_flags())), - visitor(new_pretty_printer(*this)), - do_indent(do_indent), - gaps(compute_gaps()), - preferred_semi_locations(compute_preferred_semi_locations()) { - assert(indents.size() == source.size() && "indents and source should be same length"); -} - -pretty_printer_t::gap_flags_t pretty_printer_t::gap_text_flags_before_node(const node_t &node) { - gap_flags_t result = default_flags; - switch (node.typ()) { - // Allow escaped newlines before leaf nodes that can be part of a long command. - case type_t::argument: - case type_t::redirection: - case type_t::variable_assignment: - result |= allow_escaped_newlines; - break; - - case type_t::token_base: - // Allow escaped newlines before && and ||, and also pipes. - switch (node.token_type()) { - case parse_token_type_t::andand: - case parse_token_type_t::oror: - case parse_token_type_t::pipe: - result |= allow_escaped_newlines; - break; - case parse_token_type_t::string: { - // Allow escaped newlines before commands that follow a variable assignment - // since both can be long (#7955). - auto p = node.parent(); - if (p->typ() != type_t::decorated_statement) break; - p = p->parent(); - assert(p->typ() == type_t::statement); - p = p->parent(); - if (auto *job = p->try_as_job_pipeline()) { - if (!job->variables().empty()) result |= allow_escaped_newlines; - } else if (auto *job_cnt = p->try_as_job_continuation()) { - if (!job_cnt->variables().empty()) result |= allow_escaped_newlines; - } else if (auto *not_stmt = p->try_as_not_statement()) { - if (!not_stmt->variables().empty()) result |= allow_escaped_newlines; - } - break; - } - default: - break; - } - break; - - default: - break; - } - return result; -} - -bool pretty_printer_t::has_preceding_space() const { - long idx = static_cast(output.size()) - 1; - // Skip escaped newlines. - // This is historical. Example: - // - // cmd1 \ - // | cmd2 - // - // we want the pipe to "see" the space after cmd1. - // TODO: this is too tricky, we should factor this better. - while (idx >= 0 && output.at(idx) == L'\n') { - size_t backslashes = count_preceding_backslashes(source, idx); - if (backslashes % 2 == 0) { - // Not escaped. - return false; - } - idx -= (1 + backslashes); - } - return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx); -} - -wcstring pretty_printer_t::prettify() { - output = wcstring{}; - visitor->visit(*ast->top()); - - // Trailing gap text. - emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags); - - // Replace all trailing newlines with just a single one. - while (!output.empty() && at_line_start()) { - output.pop_back(); - } - emit_newline(); - - wcstring result = std::move(output); - return result; -} - -std::vector pretty_printer_t::compute_gaps() const { - auto range_compare = [](source_range_t r1, source_range_t r2) { - if (r1.start != r2.start) return r1.start < r2.start; - return r1.length < r2.length; - }; - // Collect the token ranges into a list. - std::vector tok_ranges; - for (auto ast_traversal = new_ast_traversal(*ast->top());;) { - auto node = ast_traversal->next(); - if (!node->has_value()) break; - if (node->category() == category_t::leaf) { - auto r = node->source_range(); - if (r.length > 0) tok_ranges.push_back(r); - } - } - // Place a zero length range at end to aid in our inverting. - tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0}); - - // Our tokens should be sorted. - assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare)); - - // For each range, add a gap range between the previous range and this range. - std::vector gaps; - uint32_t prev_end = 0; - for (source_range_t tok_range : tok_ranges) { - assert(tok_range.start >= prev_end && "Token range should not overlap or be out of order"); - if (tok_range.start >= prev_end) { - gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end}); - } - prev_end = tok_range.start + tok_range.length; - } - return gaps; -} - -void pretty_printer_t::visit_begin_header() { - if (!at_line_start()) { - emit_newline(); - } -} - -void pretty_printer_t::visit_maybe_newlines(const void *node_) { - const auto &node = *static_cast(node_); - // Our newlines may have comments embedded in them, example: - // cmd | - // # something - // cmd2 - // Treat it as gap text. - if (node.range().length > 0) { - auto flags = gap_text_flags_before_node(*node.ptr()); - current_indent = indents.at(node.range().start); - bool added_newline = emit_gap_text_before(node.range(), flags); - source_range_t gap_range = node.range(); - if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') { - gap_range.start++; - } - emit_gap_text(gap_range, flags); - } -} - -void pretty_printer_t::visit_redirection(const void *node_) { - const auto &node = *static_cast(node_); - // No space between a redirection operator and its target (#2899). - emit_text(node.oper().range(), default_flags); - emit_text(node.target().range(), skip_space); -} - -void pretty_printer_t::visit_semi_nl(const void *node_) { - // These are semicolons or newlines which are part of the ast. That means it includes e.g. - // ones terminating a job or 'if' header, but not random semis in job lists. We respect - // preferred_semi_locations to decide whether or not these should stay as newlines or - // become semicolons. - const auto &node = *static_cast(node_); - auto range = node.source_range(); - - // Check if we should prefer a semicolon. - bool prefer_semi = - range.length > 0 && std::binary_search(preferred_semi_locations.begin(), - preferred_semi_locations.end(), range.start); - emit_gap_text_before(range, gap_text_flags_before_node(*node.ptr())); - - // Don't emit anything if the gap text put us on a newline (because it had a comment). - if (!at_line_start()) { - prefer_semi ? emit_semi() : emit_newline(); - - // If it was a semi but we emitted a newline, swallow a subsequent newline. - if (!prefer_semi && substr(range) == L";") { - gap_text_mask_newline = true; - } - } -} - -void pretty_printer_t::emit_node_text(const void *node_) { - const auto &node = *static_cast(node_); - source_range_t range = node.source_range(); - - // Weird special-case: a token may end in an escaped newline. Notably, the newline is - // not part of the following gap text, handle indentation here (#8197). - bool ends_with_escaped_nl = range.length >= 2 && source.at(range.end() - 2) == L'\\' && - source.at(range.end() - 1) == L'\n'; - if (ends_with_escaped_nl) { - range = {range.start, range.length - 2}; - } - - emit_text(range, gap_text_flags_before_node(node)); - - if (ends_with_escaped_nl) { - // By convention, escaped newlines are preceded with a space. - output.append(L" \\\n"); - // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents. - // The cases where this matters are probably very rare. - current_indent++; - emit_space_or_indent(); - current_indent--; - } -} - -void pretty_printer_t::emit_text(source_range_t r, gap_flags_t flags) { - emit_gap_text_before(r, flags); - current_indent = indents.at(r.start); - if (r.length > 0) { - emit_space_or_indent(flags); - output.append(clean_text(substr(r))); - } -} - -wcstring pretty_printer_t::clean_text(const wcstring &input) { - // Unescape the string - this leaves special markers around if there are any - // expansions or anything. We specifically tell it to not compute backslash-escapes - // like \U or \x, because we want to leave them intact. - wcstring unescaped = - *unescape_string(input.c_str(), input.size(), UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES, - STRING_STYLE_SCRIPT); - - // Remove INTERNAL_SEPARATOR because that's a quote. - auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; }; - unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end()); - - // If no non-"good" char is left, use the unescaped version. - // This can be extended to other characters, but giving the precise list is tough, - // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes - // people feel more at ease. - auto goodchars = [](wchar_t ch) { - return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/'; - }; - if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() && - !unescaped.empty()) { - return unescaped; - } else { - return input; - } -} - -bool pretty_printer_t::emit_gap_text_before(source_range_t r, gap_flags_t flags) { - assert(r.start <= source.size() && "source out of bounds"); - bool added_newline = false; - - // Find the gap text which ends at start. - source_range_t range = gap_text_to(r.start); - if (range.length > 0) { - // Set the indent from the beginning of this gap text. - // For example: - // begin - // cmd - // # comment - // end - // Here the comment is the gap text before the end, but we want the indent from the - // command. - if (range.start < indents.size()) current_indent = indents.at(range.start); - - // If this range contained an error, append the gap text without modification. - // For example in: echo foo " - // We don't want to mess with the quote. - if (range_contained_error(range)) { - output.append(substr(range)); - } else { - added_newline = emit_gap_text(range, flags); - } - } - // Always clear gap_text_mask_newline after emitting even empty gap text. - gap_text_mask_newline = false; - return added_newline; -} - -bool pretty_printer_t::range_contained_error(source_range_t r) const { - const auto &errs = ast->extras()->errors(); - auto range_is_before = [](source_range_t x, source_range_t y) { - return x.start + x.length <= y.start; - }; - assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) && - "Error ranges should be sorted"); - return std::binary_search(errs.begin(), errs.end(), r, range_is_before); -} - -source_range_t pretty_printer_t::gap_text_to(uint32_t end) const { - auto where = - std::lower_bound(gaps.begin(), gaps.end(), end, - [](source_range_t r, uint32_t end) { return r.start + r.length < end; }); - if (where == gaps.end() || where->start + where->length != end) { - // Not found. - return source_range_t{0, 0}; - } else { - return *where; - } -} - -bool pretty_printer_t::emit_gap_text(source_range_t range, gap_flags_t flags) { - wcstring gap_text = substr(range); - // Common case: if we are only spaces, do nothing. - if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false; - - // Look to see if there is an escaped newline. - // Emit it if either we allow it, or it comes before the first comment. - // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap - // text - we already know it has no semantic significance. - size_t escaped_nl = gap_text.find(L"\\\n"); - if (escaped_nl != wcstring::npos) { - size_t comment_idx = gap_text.find(L'#'); - if ((flags & allow_escaped_newlines) || - (comment_idx != wcstring::npos && escaped_nl < comment_idx)) { - // Emit a space before the escaped newline. - if (!at_line_start() && !has_preceding_space()) { - output.append(L" "); - } - output.append(L"\\\n"); - // Indent the continuation line and any leading comments (#7252). - // Use the indentation level of the next newline. - current_indent = indents.at(range.start + escaped_nl + 1); - emit_space_or_indent(); - } - } - - // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we - // always emit one. - bool needs_nl = false; - - auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); - while (auto tok = tokenizer->next()) { - wcstring tok_text = *tokenizer->text_of(*tok); - - if (needs_nl) { - emit_newline(); - needs_nl = false; - if (tok_text == L"\n") continue; - } else if (gap_text_mask_newline) { - // We only respect mask_newline the first time through the loop. - gap_text_mask_newline = false; - if (tok_text == L"\n") continue; - } - - if (tok->type_ == token_type_t::comment) { - emit_space_or_indent(); - output.append(tok_text); - needs_nl = true; - } else if (tok->type_ == token_type_t::end) { - // This may be either a newline or semicolon. - // Semicolons found here are not part of the ast and can simply be removed. - // Newlines are preserved unless mask_newline is set. - if (tok_text == L"\n") { - emit_newline(); - } - } else { - fprintf(stderr, - "Gap text should only have comments and newlines - instead found token " - "type %d with text: %ls\n", - (int)tok->type_, tok_text.c_str()); - DIE("Gap text should only have comments and newlines"); - } - } - if (needs_nl) emit_newline(); - return needs_nl; -} - -void pretty_printer_t::emit_space_or_indent(gap_flags_t flags) { - if (at_line_start()) { - output.append(SPACES_PER_INDENT * current_indent, L' '); - } else if (!(flags & skip_space) && !has_preceding_space()) { - output.append(1, L' '); - } -} - -std::vector pretty_printer_t::compute_preferred_semi_locations() const { - std::vector result; - auto mark_semi_from_input = [&](const semi_nl_t &n) { - if (n.ptr()->has_source() && substr(n.range()) == L";") { - result.push_back(n.range().start); - } - }; - - // andor_job_lists get semis if the input uses semis. - for (auto ast_traversal = new_ast_traversal(*ast->top());;) { - auto node = ast_traversal->next(); - if (!node->has_value()) break; - // See if we have a condition and an andor_job_list. - const semi_nl_t *condition = nullptr; - const andor_job_list_t *andors = nullptr; - if (const auto *ifc = node->try_as_if_clause()) { - if (ifc->condition().has_semi_nl()) { - condition = &ifc->condition().semi_nl(); - } - andors = &ifc->andor_tail(); - } else if (const auto *wc = node->try_as_while_header()) { - if (wc->condition().has_semi_nl()) { - condition = &wc->condition().semi_nl(); - } - andors = &wc->andor_tail(); - } - - // If there is no and-or tail then we always use a newline. - if (andors && andors->count() > 0) { - if (condition) mark_semi_from_input(*condition); - // Mark all but last of the andor list. - for (uint32_t i = 0; i + 1 < andors->count(); i++) { - mark_semi_from_input(andors->at(i)->job().semi_nl()); - } - } - } - - // `x ; and y` gets semis if it has them already, and they are on the same line. - for (auto ast_traversal = new_ast_traversal(*ast->top());;) { - auto node = ast_traversal->next(); - if (!node->has_value()) break; - if (const auto *job_list = node->try_as_job_list()) { - const semi_nl_t *prev_job_semi_nl = nullptr; - for (size_t i = 0; i < job_list->count(); i++) { - const job_conjunction_t &job = *job_list->at(i); - // Set up prev_job_semi_nl for the next iteration to make control flow easier. - const semi_nl_t *prev = prev_job_semi_nl; - prev_job_semi_nl = job.has_semi_nl() ? &job.semi_nl() : nullptr; - - // Is this an 'and' or 'or' job? - if (!job.has_decorator()) continue; - - // Now see if we want to mark 'prev' as allowing a semi. - // Did we have a previous semi_nl which was a newline? - if (!prev || substr(prev->range()) != L";") continue; - - // Is there a newline between them? - assert(prev->range().start <= job.decorator().range().start && - "Ranges out of order"); - auto start = source.begin() + prev->range().start; - auto end = source.begin() + job.decorator().range().end(); - if (std::find(start, end, L'\n') == end) { - // We're going to allow the previous semi_nl to be a semi. - result.push_back(prev->range().start); - } - } - } - } - std::sort(result.begin(), result.end()); - return result; -} diff --git a/src/fish_indent_common.h b/src/fish_indent_common.h deleted file mode 100644 index 67446b2be..000000000 --- a/src/fish_indent_common.h +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef FISH_INDENT_STAGING_H -#define FISH_INDENT_STAGING_H - -#include "ast.h" -#include "common.h" -#include "cxx.h" - -struct PrettyPrinter; -struct pretty_printer_t { - // Note: this got somewhat more complicated after introducing the new AST, because that AST no - // longer encodes detailed lexical information (e.g. every newline). This feels more complex - // than necessary and would probably benefit from a more layered approach where we identify - // certain runs, weight line breaks, have a cost model, etc. - pretty_printer_t(const wcstring &src, bool do_indent); - - // Original source. - const wcstring &source; - - // The indents of our string. - // This has the same length as 'source' and describes the indentation level. - const std::vector indents; - - // The parsed ast. - rust::Box ast; - - rust::Box visitor; - - // The prettifier output. - wcstring output; - - // The indent of the source range which we are currently emitting. - int current_indent{0}; - - // Whether to indent, or just insert spaces. - const bool do_indent; - - // Whether the next gap text should hide the first newline. - bool gap_text_mask_newline{false}; - - // The "gaps": a sorted set of ranges between tokens. - // These contain whitespace, comments, semicolons, and other lexical elements which are not - // present in the ast. - const std::vector gaps; - - // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines. - // This is computed ahead of time for convenience. - const std::vector preferred_semi_locations; - - // Flags we support. - using gap_flags_t = uint32_t; - enum { - default_flags = 0, - - // Whether to allow line splitting via escaped newlines. - // For example, in argument lists: - // - // echo a \ - // b - // - // If this is not set, then split-lines will be joined. - allow_escaped_newlines = 1 << 0, - - // Whether to require a space before this token. - // This is used when emitting semis: - // echo a; echo b; - // No space required between 'a' and ';', or 'b' and ';'. - skip_space = 1 << 1, - }; - -#if INCLUDE_RUST_HEADERS - // \return gap text flags for the gap text that comes *before* a given node type. - static gap_flags_t gap_text_flags_before_node(const ast::node_t &node); -#endif - - // \return whether we are at the start of a new line. - bool at_line_start() const { return output.empty() || output.back() == L'\n'; } - - // \return whether we have a space before the output. - // This ignores escaped spaces and escaped newlines. - bool has_preceding_space() const; - - // Entry point. Prettify our source code and return it. - wcstring prettify(); - - // \return a substring of source. - wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); } - - // Return the gap ranges from our ast. - std::vector compute_gaps() const; - - // Return sorted list of semi-preferring semi_nl nodes. - std::vector compute_preferred_semi_locations() const; - - // Emit a space or indent as necessary, depending on the previous output. - void emit_space_or_indent(gap_flags_t flags = default_flags); - - // Emit "gap text:" newlines and comments from the original source. - // Gap text may be a few things: - // - // 1. Just a space is common. We will trim the spaces to be empty. - // - // Here the gap text is the comment, followed by the newline: - // - // echo abc # arg - // echo def - // - // 2. It may also be an escaped newline: - // Here the gap text is a space, backslash, newline, space. - // - // echo \ - // hi - // - // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe: - // - // begin | stuff - // - // We do not handle errors here - instead our caller does. - bool emit_gap_text(source_range_t range, gap_flags_t flags); - - /// \return the gap text ending at a given index into the string, or empty if none. - source_range_t gap_text_to(uint32_t end) const; - - /// \return whether a range \p r overlaps an error range from our ast. - bool range_contained_error(source_range_t r) const; - - // Emit the gap text before a source range. - bool emit_gap_text_before(source_range_t r, gap_flags_t flags); - - /// Given a string \p input, remove unnecessary quotes, etc. - wcstring clean_text(const wcstring &input); - - // Emit a range of original text. This indents as needed, and also inserts preceding gap text. - // If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such - // lines. - void emit_text(source_range_t r, gap_flags_t flags); - - void emit_node_text(const void *node); - - // Emit one newline. - void emit_newline() { output.push_back(L'\n'); } - - // Emit a semicolon. - void emit_semi() { output.push_back(L';'); } - - void visit_semi_nl(const void *node_); - - void visit_redirection(const void *node_); - - void visit_maybe_newlines(const void *node_); - - void visit_begin_header(); - - // The flags we use to parse. - static parse_tree_flags_t parse_flags() { - return parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_leave_unterminated | parse_flag_show_blank_lines; - } -}; - -#endif // FISH_INDENT_STAGING_H