Merge pull request #2999 from eth-p/strip-ansi-from-input-option

Add option to remove ANSI escape sequences from bat's input.
This commit is contained in:
Ethan P 2024-06-17 18:37:22 -07:00 committed by GitHub
commit a7a9727c11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 274 additions and 16 deletions

View file

@ -7,6 +7,7 @@
- `bat --squeeze-limit` to set the maximum number of consecutive empty lines when using `--squeeze-blank`, see #1441 (@eth-p) and #2665 (@einfachIrgendwer0815)
- `PrettyPrinter::squeeze_empty_lines` to support line squeezing for bat as a library, see #1441 (@eth-p) and #2665 (@einfachIrgendwer0815)
- Syntax highlighting for JavaScript files that start with `#!/usr/bin/env bun` #2913 (@sharunkumar)
- `bat --strip-ansi={never,always,auto}` to remove ANSI escape sequences from bat's input, see #2999 (@eth-p)
## Bugfixes

View file

@ -759,9 +759,14 @@ bat() {
If an input file contains color codes or other ANSI escape sequences or control characters, `bat` will have problems
performing syntax highlighting and text wrapping, and thus the output can become garbled.
When displaying such files it is recommended to disable both syntax highlighting and wrapping by
If your version of `bat` supports the `--strip-ansi=auto` option, it can be used to remove such sequences
before syntax highlighting. Alternatively, you may disable both syntax highlighting and wrapping by
passing the `--color=never --wrap=never` options to `bat`.
> [!NOTE]
> The `auto` option of `--strip-ansi` avoids removing escape sequences when the syntax is plain text.
### Terminals & colors
`bat` handles terminals *with* and *without* truecolor support. However, the colors in most syntax

View file

@ -122,6 +122,11 @@ Options:
--squeeze-limit <squeeze-limit>
Set the maximum number of consecutive empty lines to be printed.
--strip-ansi <when>
Specify when to strip ANSI escape sequences from the input. The automatic mode will remove
escape sequences unless the syntax highlighting language is plain text. Possible values:
auto, always, *never*.
--style <components>
Configure which elements (line numbers, file headers, grid borders, Git modifications, ..)
to display in addition to the file contents. The argument is a comma-separated list of

View file

@ -7,6 +7,7 @@ use crate::{
clap_app,
config::{get_args_from_config_file, get_args_from_env_opts_var, get_args_from_env_vars},
};
use bat::StripAnsiMode;
use clap::ArgMatches;
use console::Term;
@ -242,6 +243,16 @@ impl App {
4
},
),
strip_ansi: match self
.matches
.get_one::<String>("strip-ansi")
.map(|s| s.as_str())
{
Some("never") => StripAnsiMode::Never,
Some("always") => StripAnsiMode::Always,
Some("auto") => StripAnsiMode::Auto,
_ => unreachable!("other values for --strip-ansi are not allowed"),
},
theme: self
.matches
.get_one::<String>("theme")

View file

@ -402,6 +402,20 @@ pub fn build_app(interactive_output: bool) -> Command {
.long_help("Set the maximum number of consecutive empty lines to be printed.")
.hide_short_help(true)
)
.arg(
Arg::new("strip-ansi")
.long("strip-ansi")
.overrides_with("strip-ansi")
.value_name("when")
.value_parser(["auto", "always", "never"])
.default_value("never")
.hide_default_value(true)
.help("Strip colors from the input (auto, always, *never*)")
.long_help("Specify when to strip ANSI escape sequences from the input. \
The automatic mode will remove escape sequences unless the syntax highlighting \
language is plain text. Possible values: auto, always, *never*.")
.hide_short_help(true)
)
.arg(
Arg::new("style")
.long("style")

View file

@ -5,6 +5,7 @@ use crate::paging::PagingMode;
use crate::style::StyleComponents;
use crate::syntax_mapping::SyntaxMapping;
use crate::wrapping::WrappingMode;
use crate::StripAnsiMode;
#[derive(Debug, Clone)]
pub enum VisibleLines {
@ -100,6 +101,9 @@ pub struct Config<'a> {
/// The maximum number of consecutive empty lines to display
pub squeeze_lines: Option<usize>,
/// Specifies when to strip ANSI escape sequences from the input
pub strip_ansi: StripAnsiMode,
}
#[cfg(all(feature = "minimal-application", feature = "paging"))]

View file

@ -53,6 +53,7 @@ mod vscreen;
pub(crate) mod wrapping;
pub use nonprintable_notation::NonprintableNotation;
pub use preprocessor::StripAnsiMode;
pub use pretty_printer::{Input, PrettyPrinter, Syntax};
pub use syntax_mapping::{MappingTarget, SyntaxMapping};
pub use wrapping::WrappingMode;

View file

@ -136,6 +136,27 @@ pub fn replace_nonprintable(
output
}
/// Returns a copy of `line` with every ANSI escape sequence removed.
///
/// The line is walked with `EscapeSequenceOffsetsIterator`; only the
/// segments it reports as plain text are copied into the result.
pub fn strip_ansi(line: &str) -> String {
    // Reserve the input's length up front to avoid regrowing while appending.
    let mut stripped = String::with_capacity(line.len());

    EscapeSequenceOffsetsIterator::new(line)
        .filter(|segment| matches!(segment, EscapeSequenceOffsets::Text { .. }))
        .for_each(|segment| {
            let text = &line[segment.index_of_start()..segment.index_past_end()];
            stripped.push_str(text);
        });

    stripped
}
/// Controls when ANSI escape sequences are stripped from the input.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub enum StripAnsiMode {
/// Never strip escape sequences (this is the default).
#[default]
Never,
/// Strip escape sequences regardless of the detected syntax.
Always,
/// Strip escape sequences unless the detected syntax is plain text.
Auto,
}
#[test]
fn test_try_parse_utf8_char() {
assert_eq!(try_parse_utf8_char(&[0x20]), Some((' ', 1)));
@ -179,3 +200,14 @@ fn test_try_parse_utf8_char() {
assert_eq!(try_parse_utf8_char(&[0xef, 0x20]), None);
assert_eq!(try_parse_utf8_char(&[0xf0, 0xf0]), None);
}
#[test]
fn test_strip_ansi() {
    // Only the stripping itself is checked here; sequence detection is
    // covered by the tests in the vscreen module.
    let cases = [
        ("no ansi", "no ansi"),
        ("\x1B[33mone", "one"),
        ("\x1B]1\x07multiple\x1B[J sequences", "multiple sequences"),
    ];

    for (input, expected) in cases {
        assert_eq!(strip_ansi(input), expected);
    }
}

View file

@ -11,7 +11,7 @@ use crate::{
input,
line_range::{HighlightedLineRanges, LineRange, LineRanges},
style::StyleComponent,
SyntaxMapping, WrappingMode,
StripAnsiMode, SyntaxMapping, WrappingMode,
};
#[cfg(feature = "paging")]
@ -182,6 +182,15 @@ impl<'a> PrettyPrinter<'a> {
self
}
/// Whether to remove ANSI escape sequences from the input (default: never)
///
/// If `StripAnsiMode::Auto` is used, escape sequences are removed unless the
/// syntax of the input is plain text.
pub fn strip_ansi(&mut self, mode: StripAnsiMode) -> &mut Self {
self.config.strip_ansi = mode;
self
}
/// Text wrapping mode (default: do not wrap)
pub fn wrapping_mode(&mut self, mode: WrappingMode) -> &mut Self {
self.config.wrapping_mode = mode;

View file

@ -29,11 +29,13 @@ use crate::diff::LineChanges;
use crate::error::*;
use crate::input::OpenedInput;
use crate::line_range::RangeCheckResult;
use crate::preprocessor::strip_ansi;
use crate::preprocessor::{expand_tabs, replace_nonprintable};
use crate::style::StyleComponent;
use crate::terminal::{as_terminal_escaped, to_ansi_color};
use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator};
use crate::wrapping::WrappingMode;
use crate::StripAnsiMode;
const ANSI_UNDERLINE_ENABLE: EscapeSequence = EscapeSequence::CSI {
raw_sequence: "\x1B[4m",
@ -207,6 +209,7 @@ pub(crate) struct InteractivePrinter<'a> {
highlighter_from_set: Option<HighlighterFromSet<'a>>,
background_color_highlight: Option<Color>,
consecutive_empty_lines: usize,
strip_ansi: bool,
}
impl<'a> InteractivePrinter<'a> {
@ -265,20 +268,41 @@ impl<'a> InteractivePrinter<'a> {
.content_type
.map_or(false, |c| c.is_binary() && !config.show_nonprintable);
let highlighter_from_set = if is_printing_binary || !config.colored_output {
None
} else {
// Determine the type of syntax for highlighting
let syntax_in_set =
match assets.get_syntax(config.language, input, &config.syntax_mapping) {
Ok(syntax_in_set) => syntax_in_set,
Err(Error::UndetectedSyntax(_)) => assets
.find_syntax_by_name("Plain Text")?
.expect("A plain text syntax is available"),
Err(e) => return Err(e),
};
let needs_to_match_syntax = !is_printing_binary
&& (config.colored_output || config.strip_ansi == StripAnsiMode::Auto);
Some(HighlighterFromSet::new(syntax_in_set, theme))
let (is_plain_text, highlighter_from_set) = if needs_to_match_syntax {
// Determine the type of syntax for highlighting
const PLAIN_TEXT_SYNTAX: &str = "Plain Text";
match assets.get_syntax(config.language, input, &config.syntax_mapping) {
Ok(syntax_in_set) => (
syntax_in_set.syntax.name == PLAIN_TEXT_SYNTAX,
Some(HighlighterFromSet::new(syntax_in_set, theme)),
),
Err(Error::UndetectedSyntax(_)) => (
true,
Some(
assets
.find_syntax_by_name(PLAIN_TEXT_SYNTAX)?
.map(|s| HighlighterFromSet::new(s, theme))
.expect("A plain text syntax is available"),
),
),
Err(e) => return Err(e),
}
} else {
(false, None)
};
// Determine when to strip ANSI sequences
let strip_ansi = match config.strip_ansi {
_ if config.show_nonprintable => false,
StripAnsiMode::Always => true,
StripAnsiMode::Auto if is_plain_text => false, // Plain text may already contain escape sequences.
StripAnsiMode::Auto => true,
_ => false,
};
Ok(InteractivePrinter {
@ -293,6 +317,7 @@ impl<'a> InteractivePrinter<'a> {
highlighter_from_set,
background_color_highlight,
consecutive_empty_lines: 0,
strip_ansi,
})
}
@ -573,7 +598,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
)
.into()
} else {
match self.content_type {
let mut line = match self.content_type {
Some(ContentType::BINARY) | None => {
return Ok(());
}
@ -590,7 +615,14 @@ impl<'a> Printer for InteractivePrinter<'a> {
line
}
}
};
// If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting.
if self.strip_ansi {
line = strip_ansi(&line).into()
}
line
};
let regions = self.highlight_regions_for_line(&line)?;

View file

@ -2666,3 +2666,147 @@ fn highlighting_independant_from_map_syntax_case() {
.stdout(expected)
.stderr("");
}
// With `--strip-ansi=always`, the escape sequences surrounding the text must
// be absent from the output, leaving only the text itself.
#[test]
fn strip_ansi_always_strips_ansi() {
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=always")
.write_stdin("\x1B[33mYellow\x1B[m")
.assert()
.success()
.stdout("Yellow");
}
// With `--strip-ansi=never`, the escape sequence from the input must still be
// present in the output.
#[test]
fn strip_ansi_never_does_not_strip_ansi() {
let output = String::from_utf8(
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=never")
.write_stdin("\x1B[33mYellow\x1B[m")
.assert()
.success()
.get_output()
.stdout
.clone(),
)
.expect("valid utf8");
assert!(output.contains("\x1B[33mYellow"))
}
// Even with `--strip-ansi=always`, the input's escape sequence is expected to
// pass through unchanged in this configuration.
// NOTE(review): presumably `--decorations=never` together with `--color=never`
// is what selects the simple printer here — confirm against the controller.
#[test]
fn strip_ansi_does_not_affect_simple_printer() {
let output = String::from_utf8(
bat()
.arg("--style=plain")
.arg("--decorations=never")
.arg("--color=never")
.arg("--strip-ansi=always")
.write_stdin("\x1B[33mYellow\x1B[m")
.assert()
.success()
.get_output()
.stdout
.clone(),
)
.expect("valid utf8");
assert!(output.contains("\x1B[33mYellow"))
}
// `--show-nonprintable` takes precedence over `--strip-ansi`: the escape
// character must still reach the output, rendered in its visible form,
// rather than being stripped.
#[test]
fn strip_ansi_does_not_strip_when_show_nonprintable() {
    let output = String::from_utf8(
        bat()
            .arg("--style=plain")
            .arg("--decorations=never")
            .arg("--color=always")
            .arg("--strip-ansi=always")
            .arg("--show-nonprintable")
            .write_stdin("\x1B[33mY")
            .assert()
            .success()
            .get_output()
            .stdout
            .clone(),
    )
    .expect("valid utf8");

    // An empty needle would match any output and make this test vacuous;
    // look for the symbol that ESC is presumably rendered as by default
    // (`␛`, U+241B) — confirm against the nonprintable-notation code.
    assert!(output.contains("␛"))
}
// With `--strip-ansi=auto` and a file name ending in `.rs`, the escape
// sequences must be removed from the output.
#[test]
fn strip_ansi_auto_strips_ansi_when_detected_syntax_by_filename() {
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=auto")
.arg("--file-name=test.rs")
.write_stdin("fn \x1B[33mYellow\x1B[m() -> () {}")
.assert()
.success()
.stdout("fn Yellow() -> () {}");
}
// With `--strip-ansi=auto` and the language given explicitly as
// `--language=rust`, the escape sequences must be removed from the output.
#[test]
fn strip_ansi_auto_strips_ansi_when_provided_syntax_by_option() {
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=auto")
.arg("--language=rust")
.write_stdin("fn \x1B[33mYellow\x1B[m() -> () {}")
.assert()
.success()
.stdout("fn Yellow() -> () {}");
}
// With `--strip-ansi=auto` and a `.txt` file name, the escape sequence from
// the input must still be present in the output.
#[test]
fn strip_ansi_auto_does_not_strip_when_plain_text_by_filename() {
let output = String::from_utf8(
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=auto")
.arg("--file-name=ansi.txt")
.write_stdin("\x1B[33mYellow\x1B[m")
.assert()
.success()
.get_output()
.stdout
.clone(),
)
.expect("valid utf8");
assert!(output.contains("\x1B[33mYellow"))
}
// With `--strip-ansi=auto` and the language given explicitly as
// `--language=txt`, the escape sequence from the input must still be present
// in the output.
#[test]
fn strip_ansi_auto_does_not_strip_ansi_when_plain_text_by_option() {
let output = String::from_utf8(
bat()
.arg("--style=plain")
.arg("--decorations=always")
.arg("--color=never")
.arg("--strip-ansi=auto")
.arg("--language=txt")
.write_stdin("\x1B[33mYellow\x1B[m")
.assert()
.success()
.get_output()
.stdout
.clone(),
)
.expect("valid utf8");
assert!(output.contains("\x1B[33mYellow"))
}