Print non-printable characters using caret notation (#2443)

When the new flag is set, non-printable characters are printed using caret notation.
This commit is contained in:
einfachIrgendwer0815 2023-03-14 22:21:30 +01:00 committed by GitHub
parent c5602f9766
commit 8f99a78cf1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 151 additions and 37 deletions

View file

@ -4,6 +4,7 @@
- Implemented `-S` and `--chop-long-lines` flags as aliases for `--wrap=never`. See #2309 (@johnmatthiggins)
- Breaking change: Environment variables can now override config file settings (but command-line arguments still have the highest precedence), see #1152, #1281, and #2381 (@aaronkollasch)
- Implemented `--nonprintable-notation=caret` to support showing non-printable characters using caret notation. See #2429 (@einfachIrgendwer0815)
## Bugfixes

View file

@ -13,6 +13,13 @@ Options:
Show non-printable characters like space, tab or newline. This option can also be used to
print binary files. Use '--tabs' to control the width of the tab-placeholders.
--nonprintable-notation <notation>
Set notation for non-printable characters.
Possible values:
* unicode (␇, ␊, ␀, ..)
* caret (^G, ^J, ^@, ..)
-p, --plain...
Only show plain style, no decorations. This is an alias for '--style=plain'. When '-p' is
used twice ('-pp'), it also disables automatic paging (alias for '--style=plain

View file

@ -7,30 +7,50 @@ Arguments:
[FILE]... File(s) to print / concatenate. Use '-' for standard input.
Options:
-A, --show-all Show non-printable characters (space, tab, newline, ..).
-p, --plain... Show plain style (alias for '--style=plain').
-l, --language <language> Set the language for syntax highlighting.
-H, --highlight-line <N:M> Highlight lines N through M.
--file-name <name> Specify the name to display for a file.
-d, --diff Only show lines that have been added/removed/modified.
--tabs <T> Set the tab width to T spaces.
--wrap <mode> Specify the text-wrapping mode (*auto*, never, character).
-S, --chop-long-lines Truncate all lines longer than screen width. Alias for
'--wrap=never'.
-n, --number Show line numbers (alias for '--style=numbers').
--color <when> When to use colors (*auto*, never, always).
--italic-text <when> Use italics in output (always, *never*)
--decorations <when> When to show the decorations (*auto*, never, always).
--paging <when> Specify when to use the pager, or use `-P` to disable (*auto*,
never, always).
-m, --map-syntax <glob:syntax> Use the specified syntax for files matching the glob pattern
('*.cpp:C++').
--theme <theme> Set the color theme for syntax highlighting.
--list-themes Display all supported highlighting themes.
--style <components> Comma-separated list of style elements to display (*default*,
auto, full, plain, changes, header, header-filename,
header-filesize, grid, rule, numbers, snip).
-r, --line-range <N:M> Only print the lines from N to M.
-L, --list-languages Display all supported languages.
-h, --help Print help information (use `--help` for more detail)
-V, --version Print version information
-A, --show-all
Show non-printable characters (space, tab, newline, ..).
--nonprintable-notation <notation>
Set notation for non-printable characters.
-p, --plain...
Show plain style (alias for '--style=plain').
-l, --language <language>
Set the language for syntax highlighting.
-H, --highlight-line <N:M>
Highlight lines N through M.
--file-name <name>
Specify the name to display for a file.
-d, --diff
Only show lines that have been added/removed/modified.
--tabs <T>
Set the tab width to T spaces.
--wrap <mode>
Specify the text-wrapping mode (*auto*, never, character).
-S, --chop-long-lines
Truncate all lines longer than screen width. Alias for '--wrap=never'.
-n, --number
Show line numbers (alias for '--style=numbers').
--color <when>
When to use colors (*auto*, never, always).
--italic-text <when>
Use italics in output (always, *never*)
--decorations <when>
When to show the decorations (*auto*, never, always).
--paging <when>
Specify when to use the pager, or use `-P` to disable (*auto*, never, always).
-m, --map-syntax <glob:syntax>
Use the specified syntax for files matching the glob pattern ('*.cpp:C++').
--theme <theme>
Set the color theme for syntax highlighting.
--list-themes
Display all supported highlighting themes.
--style <components>
Comma-separated list of style elements to display (*default*, auto, full, plain, changes,
header, header-filename, header-filesize, grid, rule, numbers, snip).
-r, --line-range <N:M>
Only print the lines from N to M.
-L, --list-languages
Display all supported languages.
-h, --help
Print help information (use `--help` for more detail)
-V, --version
Print version information

View file

@ -21,7 +21,7 @@ use bat::{
input::Input,
line_range::{HighlightedLineRanges, LineRange, LineRanges},
style::{StyleComponent, StyleComponents},
MappingTarget, PagingMode, SyntaxMapping, WrappingMode,
MappingTarget, NonprintableNotation, PagingMode, SyntaxMapping, WrappingMode,
};
fn is_truecolor_terminal() -> bool {
@ -173,6 +173,15 @@ impl App {
}
}),
show_nonprintable: self.matches.get_flag("show-all"),
nonprintable_notation: match self
.matches
.get_one::<String>("nonprintable-notation")
.map(|s| s.as_str())
{
Some("unicode") => NonprintableNotation::Unicode,
Some("caret") => NonprintableNotation::Caret,
_ => unreachable!("other values for --nonprintable-notation are not allowed"),
},
wrapping_mode: if self.interactive_output || maybe_term_width.is_some() {
if !self.matches.get_flag("chop-long-lines") {
match self.matches.get_one::<String>("wrap").map(|s| s.as_str()) {

View file

@ -59,6 +59,22 @@ pub fn build_app(interactive_output: bool) -> Command {
Use '--tabs' to control the width of the tab-placeholders.",
),
)
.arg(
Arg::new("nonprintable-notation")
.long("nonprintable-notation")
.action(ArgAction::Set)
.default_value("unicode")
.value_parser(["unicode", "caret"])
.value_name("notation")
.hide_default_value(true)
.help("Set notation for non-printable characters.")
.long_help(
"Set notation for non-printable characters.\n\n\
Possible values:\n \
* unicode (␇, ␊, ␀, ..)\n \
* caret (^G, ^J, ^@, ..)",
),
)
.arg(
Arg::new("plain")
.overrides_with("plain")

View file

@ -1,4 +1,5 @@
use crate::line_range::{HighlightedLineRanges, LineRanges};
use crate::nonprintable_notation::NonprintableNotation;
#[cfg(feature = "paging")]
use crate::paging::PagingMode;
use crate::style::StyleComponents;
@ -39,6 +40,9 @@ pub struct Config<'a> {
/// Whether or not to show/replace non-printable characters like space, tab and newline.
pub show_nonprintable: bool,
/// The configured notation for non-printable characters
pub nonprintable_notation: NonprintableNotation,
/// The character width of the terminal
pub term_width: usize,

View file

@ -35,6 +35,7 @@ pub mod error;
pub mod input;
mod less;
pub mod line_range;
pub(crate) mod nonprintable_notation;
mod output;
#[cfg(feature = "paging")]
mod pager;
@ -49,6 +50,7 @@ mod terminal;
mod vscreen;
pub(crate) mod wrapping;
pub use nonprintable_notation::NonprintableNotation;
pub use pretty_printer::{Input, PrettyPrinter, Syntax};
pub use syntax_mapping::{MappingTarget, SyntaxMapping};
pub use wrapping::WrappingMode;

View file

@ -0,0 +1,12 @@
/// Notation used to render non-printable characters when
/// [`crate::config::Config::show_nonprintable`] is enabled.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum NonprintableNotation {
/// Use caret notation (^G, ^J, ^@, ..)
Caret,
/// Use unicode notation (␇, ␊, ␀, ..); this is the default variant.
#[default]
Unicode,
}

View file

@ -2,6 +2,8 @@ use std::fmt::Write;
use console::AnsiCodeIterator;
use crate::nonprintable_notation::NonprintableNotation;
/// Expand tabs like an ANSI-enabled expand(1).
pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String {
let mut buffer = String::with_capacity(line.len() * 2);
@ -49,7 +51,11 @@ fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
decoded.map(|(seq, n)| (seq.chars().next().unwrap(), n))
}
pub fn replace_nonprintable(input: &[u8], tab_width: usize) -> String {
pub fn replace_nonprintable(
input: &[u8],
tab_width: usize,
nonprintable_notation: NonprintableNotation,
) -> String {
let mut output = String::new();
let tab_width = if tab_width == 0 { 4 } else { tab_width };
@ -79,19 +85,37 @@ pub fn replace_nonprintable(input: &[u8], tab_width: usize) -> String {
}
// line feed
'\x0A' => {
output.push_str("␊\x0A");
output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^J\x0A",
NonprintableNotation::Unicode => "␊\x0A",
});
line_idx = 0;
}
// carriage return
'\x0D' => output.push('␍'),
'\x0D' => output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^M",
NonprintableNotation::Unicode => "␍",
}),
// null
'\x00' => output.push('␀'),
'\x00' => output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^@",
NonprintableNotation::Unicode => "␀",
}),
// bell
'\x07' => output.push('␇'),
'\x07' => output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^G",
NonprintableNotation::Unicode => "␇",
}),
// backspace
'\x08' => output.push('␈'),
'\x08' => output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^H",
NonprintableNotation::Unicode => "␈",
}),
// escape
'\x1B' => output.push('␛'),
'\x1B' => output.push_str(match nonprintable_notation {
NonprintableNotation::Caret => "^[",
NonprintableNotation::Unicode => "␛",
}),
// printable ASCII
c if c.is_ascii_alphanumeric()
|| c.is_ascii_punctuation()

View file

@ -93,7 +93,11 @@ impl<'a> Printer for SimplePrinter<'a> {
) -> Result<()> {
if !out_of_range {
if self.config.show_nonprintable {
let line = replace_nonprintable(line_buffer, self.config.tab_width);
let line = replace_nonprintable(
line_buffer,
self.config.tab_width,
self.config.nonprintable_notation,
);
write!(handle, "{}", line)?;
} else {
handle.write_all(line_buffer)?
@ -422,7 +426,11 @@ impl<'a> Printer for InteractivePrinter<'a> {
line_buffer: &[u8],
) -> Result<()> {
let line = if self.config.show_nonprintable {
replace_nonprintable(line_buffer, self.config.tab_width)
replace_nonprintable(
line_buffer,
self.config.tab_width,
self.config.nonprintable_notation,
)
} else {
let line = match self.content_type {
Some(ContentType::BINARY) | None => {

View file

@ -1623,6 +1623,17 @@ fn show_all_extends_tab_markers_to_next_tabstop_width_8() {
);
}
/// Runs bat with `--show-all` and `--nonprintable-notation=caret` on
/// `nonprintable.txt` and checks that control characters come out in caret
/// form (`^J`, `^M`, `^@`, `^G`, `^H`, `^[`) with nothing written to stderr.
#[test]
fn show_all_with_caret_notation() {
    // Exact stdout expected for the fixture file in caret notation.
    let expected_stdout = "hello·world^J\n├──┤^M^@^G^H^[";

    let mut command = bat();
    command
        .arg("--show-all")
        .arg("--nonprintable-notation=caret")
        .arg("nonprintable.txt");

    command.assert().stdout(expected_stdout).stderr("");
}
#[test]
fn no_paging_arg() {
bat()