Refactor ansi stripping into nu-utils functions (#6966)

Allows use of slightly optimized variants that check if they have to use
the heavier vte parser. Tries to avoid unnecessary allocations. Initial
performance characteristics proven out in #4378.

Also reduces boilerplate that caused rightward drift.
This commit is contained in:
Stefan Holderbach 2022-11-04 19:49:45 +01:00 committed by GitHub
parent b9195c2668
commit 2c4048eb43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 126 additions and 134 deletions

8
Cargo.lock generated
View file

@ -2535,7 +2535,6 @@ dependencies = [
"percent-encoding", "percent-encoding",
"reedline", "reedline",
"rstest", "rstest",
"strip-ansi-escapes",
"sysinfo", "sysinfo",
"thiserror", "thiserror",
] ]
@ -2630,7 +2629,6 @@ dependencies = [
"sha2", "sha2",
"shadow-rs", "shadow-rs",
"sqlparser", "sqlparser",
"strip-ansi-escapes",
"sysinfo", "sysinfo",
"terminal_size 0.2.1", "terminal_size 0.2.1",
"thiserror", "thiserror",
@ -2657,7 +2655,6 @@ dependencies = [
"nu-path", "nu-path",
"nu-protocol", "nu-protocol",
"nu-utils", "nu-utils",
"strip-ansi-escapes",
"sysinfo", "sysinfo",
] ]
@ -2777,8 +2774,8 @@ dependencies = [
"json_to_table", "json_to_table",
"nu-ansi-term", "nu-ansi-term",
"nu-protocol", "nu-protocol",
"nu-utils",
"serde_json", "serde_json",
"strip-ansi-escapes",
"tabled", "tabled",
] ]
@ -2786,7 +2783,7 @@ dependencies = [
name = "nu-term-grid" name = "nu-term-grid"
version = "0.70.1" version = "0.70.1"
dependencies = [ dependencies = [
"strip-ansi-escapes", "nu-utils",
"unicode-width", "unicode-width",
] ]
@ -2811,6 +2808,7 @@ dependencies = [
"crossterm_winapi", "crossterm_winapi",
"lscolors", "lscolors",
"num-format", "num-format",
"strip-ansi-escapes",
"sys-locale", "sys-locale",
] ]

View file

@ -32,7 +32,6 @@ lazy_static = "1.4.0"
log = "0.4" log = "0.4"
miette = { version = "5.1.0", features = ["fancy-no-backtrace"] } miette = { version = "5.1.0", features = ["fancy-no-backtrace"] }
percent-encoding = "2" percent-encoding = "2"
strip-ansi-escapes = "0.1.1"
sysinfo = "0.26.2" sysinfo = "0.26.2"
thiserror = "1.0.31" thiserror = "1.0.31"

View file

@ -24,7 +24,6 @@ use std::{
sync::atomic::Ordering, sync::atomic::Ordering,
time::Instant, time::Instant,
}; };
use strip_ansi_escapes::strip;
use sysinfo::SystemExt; use sysinfo::SystemExt;
// According to Daniel Imms @Tyriar, we need to do these this way: // According to Daniel Imms @Tyriar, we need to do these this way:
@ -140,15 +139,7 @@ pub fn evaluate_repl(
if use_ansi { if use_ansi {
println!("{}", banner); println!("{}", banner);
} else { } else {
let stripped_string = { println!("{}", nu_utils::strip_ansi_string_likely(banner));
if let Ok(bytes) = strip(&banner) {
String::from_utf8_lossy(&bytes).to_string()
} else {
banner
}
};
println!("{}", stripped_string);
} }
} }

View file

@ -77,7 +77,6 @@ serde_yaml = "0.9.4"
sha2 = "0.10.0" sha2 = "0.10.0"
# Disable default features b/c the default features build Git (very slow to compile) # Disable default features b/c the default features build Git (very slow to compile)
shadow-rs = { version = "0.16.1", default-features = false } shadow-rs = { version = "0.16.1", default-features = false }
strip-ansi-escapes = "0.1.1"
sysinfo = "0.26.2" sysinfo = "0.26.2"
terminal_size = "0.2.1" terminal_size = "0.2.1"
thiserror = "1.0.31" thiserror = "1.0.31"

View file

@ -363,15 +363,12 @@ pub fn highlight_search_string(
} }
}; };
// strip haystack to remove existing ansi style // strip haystack to remove existing ansi style
let stripped_haystack: String = match strip_ansi_escapes::strip(haystack) { let stripped_haystack = nu_utils::strip_ansi_likely(haystack);
Ok(i) => String::from_utf8(i).unwrap_or_else(|_| String::from(haystack)),
Err(_) => String::from(haystack),
};
let mut last_match_end = 0; let mut last_match_end = 0;
let style = Style::new().fg(White).on(Red); let style = Style::new().fg(White).on(Red);
let mut highlighted = String::new(); let mut highlighted = String::new();
for cap in regex.captures_iter(stripped_haystack.as_str()) { for cap in regex.captures_iter(stripped_haystack.as_ref()) {
match cap { match cap {
Ok(capture) => { Ok(capture) => {
let start = match capture.get(0) { let start = match capture.get(0) {

View file

@ -248,14 +248,8 @@ fn nu_value_to_string(value: Value, separator: &str, config: &Config) -> String
} }
Value::String { val, .. } => { Value::String { val, .. } => {
// don't store ansi escape sequences in the database // don't store ansi escape sequences in the database
let stripped = {
match strip_ansi_escapes::strip(&val) {
Ok(item) => String::from_utf8(item).unwrap_or(val),
Err(_) => val,
}
};
// escape single quotes // escape single quotes
stripped.replace('\'', "''") nu_utils::strip_ansi_unlikely(&val).replace('\'', "''")
} }
Value::List { vals: val, .. } => val Value::List { vals: val, .. } => val
.iter() .iter()

View file

@ -67,10 +67,7 @@ impl Command for Cd {
let path_val = { let path_val = {
if let Some(path) = path_val { if let Some(path) = path_val {
Some(Spanned { Some(Spanned {
item: match strip_ansi_escapes::strip(&path.item) { item: nu_utils::strip_ansi_string_unlikely(path.item),
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
span: path.span, span: path.span,
}) })
} else { } else {

View file

@ -73,10 +73,7 @@ impl Command for Cp {
let src: Spanned<String> = call.req(engine_state, stack, 0)?; let src: Spanned<String> = call.req(engine_state, stack, 0)?;
let src = { let src = {
Spanned { Spanned {
item: match strip_ansi_escapes::strip(&src.item) { item: nu_utils::strip_ansi_string_unlikely(src.item),
Ok(item) => String::from_utf8(item).unwrap_or(src.item),
Err(_) => src.item,
},
span: src.span, span: src.span,
} }
}; };

View file

@ -86,10 +86,7 @@ impl Command for Ls {
let pattern_arg = { let pattern_arg = {
if let Some(path) = pattern_arg { if let Some(path) = pattern_arg {
Some(Spanned { Some(Spanned {
item: match strip_ansi_escapes::strip(&path.item) { item: nu_utils::strip_ansi_string_unlikely(path.item),
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
span: path.span, span: path.span,
}) })
} else { } else {

View file

@ -66,10 +66,7 @@ impl Command for Mv {
let spanned_source: Spanned<String> = call.req(engine_state, stack, 0)?; let spanned_source: Spanned<String> = call.req(engine_state, stack, 0)?;
let spanned_source = { let spanned_source = {
Spanned { Spanned {
item: match strip_ansi_escapes::strip(&spanned_source.item) { item: nu_utils::strip_ansi_string_unlikely(spanned_source.item),
Ok(item) => String::from_utf8(item).unwrap_or(spanned_source.item),
Err(_) => spanned_source.item,
},
span: spanned_source.span, span: spanned_source.span,
} }
}; };

View file

@ -53,10 +53,7 @@ impl Command for Open {
let path = { let path = {
if let Some(path_val) = path { if let Some(path_val) = path {
Some(Spanned { Some(Spanned {
item: match strip_ansi_escapes::strip(&path_val.item) { item: nu_utils::strip_ansi_string_unlikely(path_val.item),
Ok(item) => String::from_utf8(item).unwrap_or(path_val.item),
Err(_) => path_val.item,
},
span: path_val.span, span: path_val.span,
}) })
} else { } else {

View file

@ -143,10 +143,7 @@ fn rm(
for (idx, path) in targets.clone().into_iter().enumerate() { for (idx, path) in targets.clone().into_iter().enumerate() {
let corrected_path = Spanned { let corrected_path = Spanned {
item: match strip_ansi_escapes::strip(&path.item) { item: nu_utils::strip_ansi_string_unlikely(path.item),
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
span: path.span, span: path.span,
}; };
let _ = std::mem::replace(&mut targets[idx], corrected_path); let _ = std::mem::replace(&mut targets[idx], corrected_path);

View file

@ -3,7 +3,6 @@ use nu_protocol::{
ast::Call, ast::CellPath, engine::Command, engine::EngineState, engine::Stack, Category, ast::Call, ast::CellPath, engine::Command, engine::EngineState, engine::Stack, Category,
Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use strip_ansi_escapes::strip;
#[derive(Clone)] #[derive(Clone)]
pub struct SubCommand; pub struct SubCommand;
@ -79,15 +78,7 @@ fn operate(
fn action(input: &Value, command_span: &Span) -> Value { fn action(input: &Value, command_span: &Span) -> Value {
match input { match input {
Value::String { val, span } => { Value::String { val, span } => {
let stripped_string = { Value::string(nu_utils::strip_ansi_likely(val).to_string(), *span)
if let Ok(bytes) = strip(&val) {
String::from_utf8_lossy(&bytes).to_string()
} else {
val.to_string()
}
};
Value::string(stripped_string, *span)
} }
other => { other => {
let got = format!("value is {}, not string", other.get_type()); let got = format!("value is {}, not string", other.get_type());

View file

@ -1,5 +1,3 @@
use std::borrow::Cow;
// use super::icons::{icon_for_file, iconify_style_ansi_to_nu}; // use super::icons::{icon_for_file, iconify_style_ansi_to_nu};
use super::icons::icon_for_file; use super::icons::icon_for_file;
use lscolors::Style; use lscolors::Style;
@ -178,26 +176,6 @@ prints out the list properly."#
} }
} }
/// Removes ANSI escape codes and some ASCII control characters
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If parsing fails silently returns the input string
fn strip_ansi(string: &str) -> Cow<str> {
// Check if any ascii control character except LF(0x0A = 10) is present,
// which will be stripped. Includes the primary start of ANSI sequences ESC
// (0x1B = decimal 27)
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
if let Ok(new_string) = String::from_utf8(stripped) {
return Cow::Owned(new_string);
}
}
}
// Else case includes failures to parse!
Cow::Borrowed(string)
}
fn create_grid_output( fn create_grid_output(
items: Vec<(usize, String, String)>, items: Vec<(usize, String, String)>,
call: &Call, call: &Call,
@ -232,7 +210,7 @@ fn create_grid_output(
if header == "name" { if header == "name" {
if color_param { if color_param {
if use_grid_icons { if use_grid_icons {
let no_ansi = strip_ansi(&value); let no_ansi = nu_utils::strip_ansi_unlikely(&value);
let path = std::path::Path::new(no_ansi.as_ref()); let path = std::path::Path::new(no_ansi.as_ref());
let icon = icon_for_file(path, call.head)?; let icon = icon_for_file(path, call.head)?;
let ls_colors_style = ls_colors.style_for_path(path); let ls_colors_style = ls_colors.style_for_path(path);

View file

@ -1394,17 +1394,14 @@ fn render_path_name(
return None; return None;
} }
let stripped_path = match strip_ansi_escapes::strip(path) { let stripped_path = nu_utils::strip_ansi_unlikely(path);
Ok(v) => String::from_utf8(v).unwrap_or_else(|_| path.to_owned()),
Err(_) => path.to_owned(),
};
let (style, has_metadata) = match std::fs::symlink_metadata(&stripped_path) { let (style, has_metadata) = match std::fs::symlink_metadata(stripped_path.as_ref()) {
Ok(metadata) => ( Ok(metadata) => (
ls_colors.style_for_path_with_metadata(&stripped_path, Some(&metadata)), ls_colors.style_for_path_with_metadata(stripped_path.as_ref(), Some(&metadata)),
true, true,
), ),
Err(_) => (ls_colors.style_for_path(&stripped_path), false), Err(_) => (ls_colors.style_for_path(stripped_path.as_ref()), false),
}; };
// clickable links don't work in remote SSH sessions // clickable links don't work in remote SSH sessions
@ -1416,9 +1413,9 @@ fn render_path_name(
// .map(ToNuAnsiStyle::to_nu_ansi_style) // .map(ToNuAnsiStyle::to_nu_ansi_style)
.unwrap_or_default(); .unwrap_or_default();
let full_path = PathBuf::from(&stripped_path) let full_path = PathBuf::from(stripped_path.as_ref())
.canonicalize() .canonicalize()
.unwrap_or_else(|_| PathBuf::from(&stripped_path)); .unwrap_or_else(|_| PathBuf::from(stripped_path.as_ref()));
let full_path_link = make_clickable_link( let full_path_link = make_clickable_link(
full_path.display().to_string(), full_path.display().to_string(),

View file

@ -15,7 +15,6 @@ nu-utils = { path = "../nu-utils", version = "0.70.1" }
chrono = { version="0.4.21", features=["serde"] } chrono = { version="0.4.21", features=["serde"] }
sysinfo = "0.26.2" sysinfo = "0.26.2"
strip-ansi-escapes = "0.1.1"
[features] [features]
plugin = [] plugin = []

View file

@ -170,17 +170,11 @@ fn get_documentation(
long_desc.push('\n'); long_desc.push('\n');
let stripped_string = if config.no_color { if config.no_color {
if let Ok(bytes) = strip_ansi_escapes::strip(&long_desc) { nu_utils::strip_ansi_string_likely(long_desc)
String::from_utf8_lossy(&bytes).to_string()
} else {
long_desc
}
} else { } else {
long_desc long_desc
}; }
stripped_string
} }
// document shape helps showing more useful information // document shape helps showing more useful information

View file

@ -15,7 +15,7 @@ path = "src/main.rs"
[dependencies] [dependencies]
nu-ansi-term = "0.46.0" nu-ansi-term = "0.46.0"
nu-protocol = { path = "../nu-protocol", version = "0.70.1" } nu-protocol = { path = "../nu-protocol", version = "0.70.1" }
strip-ansi-escapes = "0.1.1" nu-utils = { path = "../nu-utils", version = "0.70.1" }
atty = "0.2.14" atty = "0.2.14"
tabled = { version = "0.10.0", features = ["color"], default-features = false } tabled = { version = "0.10.0", features = ["color"], default-features = false }
json_to_table = { version = "0.2.0", features = ["color"] } json_to_table = { version = "0.2.0", features = ["color"] }

View file

@ -181,10 +181,7 @@ fn print_table(table: tabled::Table<Data>, config: &Config) -> String {
// the atty is for when people do ls from vim, there should be no coloring there // the atty is for when people do ls from vim, there should be no coloring there
if !config.use_ansi_coloring || !atty::is(atty::Stream::Stdout) { if !config.use_ansi_coloring || !atty::is(atty::Stream::Stdout) {
// Draw the table without ansi colors // Draw the table without ansi colors
match strip_ansi_escapes::strip(&output) { nu_utils::strip_ansi_string_likely(output)
Ok(bytes) => String::from_utf8_lossy(&bytes).to_string(),
Err(_) => output, // we did our best; so return at least something
}
} else { } else {
// Draw the table with ansi colors // Draw the table with ansi colors
output output

View file

@ -14,4 +14,5 @@ path = "src/main.rs"
[dependencies] [dependencies]
unicode-width = "0.1.9" unicode-width = "0.1.9"
strip-ansi-escapes = "0.1.1"
nu-utils = { path = "../nu-utils", version = "0.70.1" }

View file

@ -91,35 +91,13 @@
//! [`fit_into_width`]: ./struct.Grid.html#method.fit_into_width //! [`fit_into_width`]: ./struct.Grid.html#method.fit_into_width
//! [`GridOptions`]: ./struct.GridOptions.html //! [`GridOptions`]: ./struct.GridOptions.html
use std::borrow::Cow;
use std::cmp::max; use std::cmp::max;
use std::fmt; use std::fmt;
use std::iter::repeat; use std::iter::repeat;
use strip_ansi_escapes;
use unicode_width::UnicodeWidthStr; use unicode_width::UnicodeWidthStr;
/// Removes ANSI escape codes and some ASCII control characters
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If parsing fails silently returns the input string
fn strip_ansi(string: &str) -> Cow<str> {
// Check if any ascii control character except LF(0x0A = 10) is present,
// which will be stripped. Includes the primary start of ANSI sequences ESC
// (0x1B = decimal 27)
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
if let Ok(new_string) = String::from_utf8(stripped) {
return Cow::Owned(new_string);
}
}
}
// Else case includes failures to parse!
Cow::Borrowed(string)
}
fn unicode_width_strip_ansi(astring: &str) -> usize { fn unicode_width_strip_ansi(astring: &str) -> usize {
strip_ansi(astring).width() nu_utils::strip_ansi_unlikely(astring).width()
} }
/// Alignment indicate on which side the content should stick if some filling /// Alignment indicate on which side the content should stick if some filling

View file

@ -16,6 +16,7 @@ path = "src/main.rs"
lscolors = { version = "0.12.0", features = ["crossterm"], default-features = false } lscolors = { version = "0.12.0", features = ["crossterm"], default-features = false }
num-format = { version = "0.4.3" } num-format = { version = "0.4.3" }
sys-locale = "0.2.1" sys-locale = "0.2.1"
strip-ansi-escapes = "0.1.1"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
crossterm_winapi = "0.9.0" crossterm_winapi = "0.9.0"

View file

@ -0,0 +1,91 @@
use std::borrow::Cow;
/// Strips ANSI escape sequences and ASCII control characters from `string`.
///
/// Tuned for input that is usually already clean: a cheap byte scan runs
/// first, and when it finds nothing to strip the input is handed back
/// borrowed, without running the escape-sequence parser or allocating.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// Failures are swallowed: if stripping fails, or its output is not valid
/// UTF-8, the original input is returned unchanged.
pub fn strip_ansi_unlikely(string: &str) -> Cow<str> {
    // Any ASCII control byte below 0x20 other than LF (0x0A) would be
    // stripped. That range includes ESC (0x1B), the usual start of an ANSI
    // sequence.
    let needs_stripping = string.bytes().any(|byte| byte < 0x20 && byte != b'\n');
    if !needs_stripping {
        return Cow::Borrowed(string);
    }

    let cleaned = strip_ansi_escapes::strip(string)
        .ok()
        .and_then(|raw| String::from_utf8(raw).ok());

    match cleaned {
        Some(text) => Cow::Owned(text),
        // Stripping or UTF-8 validation failed: fall back to the input.
        None => Cow::Borrowed(string),
    }
}
/// Strips ANSI escape sequences and ASCII control characters from `string`.
///
/// Tuned for input that most likely does contain ANSI control characters:
/// there is no pre-scan, the input always goes straight to the stripping
/// parser, and a successful result is always a freshly allocated string.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// Failures are swallowed: if stripping fails, or its output is not valid
/// UTF-8, the original input is returned borrowed and unchanged.
pub fn strip_ansi_likely(string: &str) -> Cow<str> {
    match strip_ansi_escapes::strip(string).map(String::from_utf8) {
        Ok(Ok(cleaned)) => Cow::Owned(cleaned),
        // Either the stripper or the UTF-8 conversion failed.
        _ => Cow::Borrowed(string),
    }
}
/// Owned-`String` counterpart of the "unlikely" stripper: removes ANSI escape
/// sequences and ASCII control characters from `string`.
///
/// Tuned for input that is usually already clean: a cheap byte scan runs
/// first, and when it finds nothing to strip the very same `String` is
/// returned, without running the escape-sequence parser or allocating.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// Failures are swallowed: if stripping fails, or its output is not valid
/// UTF-8, the original string is returned unchanged.
pub fn strip_ansi_string_unlikely(string: String) -> String {
    // Any ASCII control byte below 0x20 other than LF (0x0A) would be
    // stripped. That range includes ESC (0x1B), the usual start of an ANSI
    // sequence.
    let has_strippable_byte = string.bytes().any(|byte| byte != b'\n' && byte < 0x20);

    if has_strippable_byte {
        let cleaned = strip_ansi_escapes::strip(&string)
            .ok()
            .and_then(|raw| String::from_utf8(raw).ok());
        if let Some(text) = cleaned {
            return text;
        }
    }

    // Nothing to strip, or stripping failed: give the input back as-is.
    string
}
/// Owned-`String` counterpart of the "likely" stripper: removes ANSI escape
/// sequences and ASCII control characters from `string`.
///
/// Tuned for input that most likely does contain ANSI control characters:
/// there is no pre-scan, the input always goes straight to the stripping
/// parser.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// Failures are swallowed: if stripping fails, or its output is not valid
/// UTF-8, the original string is returned unchanged.
pub fn strip_ansi_string_likely(string: String) -> String {
    match strip_ansi_escapes::strip(&string).map(String::from_utf8) {
        Ok(Ok(cleaned)) => cleaned,
        // Either the stripper or the UTF-8 conversion failed.
        _ => string,
    }
}

View file

@ -1,3 +1,4 @@
mod deansi;
pub mod locale; pub mod locale;
pub mod utils; pub mod utils;
@ -6,3 +7,7 @@ pub use utils::{
enable_vt_processing, get_default_config, get_default_env, get_ls_colors, enable_vt_processing, get_default_config, get_default_env, get_ls_colors,
stderr_write_all_and_flush, stdout_write_all_and_flush, stderr_write_all_and_flush, stdout_write_all_and_flush,
}; };
pub use deansi::{
strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely,
};