Refactor ansi stripping into nu-utils functions (#6966)

Allows use of slightly optimized variants that check if they have to use
the heavier vte parser. Tries to avoid unnecessary allocations. Initial
performance characteristics proven out in #4378.

Also reduces boilerplate with right-ward drift.
This commit is contained in:
Stefan Holderbach 2022-11-04 19:49:45 +01:00 committed by GitHub
parent b9195c2668
commit 2c4048eb43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 126 additions and 134 deletions

8
Cargo.lock generated
View file

@ -2535,7 +2535,6 @@ dependencies = [
"percent-encoding",
"reedline",
"rstest",
"strip-ansi-escapes",
"sysinfo",
"thiserror",
]
@ -2630,7 +2629,6 @@ dependencies = [
"sha2",
"shadow-rs",
"sqlparser",
"strip-ansi-escapes",
"sysinfo",
"terminal_size 0.2.1",
"thiserror",
@ -2657,7 +2655,6 @@ dependencies = [
"nu-path",
"nu-protocol",
"nu-utils",
"strip-ansi-escapes",
"sysinfo",
]
@ -2777,8 +2774,8 @@ dependencies = [
"json_to_table",
"nu-ansi-term",
"nu-protocol",
"nu-utils",
"serde_json",
"strip-ansi-escapes",
"tabled",
]
@ -2786,7 +2783,7 @@ dependencies = [
name = "nu-term-grid"
version = "0.70.1"
dependencies = [
"strip-ansi-escapes",
"nu-utils",
"unicode-width",
]
@ -2811,6 +2808,7 @@ dependencies = [
"crossterm_winapi",
"lscolors",
"num-format",
"strip-ansi-escapes",
"sys-locale",
]

View file

@ -32,7 +32,6 @@ lazy_static = "1.4.0"
log = "0.4"
miette = { version = "5.1.0", features = ["fancy-no-backtrace"] }
percent-encoding = "2"
strip-ansi-escapes = "0.1.1"
sysinfo = "0.26.2"
thiserror = "1.0.31"

View file

@ -24,7 +24,6 @@ use std::{
sync::atomic::Ordering,
time::Instant,
};
use strip_ansi_escapes::strip;
use sysinfo::SystemExt;
// According to Daniel Imms @Tyriar, we need to do these this way:
@ -140,15 +139,7 @@ pub fn evaluate_repl(
if use_ansi {
println!("{}", banner);
} else {
let stripped_string = {
if let Ok(bytes) = strip(&banner) {
String::from_utf8_lossy(&bytes).to_string()
} else {
banner
}
};
println!("{}", stripped_string);
println!("{}", nu_utils::strip_ansi_string_likely(banner));
}
}

View file

@ -77,7 +77,6 @@ serde_yaml = "0.9.4"
sha2 = "0.10.0"
# Disable default features b/c the default features build Git (very slow to compile)
shadow-rs = { version = "0.16.1", default-features = false }
strip-ansi-escapes = "0.1.1"
sysinfo = "0.26.2"
terminal_size = "0.2.1"
thiserror = "1.0.31"

View file

@ -363,15 +363,12 @@ pub fn highlight_search_string(
}
};
// strip haystack to remove existing ansi style
let stripped_haystack: String = match strip_ansi_escapes::strip(haystack) {
Ok(i) => String::from_utf8(i).unwrap_or_else(|_| String::from(haystack)),
Err(_) => String::from(haystack),
};
let stripped_haystack = nu_utils::strip_ansi_likely(haystack);
let mut last_match_end = 0;
let style = Style::new().fg(White).on(Red);
let mut highlighted = String::new();
for cap in regex.captures_iter(stripped_haystack.as_str()) {
for cap in regex.captures_iter(stripped_haystack.as_ref()) {
match cap {
Ok(capture) => {
let start = match capture.get(0) {

View file

@ -248,14 +248,8 @@ fn nu_value_to_string(value: Value, separator: &str, config: &Config) -> String
}
Value::String { val, .. } => {
// don't store ansi escape sequences in the database
let stripped = {
match strip_ansi_escapes::strip(&val) {
Ok(item) => String::from_utf8(item).unwrap_or(val),
Err(_) => val,
}
};
// escape single quotes
stripped.replace('\'', "''")
nu_utils::strip_ansi_unlikely(&val).replace('\'', "''")
}
Value::List { vals: val, .. } => val
.iter()

View file

@ -67,10 +67,7 @@ impl Command for Cd {
let path_val = {
if let Some(path) = path_val {
Some(Spanned {
item: match strip_ansi_escapes::strip(&path.item) {
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
item: nu_utils::strip_ansi_string_unlikely(path.item),
span: path.span,
})
} else {

View file

@ -73,10 +73,7 @@ impl Command for Cp {
let src: Spanned<String> = call.req(engine_state, stack, 0)?;
let src = {
Spanned {
item: match strip_ansi_escapes::strip(&src.item) {
Ok(item) => String::from_utf8(item).unwrap_or(src.item),
Err(_) => src.item,
},
item: nu_utils::strip_ansi_string_unlikely(src.item),
span: src.span,
}
};

View file

@ -86,10 +86,7 @@ impl Command for Ls {
let pattern_arg = {
if let Some(path) = pattern_arg {
Some(Spanned {
item: match strip_ansi_escapes::strip(&path.item) {
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
item: nu_utils::strip_ansi_string_unlikely(path.item),
span: path.span,
})
} else {

View file

@ -66,10 +66,7 @@ impl Command for Mv {
let spanned_source: Spanned<String> = call.req(engine_state, stack, 0)?;
let spanned_source = {
Spanned {
item: match strip_ansi_escapes::strip(&spanned_source.item) {
Ok(item) => String::from_utf8(item).unwrap_or(spanned_source.item),
Err(_) => spanned_source.item,
},
item: nu_utils::strip_ansi_string_unlikely(spanned_source.item),
span: spanned_source.span,
}
};

View file

@ -53,10 +53,7 @@ impl Command for Open {
let path = {
if let Some(path_val) = path {
Some(Spanned {
item: match strip_ansi_escapes::strip(&path_val.item) {
Ok(item) => String::from_utf8(item).unwrap_or(path_val.item),
Err(_) => path_val.item,
},
item: nu_utils::strip_ansi_string_unlikely(path_val.item),
span: path_val.span,
})
} else {

View file

@ -143,10 +143,7 @@ fn rm(
for (idx, path) in targets.clone().into_iter().enumerate() {
let corrected_path = Spanned {
item: match strip_ansi_escapes::strip(&path.item) {
Ok(item) => String::from_utf8(item).unwrap_or(path.item),
Err(_) => path.item,
},
item: nu_utils::strip_ansi_string_unlikely(path.item),
span: path.span,
};
let _ = std::mem::replace(&mut targets[idx], corrected_path);

View file

@ -3,7 +3,6 @@ use nu_protocol::{
ast::Call, ast::CellPath, engine::Command, engine::EngineState, engine::Stack, Category,
Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use strip_ansi_escapes::strip;
#[derive(Clone)]
pub struct SubCommand;
@ -79,15 +78,7 @@ fn operate(
fn action(input: &Value, command_span: &Span) -> Value {
match input {
Value::String { val, span } => {
let stripped_string = {
if let Ok(bytes) = strip(&val) {
String::from_utf8_lossy(&bytes).to_string()
} else {
val.to_string()
}
};
Value::string(stripped_string, *span)
Value::string(nu_utils::strip_ansi_likely(val).to_string(), *span)
}
other => {
let got = format!("value is {}, not string", other.get_type());

View file

@ -1,5 +1,3 @@
use std::borrow::Cow;
// use super::icons::{icon_for_file, iconify_style_ansi_to_nu};
use super::icons::icon_for_file;
use lscolors::Style;
@ -178,26 +176,6 @@ prints out the list properly."#
}
}
/// Removes ANSI escape codes and some ASCII control characters
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If parsing fails silently returns the input string
fn strip_ansi(string: &str) -> Cow<str> {
// Check if any ascii control character except LF(0x0A = 10) is present,
// which will be stripped. Includes the primary start of ANSI sequences ESC
// (0x1B = decimal 27)
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
if let Ok(new_string) = String::from_utf8(stripped) {
return Cow::Owned(new_string);
}
}
}
// Else case includes failures to parse!
Cow::Borrowed(string)
}
fn create_grid_output(
items: Vec<(usize, String, String)>,
call: &Call,
@ -232,7 +210,7 @@ fn create_grid_output(
if header == "name" {
if color_param {
if use_grid_icons {
let no_ansi = strip_ansi(&value);
let no_ansi = nu_utils::strip_ansi_unlikely(&value);
let path = std::path::Path::new(no_ansi.as_ref());
let icon = icon_for_file(path, call.head)?;
let ls_colors_style = ls_colors.style_for_path(path);

View file

@ -1394,17 +1394,14 @@ fn render_path_name(
return None;
}
let stripped_path = match strip_ansi_escapes::strip(path) {
Ok(v) => String::from_utf8(v).unwrap_or_else(|_| path.to_owned()),
Err(_) => path.to_owned(),
};
let stripped_path = nu_utils::strip_ansi_unlikely(path);
let (style, has_metadata) = match std::fs::symlink_metadata(&stripped_path) {
let (style, has_metadata) = match std::fs::symlink_metadata(stripped_path.as_ref()) {
Ok(metadata) => (
ls_colors.style_for_path_with_metadata(&stripped_path, Some(&metadata)),
ls_colors.style_for_path_with_metadata(stripped_path.as_ref(), Some(&metadata)),
true,
),
Err(_) => (ls_colors.style_for_path(&stripped_path), false),
Err(_) => (ls_colors.style_for_path(stripped_path.as_ref()), false),
};
// clickable links don't work in remote SSH sessions
@ -1416,9 +1413,9 @@ fn render_path_name(
// .map(ToNuAnsiStyle::to_nu_ansi_style)
.unwrap_or_default();
let full_path = PathBuf::from(&stripped_path)
let full_path = PathBuf::from(stripped_path.as_ref())
.canonicalize()
.unwrap_or_else(|_| PathBuf::from(&stripped_path));
.unwrap_or_else(|_| PathBuf::from(stripped_path.as_ref()));
let full_path_link = make_clickable_link(
full_path.display().to_string(),

View file

@ -15,7 +15,6 @@ nu-utils = { path = "../nu-utils", version = "0.70.1" }
chrono = { version="0.4.21", features=["serde"] }
sysinfo = "0.26.2"
strip-ansi-escapes = "0.1.1"
[features]
plugin = []

View file

@ -170,17 +170,11 @@ fn get_documentation(
long_desc.push('\n');
let stripped_string = if config.no_color {
if let Ok(bytes) = strip_ansi_escapes::strip(&long_desc) {
String::from_utf8_lossy(&bytes).to_string()
if config.no_color {
nu_utils::strip_ansi_string_likely(long_desc)
} else {
long_desc
}
} else {
long_desc
};
stripped_string
}
// document shape helps showing more useful information

View file

@ -15,7 +15,7 @@ path = "src/main.rs"
[dependencies]
nu-ansi-term = "0.46.0"
nu-protocol = { path = "../nu-protocol", version = "0.70.1" }
strip-ansi-escapes = "0.1.1"
nu-utils = { path = "../nu-utils", version = "0.70.1" }
atty = "0.2.14"
tabled = { version = "0.10.0", features = ["color"], default-features = false }
json_to_table = { version = "0.2.0", features = ["color"] }

View file

@ -181,10 +181,7 @@ fn print_table(table: tabled::Table<Data>, config: &Config) -> String {
// the atty is for when people do ls from vim, there should be no coloring there
if !config.use_ansi_coloring || !atty::is(atty::Stream::Stdout) {
// Draw the table without ansi colors
match strip_ansi_escapes::strip(&output) {
Ok(bytes) => String::from_utf8_lossy(&bytes).to_string(),
Err(_) => output, // we did our best; so return at least something
}
nu_utils::strip_ansi_string_likely(output)
} else {
// Draw the table with ansi colors
output

View file

@ -14,4 +14,5 @@ path = "src/main.rs"
[dependencies]
unicode-width = "0.1.9"
strip-ansi-escapes = "0.1.1"
nu-utils = { path = "../nu-utils", version = "0.70.1" }

View file

@ -91,35 +91,13 @@
//! [`fit_into_width`]: ./struct.Grid.html#method.fit_into_width
//! [`GridOptions`]: ./struct.GridOptions.html
use std::borrow::Cow;
use std::cmp::max;
use std::fmt;
use std::iter::repeat;
use strip_ansi_escapes;
use unicode_width::UnicodeWidthStr;
/// Removes ANSI escape codes and some ASCII control characters
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If parsing fails silently returns the input string
fn strip_ansi(string: &str) -> Cow<str> {
// Check if any ascii control character except LF(0x0A = 10) is present,
// which will be stripped. Includes the primary start of ANSI sequences ESC
// (0x1B = decimal 27)
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
if let Ok(new_string) = String::from_utf8(stripped) {
return Cow::Owned(new_string);
}
}
}
// Else case includes failures to parse!
Cow::Borrowed(string)
}
fn unicode_width_strip_ansi(astring: &str) -> usize {
strip_ansi(astring).width()
nu_utils::strip_ansi_unlikely(astring).width()
}
/// Alignment indicate on which side the content should stick if some filling

View file

@ -16,6 +16,7 @@ path = "src/main.rs"
lscolors = { version = "0.12.0", features = ["crossterm"], default-features = false }
num-format = { version = "0.4.3" }
sys-locale = "0.2.1"
strip-ansi-escapes = "0.1.1"
[target.'cfg(windows)'.dependencies]
crossterm_winapi = "0.9.0"

View file

@ -0,0 +1,91 @@
use std::borrow::Cow;
/// Strips ANSI escape sequences and some ASCII control characters from `string`.
///
/// Tuned for input that rarely contains ANSI control characters: the input
/// is scanned first and, when nothing needs to be removed, it is handed back
/// borrowed without allocating.
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is silently returned unchanged.
pub fn strip_ansi_unlikely(string: &str) -> Cow<str> {
    // Any ASCII control byte (0x00..=0x1F) other than LF would be removed by
    // the parser. That range includes ESC (0x1B), the usual start of an ANSI
    // sequence.
    let needs_parsing = string.bytes().any(|byte| byte != b'\n' && byte < 0x20);
    if !needs_parsing {
        // Fast path: nothing to strip, so no allocation.
        return Cow::Borrowed(string);
    }

    match strip_ansi_escapes::strip(string).map(String::from_utf8) {
        Ok(Ok(cleaned)) => Cow::Owned(cleaned),
        // Parser failure or invalid UTF-8: fall back to the untouched input.
        _ => Cow::Borrowed(string),
    }
}
/// Strips ANSI escape sequences and some ASCII control characters from `string`.
///
/// Tuned for input that likely contains ANSI control characters: the parser
/// is always run, without scanning the input beforehand.
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is silently returned unchanged (borrowed).
pub fn strip_ansi_likely(string: &str) -> Cow<str> {
    // No pre-check here: the parser runs unconditionally.
    let parsed = strip_ansi_escapes::strip(string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok());

    match parsed {
        Some(cleaned) => Cow::Owned(cleaned),
        // Parser failure or invalid UTF-8: fall back to the untouched input.
        None => Cow::Borrowed(string),
    }
}
/// Strips ANSI escape sequences and some ASCII control characters from an
/// owned `String`.
///
/// Tuned for input that rarely contains ANSI control characters: the input
/// is scanned first and, when nothing needs to be removed, the very same
/// `String` is returned without allocating.
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is silently returned unchanged.
pub fn strip_ansi_string_unlikely(string: String) -> String {
    // Any ASCII control byte (0x00..=0x1F) other than LF would be removed by
    // the parser. That range includes ESC (0x1B), the usual start of an ANSI
    // sequence.
    let has_control_byte = string.bytes().any(|byte| byte != b'\n' && byte < 0x20);
    if !has_control_byte {
        // Fast path: hand the original allocation straight back.
        return string;
    }

    match strip_ansi_escapes::strip(&string).map(String::from_utf8) {
        Ok(Ok(cleaned)) => cleaned,
        // Parser failure or invalid UTF-8: fall back to the untouched input.
        _ => string,
    }
}
/// Strips ANSI escape sequences and some ASCII control characters from an
/// owned `String`.
///
/// Tuned for input that likely contains ANSI control characters: the parser
/// is always run, without scanning the input beforehand.
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is silently returned unchanged.
pub fn strip_ansi_string_likely(string: String) -> String {
    // No pre-check here: the parser runs unconditionally.
    let parsed = strip_ansi_escapes::strip(&string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok());

    match parsed {
        Some(cleaned) => cleaned,
        // Parser failure or invalid UTF-8: fall back to the untouched input.
        None => string,
    }
}

View file

@ -1,3 +1,4 @@
mod deansi;
pub mod locale;
pub mod utils;
@ -6,3 +7,7 @@ pub use utils::{
enable_vt_processing, get_default_config, get_default_env, get_ls_colors,
stderr_write_all_and_flush, stdout_write_all_and_flush,
};
pub use deansi::{
strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely,
};