mirror of
https://github.com/nushell/nushell
synced 2024-12-31 23:39:00 +00:00
9c7feb2b19
For the width calculations for table layout the `strip_ansi` function has to be called frequently. By checking for the ASCII control chars (0x00 to 0x1f) except `\n` that are stripped by `strip_ansi_escapes` the number of necessary allocations can be reduced significantly for the simple case of text not containing ANSI escapes. **Benchmark:** ``` nu -c "for i in 0..1000 { ls } | flatten | table" ``` **Allocation reduction** Running on the nushell repo root as the directory, this change reduces the allocation volume by approximately 400 MB (Measured run via KDE heaptrack) **Speed improvement to output** Measured via `/usr/bin/time -v` *before* ``` Command being timed: "./eager_nu -c for i in 0..1000 {ls} | flatten | table" User time (seconds): 0.87 System time (seconds): 0.14 Percent of CPU this job got: 87% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.16 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 18888 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 4809 Voluntary context switches: 38 Involuntary context switches: 14 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0 ``` *after* ``` Command being timed: "./lazy_nu -c for i in 0..1000 {ls} | flatten | table" User time (seconds): 0.63 System time (seconds): 0.14 Percent of CPU this job got: 80% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.97 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 18660 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 5149 Voluntary context switches: 24 Involuntary context switches: 5 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0 ```
320 lines
9.6 KiB
Rust
320 lines
9.6 KiB
Rust
use crate::table::TextStyle;
|
|
use ansi_cut::AnsiCut;
|
|
use nu_ansi_term::Style;
|
|
use std::borrow::Cow;
|
|
use std::collections::HashMap;
|
|
use std::{fmt::Display, iter::Iterator};
|
|
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub enum Alignment {
|
|
Left,
|
|
Center,
|
|
Right,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct Subline {
|
|
pub subline: String,
|
|
pub width: usize,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct Line {
|
|
pub sublines: Vec<Subline>,
|
|
pub width: usize,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct WrappedLine {
|
|
pub line: String,
|
|
pub width: usize,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct WrappedCell {
|
|
pub lines: Vec<WrappedLine>,
|
|
pub max_width: usize,
|
|
|
|
pub style: TextStyle,
|
|
}
|
|
|
|
impl Display for Line {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
let mut first = true;
|
|
for subline in &self.sublines {
|
|
if !first {
|
|
write!(f, " ")?;
|
|
} else {
|
|
first = false;
|
|
}
|
|
write!(f, "{}", subline.subline)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// Removes ANSI escape codes and some ASCII control characters
|
|
///
|
|
/// Keeps `\n` removes `\r`, `\t` etc.
|
|
///
|
|
/// If parsing fails silently returns the input string
|
|
fn strip_ansi(string: &str) -> Cow<str> {
|
|
// Check if any ascii control character except LF(0x0A = 10) is present,
|
|
// which will be stripped. Includes the primary start of ANSI sequences ESC
|
|
// (0x1B = decimal 27)
|
|
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
|
|
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
|
|
if let Ok(new_string) = String::from_utf8(stripped) {
|
|
return Cow::Owned(new_string);
|
|
}
|
|
}
|
|
}
|
|
// Else case includes failures to parse!
|
|
Cow::Borrowed(string)
|
|
}
|
|
|
|
// fn special_width(astring: &str) -> usize {
|
|
// // remove the zwj's '\u{200d}'
|
|
// // remove the fe0f's
|
|
// let stripped_string: String = {
|
|
// if let Ok(bytes) = strip_ansi_escapes::strip(astring) {
|
|
// String::from_utf8_lossy(&bytes).to_string()
|
|
// } else {
|
|
// astring.to_string()
|
|
// }
|
|
// };
|
|
|
|
// let no_zwj = stripped_string.replace('\u{200d}', "");
|
|
// let no_fe0f = no_zwj.replace('\u{fe0f}', "");
|
|
// UnicodeWidthStr::width(&no_fe0f[..])
|
|
// }
|
|
|
|
pub fn split_sublines(input: &str) -> Vec<Vec<Subline>> {
|
|
input
|
|
.split_terminator('\n')
|
|
.map(|line| {
|
|
line.split_terminator(' ')
|
|
.map(|x| Subline {
|
|
subline: x.to_string(),
|
|
width: {
|
|
// We've tried UnicodeWidthStr::width(x), UnicodeSegmentation::graphemes(x, true).count()
|
|
// and x.chars().count() with all types of combinations. Currently, it appears that
|
|
// getting the max of char count and Unicode width seems to produce the best layout.
|
|
// However, it's not perfect.
|
|
// let c = x.chars().count();
|
|
// let u = UnicodeWidthStr::width(x);
|
|
// std::cmp::min(c, u)
|
|
|
|
// let c = strip_ansi(x).chars().count();
|
|
// let u = special_width(x);
|
|
// std::cmp::max(c, u)
|
|
let stripped = strip_ansi(x);
|
|
|
|
let c = stripped.chars().count();
|
|
let u = stripped.width();
|
|
std::cmp::max(c, u)
|
|
},
|
|
})
|
|
.collect::<Vec<_>>()
|
|
})
|
|
.collect::<Vec<_>>()
|
|
}
|
|
|
|
pub fn column_width(input: &[Vec<Subline>]) -> usize {
|
|
let mut max = 0;
|
|
|
|
for line in input {
|
|
let mut total = 0;
|
|
|
|
let mut first = true;
|
|
for inp in line {
|
|
if !first {
|
|
// Account for the space
|
|
total += 1;
|
|
} else {
|
|
first = false;
|
|
}
|
|
|
|
total += inp.width;
|
|
}
|
|
|
|
if total > max {
|
|
max = total;
|
|
}
|
|
}
|
|
|
|
max
|
|
}
|
|
|
|
fn split_word(cell_width: usize, word: &str) -> Vec<Subline> {
|
|
let mut output = vec![];
|
|
let mut current_width = 0;
|
|
let mut start_index = 0;
|
|
let mut end_index;
|
|
|
|
let word_no_ansi = strip_ansi(word);
|
|
for c in word_no_ansi.char_indices() {
|
|
if let Some(width) = c.1.width() {
|
|
end_index = c.0;
|
|
if current_width + width > cell_width {
|
|
output.push(Subline {
|
|
subline: word.cut(start_index..end_index),
|
|
width: current_width,
|
|
});
|
|
|
|
start_index = c.0;
|
|
current_width = width;
|
|
} else {
|
|
current_width += width;
|
|
}
|
|
}
|
|
}
|
|
|
|
if start_index != word_no_ansi.len() {
|
|
output.push(Subline {
|
|
subline: word.cut(start_index..),
|
|
width: current_width,
|
|
});
|
|
}
|
|
|
|
output
|
|
}
|
|
|
|
pub fn wrap(
|
|
cell_width: usize,
|
|
mut input: impl Iterator<Item = Subline>,
|
|
color_hm: &HashMap<String, Style>,
|
|
re_leading: ®ex::Regex,
|
|
re_trailing: ®ex::Regex,
|
|
) -> (Vec<WrappedLine>, usize) {
|
|
let mut lines = vec![];
|
|
let mut current_line: Vec<Subline> = vec![];
|
|
let mut current_width = 0;
|
|
let mut first = true;
|
|
let mut max_width = 0;
|
|
let lead_trail_space_bg_color = color_hm
|
|
.get("leading_trailing_space_bg")
|
|
.unwrap_or(&Style::default())
|
|
.to_owned();
|
|
|
|
loop {
|
|
match input.next() {
|
|
Some(item) => {
|
|
if !first {
|
|
current_width += 1;
|
|
} else {
|
|
first = false;
|
|
}
|
|
|
|
if item.width + current_width > cell_width {
|
|
// If this is a really long single word, we need to split the word
|
|
if current_line.len() == 1 && current_width > cell_width {
|
|
max_width = cell_width;
|
|
let sublines = split_word(cell_width, ¤t_line[0].subline);
|
|
for subline in sublines {
|
|
let width = subline.width;
|
|
lines.push(Line {
|
|
sublines: vec![subline],
|
|
width,
|
|
});
|
|
}
|
|
|
|
first = true;
|
|
|
|
current_width = item.width;
|
|
current_line = vec![item];
|
|
} else {
|
|
if !current_line.is_empty() {
|
|
lines.push(Line {
|
|
sublines: current_line,
|
|
width: current_width,
|
|
});
|
|
}
|
|
|
|
first = true;
|
|
|
|
current_width = item.width;
|
|
current_line = vec![item];
|
|
max_width = std::cmp::max(max_width, current_width);
|
|
}
|
|
} else {
|
|
current_width += item.width;
|
|
current_line.push(item);
|
|
}
|
|
}
|
|
None => {
|
|
if current_width > cell_width {
|
|
// We need to break up the last word
|
|
let sublines = split_word(cell_width, ¤t_line[0].subline);
|
|
for subline in sublines {
|
|
let width = subline.width;
|
|
lines.push(Line {
|
|
sublines: vec![subline],
|
|
width,
|
|
});
|
|
}
|
|
} else if current_width > 0 {
|
|
lines.push(Line {
|
|
sublines: current_line,
|
|
width: current_width,
|
|
});
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut current_max = 0;
|
|
let mut output = vec![];
|
|
|
|
for line in lines {
|
|
let mut current_line_width = 0;
|
|
let mut first = true;
|
|
let mut current_line = String::new();
|
|
|
|
for subline in line.sublines {
|
|
if !first {
|
|
current_line_width += 1 + subline.width;
|
|
current_line.push(' ');
|
|
} else {
|
|
first = false;
|
|
current_line_width = subline.width;
|
|
}
|
|
current_line.push_str(&subline.subline);
|
|
}
|
|
|
|
if current_line_width > current_max {
|
|
current_max = current_line_width;
|
|
}
|
|
|
|
// highlight leading and trailing spaces so they stand out.
|
|
let mut bg_color_string = Style::default().prefix().to_string();
|
|
// right now config settings can only set foreground colors so, in this
|
|
// instance we take the foreground color and make it a background color
|
|
if let Some(bg) = lead_trail_space_bg_color.foreground {
|
|
bg_color_string = Style::default().on(bg).prefix().to_string()
|
|
};
|
|
|
|
if let Some(leading_match) = re_leading.find(¤t_line.clone()) {
|
|
String::insert_str(
|
|
&mut current_line,
|
|
leading_match.end(),
|
|
nu_ansi_term::ansi::RESET,
|
|
);
|
|
String::insert_str(&mut current_line, leading_match.start(), &bg_color_string);
|
|
}
|
|
|
|
if let Some(trailing_match) = re_trailing.find(¤t_line.clone()) {
|
|
String::insert_str(&mut current_line, trailing_match.start(), &bg_color_string);
|
|
current_line += nu_ansi_term::ansi::RESET;
|
|
}
|
|
|
|
output.push(WrappedLine {
|
|
line: current_line,
|
|
width: current_line_width,
|
|
});
|
|
}
|
|
|
|
(output, current_max)
|
|
}
|