perf: Vendor textwrap parts we need

The immediate benefit is binary size but this also makes us more
flexible on the implementation, like allowing wrapping of `StyledStr`.

This removed 12 KiB from `.text`

This helps towards #1365 and probably #2037
This commit is contained in:
Ed Page 2022-08-24 15:53:23 -05:00
parent 735d6fd1e3
commit 37f2efb0bd
7 changed files with 264 additions and 47 deletions

25
Cargo.lock generated
View file

@ -113,7 +113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"bitflags",
"textwrap 0.11.0",
"textwrap",
"unicode-width",
]
@ -133,8 +133,7 @@ dependencies = [
"static_assertions",
"strsim",
"termcolor",
"terminal_size 0.2.1",
"textwrap 0.15.0",
"terminal_size",
"trybuild",
"trycmd",
"unic-emoji-char",
@ -931,16 +930,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "terminal_size"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "terminal_size"
version = "0.2.1"
@ -960,16 +949,6 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "textwrap"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
dependencies = [
"terminal_size 0.1.17",
"unicode-width",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"

View file

@ -71,9 +71,9 @@ suggestions = ["dep:strsim"]
deprecated = ["clap_derive?/deprecated"] # Guided experience to prepare for next breaking release (at different stages of development, this may become default)
derive = ["clap_derive"]
cargo = [] # Disable if you're not using Cargo, enables Cargo-env-var-dependent macros
wrap_help = ["dep:terminal_size", "textwrap/terminal_size"]
wrap_help = ["dep:terminal_size"]
env = [] # Use environment variables during arg parsing
unicode = ["textwrap/unicode-width", "dep:unicode-width", "dep:unicase"] # Support for unicode characters in arguments and help messages
unicode = ["dep:unicode-width", "dep:unicase"] # Support for unicode characters in arguments and help messages
perf = [] # Optimize for runtime performance
# In-work features
@ -88,7 +88,6 @@ bench = false
clap_derive = { path = "./clap_derive", version = "=4.0.0-alpha.0", optional = true }
clap_lex = { path = "./clap_lex", version = "0.2.2" }
bitflags = "1.2"
textwrap = { version = "0.15.0", default-features = false, features = [] }
unicase = { version = "2.6", optional = true }
strsim = { version = "0.10", optional = true }
atty = { version = "0.2", optional = true }

View file

@ -10,6 +10,7 @@ use crate::builder::Str;
use crate::builder::StyledStr;
use crate::builder::{render_arg_val, Arg, Command};
use crate::output::display_width;
use crate::output::wrap;
use crate::output::Usage;
use crate::util::FlatSet;
use crate::ArgAction;
@ -355,7 +356,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
self.cmd.get_before_help()
};
if let Some(output) = before_help {
self.none(text_wrapper(
self.none(wrap(
&output.unwrap_none().replace("{n}", "\n"),
self.term_w,
));
@ -374,7 +375,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
};
if let Some(output) = after_help {
self.none("\n\n");
self.none(text_wrapper(
self.none(wrap(
&output.unwrap_none().replace("{n}", "\n"),
self.term_w,
));
@ -415,7 +416,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
// Determine how many newlines we need to insert
let avail_chars = self.term_w - spaces;
debug!("Help::help: Usable space...{}", avail_chars);
help = text_wrapper(&help.replace("{n}", "\n"), avail_chars);
help = wrap(&help.replace("{n}", "\n"), avail_chars);
} else {
debug!("No");
}
@ -498,7 +499,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
usize::MAX
};
let help = text_wrapper(help.unwrap_none(), avail_chars);
let help = wrap(help.unwrap_none(), avail_chars);
let mut help = help.lines();
self.none(help.next().unwrap_or_default());
@ -657,7 +658,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
if before_new_line {
self.none("\n");
}
self.none(text_wrapper(output.unwrap_none(), self.term_w));
self.none(wrap(output.unwrap_none(), self.term_w));
if after_new_line {
self.none("\n");
}
@ -669,7 +670,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
if before_new_line {
self.none("\n");
}
self.none(text_wrapper(author, self.term_w));
self.none(wrap(author, self.term_w));
if after_new_line {
self.none("\n");
}
@ -682,7 +683,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
.get_version()
.or_else(|| self.cmd.get_long_version());
if let Some(output) = version {
self.none(text_wrapper(output, self.term_w));
self.none(wrap(output, self.term_w));
}
}
}
@ -906,7 +907,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
fn write_display_name(&mut self) {
debug!("Help::write_display_name");
let display_name = text_wrapper(
let display_name = wrap(
&self
.cmd
.get_display_name()
@ -926,10 +927,10 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> {
// In case we're dealing with subcommands i.e. git mv is translated to git-mv
bn.replace(' ', "-")
} else {
text_wrapper(&self.cmd.get_name().replace("{n}", "\n"), self.term_w)
wrap(&self.cmd.get_name().replace("{n}", "\n"), self.term_w)
}
} else {
text_wrapper(&self.cmd.get_name().replace("{n}", "\n"), self.term_w)
wrap(&self.cmd.get_name().replace("{n}", "\n"), self.term_w)
};
self.good(&bin_name);
}
@ -1071,16 +1072,6 @@ fn should_show_subcommand(subcommand: &Command) -> bool {
!subcommand.is_hide_set()
}
fn text_wrapper(help: &str, width: usize) -> String {
let wrapper = textwrap::Options::new(width)
.break_words(false)
.word_splitter(textwrap::WordSplitter::NoHyphenation);
help.lines()
.map(|line| textwrap::fill(line, &wrapper))
.collect::<Vec<String>>()
.join("\n")
}
#[cfg(test)]
mod test {
use super::*;
@ -1088,7 +1079,7 @@ mod test {
#[test]
fn wrap_help_last_word() {
let help = String::from("foo bar baz");
assert_eq!(text_wrapper(&help, 5), "foo\nbar\nbaz");
assert_eq!(wrap(&help, 5), "foo\nbar\nbaz");
}
#[test]

View file

@ -6,4 +6,5 @@ pub(crate) mod fmt;
pub(crate) use self::help::Help;
pub(crate) use self::textwrap::core::display_width;
pub(crate) use self::textwrap::wrap;
pub(crate) use self::usage::Usage;

View file

@ -1 +1,113 @@
//! Fork of `textwrap` crate
//!
//! Benefits of forking:
//! - Pull in only what we need rather than relying on the compiler to remove what we don't need
//! - `LineWrapper` is able to incrementally wrap which will help with `StyledStr
pub(crate) mod core;
pub(crate) mod word_separators;
pub(crate) mod wrap_algorithms;
pub(crate) fn wrap(content: &str, hard_width: usize) -> String {
let mut wrapper = wrap_algorithms::LineWrapper::new(hard_width);
let mut total = Vec::new();
for line in content.split_inclusive('\n') {
wrapper.reset();
let line = word_separators::find_words_ascii_space(line).collect::<Vec<_>>();
total.extend(wrapper.wrap(line));
}
total.join("")
}
#[cfg(test)]
mod test {
/// Compatibility shim to keep textwrap's tests
fn wrap(content: &str, hard_width: usize) -> Vec<String> {
super::wrap(content, hard_width)
.trim_end()
.split('\n')
.map(|s| s.to_owned())
.collect::<Vec<_>>()
}
#[test]
fn no_wrap() {
assert_eq!(wrap("foo", 10), vec!["foo"]);
}
#[test]
fn wrap_simple() {
assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
}
#[test]
fn to_be_or_not() {
assert_eq!(
wrap("To be, or not to be, that is the question.", 10),
vec!["To be, or", "not to be,", "that is", "the", "question."]
);
}
#[test]
fn multiple_words_on_first_line() {
assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
}
#[test]
fn long_word() {
assert_eq!(wrap("foo", 0), vec!["foo"]);
}
#[test]
fn long_words() {
assert_eq!(wrap("foo bar", 0), vec!["foo", "bar"]);
}
#[test]
fn max_width() {
assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);
let text = "Hello there! This is some English text. \
It should not be wrapped given the extents below.";
assert_eq!(wrap(text, usize::MAX), vec![text]);
}
#[test]
fn leading_whitespace() {
assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
}
#[test]
fn leading_whitespace_empty_first_line() {
// If there is no space for the first word, the first line
// will be empty. This is because the string is split into
// words like [" ", "foobar ", "baz"], which puts "foobar " on
// the second line. We never output trailing whitespace
assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
}
#[test]
fn trailing_whitespace() {
// Whitespace is only significant inside a line. After a line
// gets too long and is broken, the first word starts in
// column zero and is not indented.
assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]);
}
#[test]
fn issue_99() {
// We did not reset the in_whitespace flag correctly and did
// not handle single-character words after a line break.
assert_eq!(
wrap("aaabbbccc x yyyzzzwww", 9),
vec!["aaabbbccc", "x", "yyyzzzwww"]
);
}
#[test]
fn issue_129() {
// The dash is an em-dash which takes up four bytes. We used
// to panic since we tried to index into the character.
assert_eq!(wrap("x x", 1), vec!["x", "", "x"]);
}
}

View file

@ -0,0 +1,91 @@
pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator<Item = &'_ str> + '_ {
let mut start = 0;
let mut in_whitespace = false;
let mut char_indices = line.char_indices();
std::iter::from_fn(move || {
for (idx, ch) in char_indices.by_ref() {
if in_whitespace && ch != ' ' {
let word = &line[start..idx];
start = idx;
in_whitespace = ch == ' ';
return Some(word);
}
in_whitespace = ch == ' ';
}
if start < line.len() {
let word = &line[start..];
start = line.len();
return Some(word);
}
None
})
}
#[cfg(test)]
mod tests {
use super::*;
macro_rules! test_find_words {
($ascii_name:ident,
$([ $line:expr, $ascii_words:expr ]),+) => {
#[test]
fn $ascii_name() {
$(
let expected_words: Vec<&str> = $ascii_words.to_vec();
let actual_words = find_words_ascii_space($line)
.collect::<Vec<_>>();
assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
)+
}
};
}
test_find_words!(ascii_space_empty, ["", []]);
test_find_words!(ascii_single_word, ["foo", ["foo"]]);
test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]);
test_find_words!(
ascii_multiple_words,
["foo bar", ["foo ", "bar"]],
["x y z", ["x ", "y ", "z"]]
);
test_find_words!(ascii_only_whitespace, [" ", [" "]], [" ", [" "]]);
test_find_words!(
ascii_inter_word_whitespace,
["foo bar", ["foo ", "bar"]]
);
test_find_words!(ascii_trailing_whitespace, ["foo ", ["foo "]]);
test_find_words!(ascii_leading_whitespace, [" foo", [" ", "foo"]]);
test_find_words!(
ascii_multi_column_char,
["\u{1f920}", ["\u{1f920}"]] // cowboy emoji 🤠
);
test_find_words!(
ascii_hyphens,
["foo-bar", ["foo-bar"]],
["foo- bar", ["foo- ", "bar"]],
["foo - bar", ["foo ", "- ", "bar"]],
["foo -bar", ["foo ", "-bar"]]
);
test_find_words!(ascii_newline, ["foo\nbar", ["foo\nbar"]]);
test_find_words!(ascii_tab, ["foo\tbar", ["foo\tbar"]]);
test_find_words!(
ascii_non_breaking_space,
["foo\u{00A0}bar", ["foo\u{00A0}bar"]]
);
}

View file

@ -0,0 +1,44 @@
use super::core::display_width;
#[derive(Debug)]
pub(crate) struct LineWrapper {
line_width: usize,
hard_width: usize,
}
impl LineWrapper {
pub(crate) fn new(hard_width: usize) -> Self {
Self {
line_width: 0,
hard_width,
}
}
pub(crate) fn reset(&mut self) {
self.line_width = 0;
}
pub(crate) fn wrap<'w>(&mut self, mut words: Vec<&'w str>) -> Vec<&'w str> {
let mut i = 0;
while i < words.len() {
let word = &words[i];
let trimmed = word.trim_end();
let word_width = display_width(trimmed);
let trimmed_delta = word.len() - trimmed.len();
if i != 0 && self.hard_width < self.line_width + word_width {
if 0 < i {
let last = i - 1;
let trimmed = words[last].trim_end();
words[last] = trimmed;
}
words.insert(i, "\n");
i += 1;
self.reset();
}
self.line_width += word_width + trimmed_delta;
i += 1;
}
words
}
}