perf(reflow): eliminate most WordWrapper allocations (#1239)

On large paragraphs (~1MB), this saves hundreds of thousands of
allocations.

TL;DR: reuse as much memory as possible across `next_line` calls.
Instead of allocating new buffers each time, allocate the buffers once
and clear them before reuse.

Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>
This commit is contained in:
Alex Saveau 2024-08-06 20:49:05 -07:00 committed by GitHub
parent 36fa3c11c1
commit 4753b7241b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,4 +1,4 @@
use std::{collections::VecDeque, vec::IntoIter};
use std::{collections::VecDeque, mem};
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
@ -34,11 +34,16 @@ where
/// The given, unprocessed lines
input_lines: O,
max_line_width: u16,
wrapped_lines: Option<IntoIter<Vec<StyledGrapheme<'a>>>>,
wrapped_lines: VecDeque<Vec<StyledGrapheme<'a>>>,
current_alignment: Alignment,
current_line: Vec<StyledGrapheme<'a>>,
/// Removes the leading whitespace from lines
trim: bool,
// These are cached allocations that hold no state across next_line invocations
pending_word: Vec<StyledGrapheme<'a>>,
pending_whitespace: VecDeque<StyledGrapheme<'a>>,
pending_line_pool: Vec<Vec<StyledGrapheme<'a>>>,
}
impl<'a, O, I> WordWrapper<'a, O, I>
@ -50,29 +55,30 @@ where
Self {
input_lines: lines,
max_line_width,
wrapped_lines: None,
wrapped_lines: VecDeque::new(),
current_alignment: Alignment::Left,
current_line: vec![],
trim,
}
}
fn next_cached_line(&mut self) -> Option<Vec<StyledGrapheme<'a>>> {
self.wrapped_lines.as_mut()?.next()
pending_word: Vec::new(),
pending_line_pool: Vec::new(),
pending_whitespace: VecDeque::new(),
}
}
/// Split an input line (`line_symbols`) into wrapped lines
/// and cache them to be emitted later
fn process_input(&mut self, line_symbols: impl IntoIterator<Item = StyledGrapheme<'a>>) {
let mut result_lines = vec![];
let mut pending_line = vec![];
let mut pending_line = self.pending_line_pool.pop().unwrap_or_default();
let mut line_width = 0;
let mut pending_word = vec![];
let mut word_width = 0;
let mut pending_whitespace: VecDeque<StyledGrapheme> = VecDeque::new();
let mut whitespace_width = 0;
let mut non_whitespace_previous = false;
self.pending_word.clear();
self.pending_whitespace.clear();
pending_line.clear();
for grapheme in line_symbols {
let is_whitespace = grapheme.is_whitespace();
let symbol_width = grapheme.symbol.width() as u16;
@ -99,14 +105,14 @@ where
// append finished segment to current line
if word_found || trimmed_overflow || whitespace_overflow || untrimmed_overflow {
if !pending_line.is_empty() || !self.trim {
pending_line.extend(pending_whitespace.drain(..));
pending_line.extend(self.pending_whitespace.drain(..));
line_width += whitespace_width;
}
pending_line.append(&mut pending_word);
pending_line.append(&mut self.pending_word);
line_width += word_width;
pending_whitespace.clear();
self.pending_whitespace.clear();
whitespace_width = 0;
word_width = 0;
}
@ -121,11 +127,11 @@ where
if line_full || pending_word_overflow {
let mut remaining_width = u16::saturating_sub(self.max_line_width, line_width);
result_lines.push(std::mem::take(&mut pending_line));
self.wrapped_lines.push_back(mem::take(&mut pending_line));
line_width = 0;
// remove whitespace up to the end of line
while let Some(grapheme) = pending_whitespace.front() {
while let Some(grapheme) = self.pending_whitespace.front() {
let width = grapheme.symbol.width() as u16;
if width > remaining_width {
@ -134,11 +140,11 @@ where
whitespace_width -= width;
remaining_width -= width;
pending_whitespace.pop_front();
self.pending_whitespace.pop_front();
}
// don't count first whitespace toward next word
if is_whitespace && pending_whitespace.is_empty() {
if is_whitespace && self.pending_whitespace.is_empty() {
continue;
}
}
@ -146,33 +152,43 @@ where
// append symbol to a pending buffer
if is_whitespace {
whitespace_width += symbol_width;
pending_whitespace.push_back(grapheme);
self.pending_whitespace.push_back(grapheme);
} else {
word_width += symbol_width;
pending_word.push(grapheme);
self.pending_word.push(grapheme);
}
non_whitespace_previous = !is_whitespace;
}
// append remaining text parts
if pending_line.is_empty() && pending_word.is_empty() && !pending_whitespace.is_empty() {
result_lines.push(vec![]);
if pending_line.is_empty()
&& self.pending_word.is_empty()
&& !self.pending_whitespace.is_empty()
{
self.wrapped_lines.push_back(vec![]);
}
if !pending_line.is_empty() || !self.trim {
pending_line.extend(pending_whitespace);
pending_line.extend(self.pending_whitespace.drain(..));
}
pending_line.extend(pending_word);
pending_line.append(&mut self.pending_word);
#[allow(clippy::else_if_without_else)]
if !pending_line.is_empty() {
result_lines.push(pending_line);
self.wrapped_lines.push_back(pending_line);
} else if pending_line.capacity() > 0 {
self.pending_line_pool.push(pending_line);
}
if result_lines.is_empty() {
result_lines.push(vec![]);
if self.wrapped_lines.is_empty() {
self.wrapped_lines.push_back(vec![]);
}
}
// save processed lines for emitting later
self.wrapped_lines = Some(result_lines.into_iter());
/// Installs `line` as `self.current_line` and recycles the buffer it displaces.
///
/// The displaced vector is pushed onto `self.pending_line_pool` so that its
/// allocation can be reused later, rather than being dropped here.
fn replace_current_line(&mut self, line: Vec<StyledGrapheme<'a>>) {
    let displaced = mem::replace(&mut self.current_line, line);
    // A vector with zero capacity owns no heap buffer, so there is nothing worth pooling.
    if displaced.capacity() == 0 {
        return;
    }
    self.pending_line_pool.push(displaced);
}
}
@ -189,13 +205,13 @@ where
loop {
// emit next cached line if present
if let Some(line) = self.next_cached_line() {
if let Some(line) = self.wrapped_lines.pop_front() {
let line_width = line
.iter()
.map(|grapheme| grapheme.symbol.width() as u16)
.sum();
self.current_line = line;
self.replace_current_line(line);
return Some(WrappedLine {
line: &self.current_line,
width: line_width,