perf(reflow): eliminate most WordWrapper allocations (#1239)

On large paragraphs (~1MB), this saves hundreds of thousands of
allocations.

TL;DR: reuse as much memory as possible across `next_line` calls.
Instead of allocating new buffers on every call, allocate the pending-word,
pending-whitespace, and line buffers once and clear them before each reuse.

Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>
This commit is contained in:
Alex Saveau 2024-08-06 20:49:05 -07:00 committed by GitHub
parent 36fa3c11c1
commit 4753b7241b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,4 +1,4 @@
use std::{collections::VecDeque, vec::IntoIter}; use std::{collections::VecDeque, mem};
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr; use unicode_width::UnicodeWidthStr;
@ -34,11 +34,16 @@ where
/// The given, unprocessed lines /// The given, unprocessed lines
input_lines: O, input_lines: O,
max_line_width: u16, max_line_width: u16,
wrapped_lines: Option<IntoIter<Vec<StyledGrapheme<'a>>>>, wrapped_lines: VecDeque<Vec<StyledGrapheme<'a>>>,
current_alignment: Alignment, current_alignment: Alignment,
current_line: Vec<StyledGrapheme<'a>>, current_line: Vec<StyledGrapheme<'a>>,
/// Removes the leading whitespace from lines /// Removes the leading whitespace from lines
trim: bool, trim: bool,
// These are cached allocations that hold no state across next_line invocations
pending_word: Vec<StyledGrapheme<'a>>,
pending_whitespace: VecDeque<StyledGrapheme<'a>>,
pending_line_pool: Vec<Vec<StyledGrapheme<'a>>>,
} }
impl<'a, O, I> WordWrapper<'a, O, I> impl<'a, O, I> WordWrapper<'a, O, I>
@ -50,29 +55,30 @@ where
Self { Self {
input_lines: lines, input_lines: lines,
max_line_width, max_line_width,
wrapped_lines: None, wrapped_lines: VecDeque::new(),
current_alignment: Alignment::Left, current_alignment: Alignment::Left,
current_line: vec![], current_line: vec![],
trim, trim,
}
}
fn next_cached_line(&mut self) -> Option<Vec<StyledGrapheme<'a>>> { pending_word: Vec::new(),
self.wrapped_lines.as_mut()?.next() pending_line_pool: Vec::new(),
pending_whitespace: VecDeque::new(),
}
} }
/// Split an input line (`line_symbols`) into wrapped lines /// Split an input line (`line_symbols`) into wrapped lines
/// and cache them to be emitted later /// and cache them to be emitted later
fn process_input(&mut self, line_symbols: impl IntoIterator<Item = StyledGrapheme<'a>>) { fn process_input(&mut self, line_symbols: impl IntoIterator<Item = StyledGrapheme<'a>>) {
let mut result_lines = vec![]; let mut pending_line = self.pending_line_pool.pop().unwrap_or_default();
let mut pending_line = vec![];
let mut line_width = 0; let mut line_width = 0;
let mut pending_word = vec![];
let mut word_width = 0; let mut word_width = 0;
let mut pending_whitespace: VecDeque<StyledGrapheme> = VecDeque::new();
let mut whitespace_width = 0; let mut whitespace_width = 0;
let mut non_whitespace_previous = false; let mut non_whitespace_previous = false;
self.pending_word.clear();
self.pending_whitespace.clear();
pending_line.clear();
for grapheme in line_symbols { for grapheme in line_symbols {
let is_whitespace = grapheme.is_whitespace(); let is_whitespace = grapheme.is_whitespace();
let symbol_width = grapheme.symbol.width() as u16; let symbol_width = grapheme.symbol.width() as u16;
@ -99,14 +105,14 @@ where
// append finished segment to current line // append finished segment to current line
if word_found || trimmed_overflow || whitespace_overflow || untrimmed_overflow { if word_found || trimmed_overflow || whitespace_overflow || untrimmed_overflow {
if !pending_line.is_empty() || !self.trim { if !pending_line.is_empty() || !self.trim {
pending_line.extend(pending_whitespace.drain(..)); pending_line.extend(self.pending_whitespace.drain(..));
line_width += whitespace_width; line_width += whitespace_width;
} }
pending_line.append(&mut pending_word); pending_line.append(&mut self.pending_word);
line_width += word_width; line_width += word_width;
pending_whitespace.clear(); self.pending_whitespace.clear();
whitespace_width = 0; whitespace_width = 0;
word_width = 0; word_width = 0;
} }
@ -121,11 +127,11 @@ where
if line_full || pending_word_overflow { if line_full || pending_word_overflow {
let mut remaining_width = u16::saturating_sub(self.max_line_width, line_width); let mut remaining_width = u16::saturating_sub(self.max_line_width, line_width);
result_lines.push(std::mem::take(&mut pending_line)); self.wrapped_lines.push_back(mem::take(&mut pending_line));
line_width = 0; line_width = 0;
// remove whitespace up to the end of line // remove whitespace up to the end of line
while let Some(grapheme) = pending_whitespace.front() { while let Some(grapheme) = self.pending_whitespace.front() {
let width = grapheme.symbol.width() as u16; let width = grapheme.symbol.width() as u16;
if width > remaining_width { if width > remaining_width {
@ -134,11 +140,11 @@ where
whitespace_width -= width; whitespace_width -= width;
remaining_width -= width; remaining_width -= width;
pending_whitespace.pop_front(); self.pending_whitespace.pop_front();
} }
// don't count first whitespace toward next word // don't count first whitespace toward next word
if is_whitespace && pending_whitespace.is_empty() { if is_whitespace && self.pending_whitespace.is_empty() {
continue; continue;
} }
} }
@ -146,33 +152,43 @@ where
// append symbol to a pending buffer // append symbol to a pending buffer
if is_whitespace { if is_whitespace {
whitespace_width += symbol_width; whitespace_width += symbol_width;
pending_whitespace.push_back(grapheme); self.pending_whitespace.push_back(grapheme);
} else { } else {
word_width += symbol_width; word_width += symbol_width;
pending_word.push(grapheme); self.pending_word.push(grapheme);
} }
non_whitespace_previous = !is_whitespace; non_whitespace_previous = !is_whitespace;
} }
// append remaining text parts // append remaining text parts
if pending_line.is_empty() && pending_word.is_empty() && !pending_whitespace.is_empty() { if pending_line.is_empty()
result_lines.push(vec![]); && self.pending_word.is_empty()
&& !self.pending_whitespace.is_empty()
{
self.wrapped_lines.push_back(vec![]);
} }
if !pending_line.is_empty() || !self.trim { if !pending_line.is_empty() || !self.trim {
pending_line.extend(pending_whitespace); pending_line.extend(self.pending_whitespace.drain(..));
} }
pending_line.extend(pending_word); pending_line.append(&mut self.pending_word);
#[allow(clippy::else_if_without_else)]
if !pending_line.is_empty() { if !pending_line.is_empty() {
result_lines.push(pending_line); self.wrapped_lines.push_back(pending_line);
} else if pending_line.capacity() > 0 {
self.pending_line_pool.push(pending_line);
} }
if result_lines.is_empty() { if self.wrapped_lines.is_empty() {
result_lines.push(vec![]); self.wrapped_lines.push_back(vec![]);
} }
}
// save processed lines for emitting later fn replace_current_line(&mut self, line: Vec<StyledGrapheme<'a>>) {
self.wrapped_lines = Some(result_lines.into_iter()); let cache = mem::replace(&mut self.current_line, line);
if cache.capacity() > 0 {
self.pending_line_pool.push(cache);
}
} }
} }
@ -189,13 +205,13 @@ where
loop { loop {
// emit next cached line if present // emit next cached line if present
if let Some(line) = self.next_cached_line() { if let Some(line) = self.wrapped_lines.pop_front() {
let line_width = line let line_width = line
.iter() .iter()
.map(|grapheme| grapheme.symbol.width() as u16) .map(|grapheme| grapheme.symbol.width() as u16)
.sum(); .sum();
self.current_line = line; self.replace_current_line(line);
return Some(WrappedLine { return Some(WrappedLine {
line: &self.current_line, line: &self.current_line,
width: line_width, width: line_width,