mirror of
https://github.com/ClementTsang/bottom
synced 2024-11-25 05:30:22 +00:00
refactor: add fast branch for ascii-only string truncate (#1330)
This is just a first attempt to speed up what looked like a hot spot in samply's profiling results. Benchmark code and results here: https://gist.github.com/ClementTsang/e242f12f7e1d1902ed414dcc18c3b321
This commit is contained in:
parent
3a50d7622e
commit
a93521d2b1
2 changed files with 78 additions and 36 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -42,3 +42,6 @@ dhat/
|
||||||
|
|
||||||
# cargo vet
|
# cargo vet
|
||||||
supply-chain/
|
supply-chain/
|
||||||
|
|
||||||
|
# samply profiling
|
||||||
|
profile.json
|
||||||
|
|
|
@ -100,43 +100,59 @@ fn grapheme_width(g: &str) -> usize {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn truncate_str<U: Into<usize>>(content: &str, width: U) -> String {
|
fn truncate_str<U: Into<usize>>(content: &str, width: U) -> String {
|
||||||
let width = width.into();
|
let width = width.into();
|
||||||
let mut text = String::with_capacity(width);
|
|
||||||
|
|
||||||
if width > 0 {
|
if width > 0 {
|
||||||
let mut curr_width = 0;
|
if content.is_ascii() {
|
||||||
let mut early_break = false;
|
// If the entire string is ASCII, we can use a much simpler approach.
|
||||||
|
|
||||||
// This tracks the length of the last added string - note this does NOT match the grapheme *width*.
|
if content.len() <= width {
|
||||||
let mut last_added_str_len = 0;
|
content.to_owned()
|
||||||
|
|
||||||
// Cases to handle:
|
|
||||||
// - Completes adding the entire string.
|
|
||||||
// - Adds a character up to the boundary, then fails.
|
|
||||||
// - Adds a character not up to the boundary, then fails.
|
|
||||||
// Inspired by https://tomdebruijn.com/posts/rust-string-length-width-calculations/
|
|
||||||
for g in UnicodeSegmentation::graphemes(content, true) {
|
|
||||||
let g_width = grapheme_width(g);
|
|
||||||
|
|
||||||
if curr_width + g_width <= width {
|
|
||||||
curr_width += g_width;
|
|
||||||
last_added_str_len = g.len();
|
|
||||||
text.push_str(g);
|
|
||||||
} else {
|
} else {
|
||||||
early_break = true;
|
let mut text = String::with_capacity(width);
|
||||||
break;
|
let (keep, _throw) = content.split_at(width - 1);
|
||||||
}
|
text.push_str(keep);
|
||||||
}
|
text.push('…');
|
||||||
|
|
||||||
if early_break {
|
text
|
||||||
if curr_width == width {
|
|
||||||
// Remove the last grapheme cluster added.
|
|
||||||
text.truncate(text.len() - last_added_str_len);
|
|
||||||
}
|
}
|
||||||
text.push('…');
|
} else {
|
||||||
|
let mut text = String::with_capacity(width);
|
||||||
|
let mut curr_width = 0;
|
||||||
|
let mut early_break = false;
|
||||||
|
|
||||||
|
// This tracks the length of the last added string - note this does NOT match the grapheme *width*.
|
||||||
|
let mut last_added_str_len = 0;
|
||||||
|
|
||||||
|
// Cases to handle:
|
||||||
|
// - Completes adding the entire string.
|
||||||
|
// - Adds a character up to the boundary, then fails.
|
||||||
|
// - Adds a character not up to the boundary, then fails.
|
||||||
|
// Inspired by https://tomdebruijn.com/posts/rust-string-length-width-calculations/
|
||||||
|
for g in UnicodeSegmentation::graphemes(content, true) {
|
||||||
|
let g_width = grapheme_width(g);
|
||||||
|
|
||||||
|
if curr_width + g_width <= width {
|
||||||
|
curr_width += g_width;
|
||||||
|
last_added_str_len = g.len();
|
||||||
|
text.push_str(g);
|
||||||
|
} else {
|
||||||
|
early_break = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if early_break {
|
||||||
|
if curr_width == width {
|
||||||
|
// Remove the last grapheme cluster added.
|
||||||
|
text.truncate(text.len() - last_added_str_len);
|
||||||
|
}
|
||||||
|
text.push('…');
|
||||||
|
}
|
||||||
|
text
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
String::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
text
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -252,7 +268,7 @@ mod test {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
truncate_str(cpu_header, 8_usize),
|
truncate_str(cpu_header, 8_usize),
|
||||||
cpu_header,
|
cpu_header,
|
||||||
"should match base string as there is enough room"
|
"should match base string as there is extra room"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -269,13 +285,36 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn truncate_cjk() {
|
fn test_truncate_ascii() {
|
||||||
|
let content = "0123456";
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
truncate_str(content, 8_usize),
|
||||||
|
content,
|
||||||
|
"should match base string as there is extra room"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
truncate_str(content, 7_usize),
|
||||||
|
content,
|
||||||
|
"should match base string as there is enough room"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(truncate_str(content, 6_usize), "01234…");
|
||||||
|
assert_eq!(truncate_str(content, 5_usize), "0123…");
|
||||||
|
assert_eq!(truncate_str(content, 4_usize), "012…");
|
||||||
|
assert_eq!(truncate_str(content, 1_usize), "…");
|
||||||
|
assert_eq!(truncate_str(content, 0_usize), "");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_truncate_cjk() {
|
||||||
let cjk = "施氏食獅史";
|
let cjk = "施氏食獅史";
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
truncate_str(cjk, 11_usize),
|
truncate_str(cjk, 11_usize),
|
||||||
cjk,
|
cjk,
|
||||||
"should match base string as there is enough room"
|
"should match base string as there is extra room"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -292,13 +331,13 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn truncate_mixed() {
|
fn test_truncate_mixed() {
|
||||||
let test = "Test (施氏食獅史) Test";
|
let test = "Test (施氏食獅史) Test";
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
truncate_str(test, 30_usize),
|
truncate_str(test, 30_usize),
|
||||||
test,
|
test,
|
||||||
"should match base string as there is enough room"
|
"should match base string as there is extra room"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -325,7 +364,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn truncate_flags() {
|
fn test_truncate_flags() {
|
||||||
let flag = "🇨🇦";
|
let flag = "🇨🇦";
|
||||||
assert_eq!(truncate_str(flag, 3_usize), flag);
|
assert_eq!(truncate_str(flag, 3_usize), flag);
|
||||||
assert_eq!(truncate_str(flag, 2_usize), flag);
|
assert_eq!(truncate_str(flag, 2_usize), flag);
|
||||||
|
@ -368,7 +407,7 @@ mod test {
|
||||||
|
|
||||||
/// This might not be the best way to handle it, but this at least tests that it doesn't crash...
|
/// This might not be the best way to handle it, but this at least tests that it doesn't crash...
|
||||||
#[test]
|
#[test]
|
||||||
fn truncate_hindi() {
|
fn test_truncate_hindi() {
|
||||||
// cSpell:disable
|
// cSpell:disable
|
||||||
let test = "हिन्दी";
|
let test = "हिन्दी";
|
||||||
assert_eq!(truncate_str(test, 10_usize), test);
|
assert_eq!(truncate_str(test, 10_usize), test);
|
||||||
|
|
Loading…
Reference in a new issue