Further optimize unescape_yaml_fish_2_0()

This hot function dominates the flamegraphs for the completions thread, and any optimizations are worthwhile. A variety of different approaches were tested and benchmarked against real-world fish-history file inputs and this is the one that won out across all rustc target-cpu variations tried. Benchmarks and code at https://github.com/mqudsi/fish-yaml-unescape-benchmark
2025-01-14 14:03:58 +00:00 · 2024-05-17 16:06:08 -05:00 · 2024-05-17 16:06:08 -05:00 · bcb1e2ed85
commit bcb1e2ed85
parent 9ab1ec2a9e
1 changed files with 60 additions and 52 deletions
--- a/src/history/file.rs
+++ b/src/history/file.rs
@ -250,43 +250,64 @@ fn escape_yaml_fish_2_0(s: &mut Vec<u8>) {
    replace_all(s, b"\n", b"\\n"); // replace newline with backslash + literal n
 }

-/// This function is called frequently, so it ought to be fast.
-fn unescape_yaml_fish_2_0(s: &mut Vec<u8>) {
-    let mut cursor = 0;
-    while cursor < s.len() {
-        // Look for a backslash.
-        let Some(backslash) = s[cursor..].iter().position(|&c| c == b'\\') else {
-            // No more backslashes
-            break;
-        };
-
-        // Add back the start offset
-        let backslash = backslash + cursor;
-
-        // Backslash found. Maybe we'll do something about it.
-        let Some(escaped_char) = s.get(backslash + 1) else {
-            // Backslash was final character
-            break;
-        };
-
-        match escaped_char {
-            b'\\' => {
-                // Two backslashes in a row. Delete the second one.
-                s.remove(backslash + 1);
-            }
-            b'n' => {
-                // Backslash + n. Replace with a newline.
-                s.splice(backslash..(backslash + 2), [b'\n']);
-            }
-            _ => {
-                // Unknown backslash escape, keep as-is
-            }
-        };
-
-        // The character at index backslash has now been made whole; start at the next
-        // character.
-        cursor = backslash + 1;
+#[inline(always)]
+/// Unescapes the fish-specific yaml variant, if it requires it.
+fn maybe_unescape_yaml_fish_2_0(s: &[u8]) -> Cow<[u8]> {
+    // This is faster than s.contains(b'\\') and can be auto-vectorized to SIMD. See benchmark note
+    // on unescape_yaml_fish_2_0().
+    if !s
+        .into_iter()
+        .copied()
+        .fold(false, |acc, b| acc | (b == b'\\'))
+    {
+        return s.into();
    }
+    unescape_yaml_fish_2_0(s).into()
+}
+
+/// Unescapes the fish-specific yaml variant. Use [`maybe_unescape_yaml_fish_2_0()`] if you're not
+/// positive the input contains an escape.
+
+// This function is called on every input event and shows up heavily in all flamegraphs.
+// Various approaches were benchmarked against real-world fish-history files on lines with escapes,
+// and this implementation (chunk_loop_box) won out. Make changes with care!
+//
+// Benchmarks and code: https://github.com/mqudsi/fish-yaml-unescape-benchmark
+fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec<u8> {
+    // Safety: we never read from this box; we only write to it then read what we've written.
+    // Rationale: This function is in a very hot loop and this benchmarks around 8% faster on
+    // real-world escaped yaml samples from the fish history file.
+    //
+    // This is a very long way around of writing `Box::new_uninit_slice(s.len())`, which
+    // requires the unstablized nightly-only feature new_unit (#63291). It optimizes away.
+    let mut result: Box<[u8]> = std::iter::repeat_with(std::mem::MaybeUninit::uninit)
+        .take(s.len())
+        .map(|b| unsafe { b.assume_init() })
+        .collect();
+    let mut chars = s.iter().copied();
+    let mut src_idx = 0;
+    let mut dst_idx = 0;
+    loop {
+        // While inspecting the asm reveals the compiler does not elide the bounds check from
+        // the writes to `result`, benchmarking shows that using `result.get_unchecked_mut()`
+        // everywhere does not result in a statistically significant improvement to the
+        // performance of this function.
+        let to_copy = chars.by_ref().take_while(|b| *b != b'\\').count();
+        result[dst_idx..][..to_copy].copy_from_slice(&s[src_idx..][..to_copy]);
+        dst_idx += to_copy;
+
+        match chars.next() {
+            Some(b'\\') => result[dst_idx] = b'\\',
+            Some(b'n') => result[dst_idx] = b'\n',
+            _ => break,
+        }
+        src_idx += to_copy + 2;
+        dst_idx += 1;
+    }
+
+    let mut result = Vec::from(result);
+    result.truncate(dst_idx);
+    result
 }

 /// Read one line, stripping off any newline, returning the number of bytes consumed.
@ -320,23 +341,11 @@ fn extract_prefix_and_unescape_yaml(line: &[u8]) -> Option<(Cow<[u8]>, Cow<[u8]>
    let value = split.next()?;
    debug_assert!(split.next().is_none());

-    let key: Cow<[u8]> = if key.iter().copied().any(|b| b == b'\\') {
-        let mut key = key.to_owned();
-        unescape_yaml_fish_2_0(&mut key);
-        key.into()
-    } else {
-        key.into()
-    };
+    let key = maybe_unescape_yaml_fish_2_0(key);

    // Skip a space after the : if necessary.
    let value = trim_start(value);
-    let value: Cow<[u8]> = if value.iter().copied().any(|b| b == b'\\') {
-        let mut value = value.to_owned();
-        unescape_yaml_fish_2_0(&mut value);
-        value.into()
-    } else {
-        value.into()
-    };
+    let value = maybe_unescape_yaml_fish_2_0(value);

    Some((key, value))
 }
@ -399,8 +408,7 @@ fn decode_item_fish_2_0(mut data: &[u8]) -> Option<HistoryItem> {
                // We're going to consume this line.
                data = &data[advance..];

-                let mut line = line.to_owned();
-                unescape_yaml_fish_2_0(&mut line);
+                let line = maybe_unescape_yaml_fish_2_0(&line);
                paths.push(str2wcstring(&line));
            }
        }