unescape_yaml_fish_2_0: Remove MaybeUninit::assume_init()

The generated assembly is more or less the same and the previously generated
version had been manually verified, but this PR removes the usage of
`MaybeUninit::assume_init()` and replaces it with direct pointer writes.

This should result in no observable change: it continues to pass the functional
tests and benchmarks identically. The safety of the new code has been verified
with Miri.

[0]: https://github.com/mqudsi/fish-yaml-unescape-benchmark
This commit is contained in:
Mahmoud Al-Qudsi 2024-08-20 14:19:56 -05:00
parent 569d3cdfff
commit 6c37103b7c

View file

@ -270,15 +270,13 @@ fn maybe_unescape_yaml_fish_2_0(s: &[u8]) -> Cow<[u8]> {
// //
// Benchmarks and code: https://github.com/mqudsi/fish-yaml-unescape-benchmark // Benchmarks and code: https://github.com/mqudsi/fish-yaml-unescape-benchmark
fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec<u8> { fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec<u8> {
// Safety: we never read from this box; we only write to it then read what we've written. // This function is in a very hot loop and the usage of boxed uninit memory benchmarks around 8%
// Rationale: This function is in a very hot loop and this benchmarks around 8% faster on // faster on real-world escaped yaml samples from the fish history file.
// real-world escaped yaml samples from the fish history file.
//
// This is a very long way around of writing `Box::new_uninit_slice(s.len())`, which // This is a very long way around of writing `Box::new_uninit_slice(s.len())`, which
// requires the unstablized nightly-only feature new_unit (#63291). It optimizes away. // requires the unstablized nightly-only feature new_unit (#63291). It optimizes away.
let mut result: Box<[u8]> = std::iter::repeat_with(std::mem::MaybeUninit::uninit) let mut result: Box<[_]> = std::iter::repeat_with(std::mem::MaybeUninit::uninit)
.take(s.len()) .take(s.len())
.map(|b| unsafe { b.assume_init() })
.collect(); .collect();
let mut chars = s.iter().copied(); let mut chars = s.iter().copied();
let mut src_idx = 0; let mut src_idx = 0;
@ -289,21 +287,26 @@ fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec<u8> {
// everywhere does not result in a statistically significant improvement to the // everywhere does not result in a statistically significant improvement to the
// performance of this function. // performance of this function.
let to_copy = chars.by_ref().take_while(|b| *b != b'\\').count(); let to_copy = chars.by_ref().take_while(|b| *b != b'\\').count();
result[dst_idx..][..to_copy].copy_from_slice(&s[src_idx..][..to_copy]); unsafe {
let src = &s[src_idx..].as_ptr();
// Can use the following when feature(maybe_uninit_slice) is stabilized:
// let dst = std::mem::MaybeUninit::slice_as_mut_ptr(&mut result[dst_idx..]);
let dst = result[dst_idx..].as_mut_ptr().cast();
std::ptr::copy_nonoverlapping(*src, dst, to_copy);
}
dst_idx += to_copy; dst_idx += to_copy;
match chars.next() { match chars.next() {
Some(b'\\') => result[dst_idx] = b'\\', Some(b'\\') => result[dst_idx].write(b'\\'),
Some(b'n') => result[dst_idx] = b'\n', Some(b'n') => result[dst_idx].write(b'\n'),
_ => break, _ => break,
} };
src_idx += to_copy + 2; src_idx += to_copy + 2;
dst_idx += 1; dst_idx += 1;
} }
let mut result = Vec::from(result); let result = Box::leak(result);
result.truncate(dst_idx); unsafe { Vec::from_raw_parts(result.as_mut_ptr().cast(), dst_idx, result.len()) }
result
} }
/// Read one line, stripping off any newline, returning the number of bytes consumed. /// Read one line, stripping off any newline, returning the number of bytes consumed.