From f750e577789e1dd34c6950d8c8fe16d1bfd1f49c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 8 Feb 2024 10:05:24 -0600 Subject: [PATCH] fix(lex): Use new-ish OsStr API Fixes #5280 --- clap_lex/src/ext.rs | 73 +++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/clap_lex/src/ext.rs b/clap_lex/src/ext.rs index 009c03c5..729ba7e1 100644 --- a/clap_lex/src/ext.rs +++ b/clap_lex/src/ext.rs @@ -183,7 +183,7 @@ pub trait OsStrExt: private::Sealed { impl OsStrExt for OsStr { fn try_str(&self) -> Result<&str, std::str::Utf8Error> { - let bytes = to_bytes(self); + let bytes = self.as_encoded_bytes(); std::str::from_utf8(bytes) } @@ -192,22 +192,22 @@ impl OsStrExt for OsStr { } fn find(&self, needle: &str) -> Option { - let bytes = to_bytes(self); + let bytes = self.as_encoded_bytes(); (0..=self.len().checked_sub(needle.len())?) .find(|&x| bytes[x..].starts_with(needle.as_bytes())) } fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> { - let bytes = to_bytes(self); + let bytes = self.as_encoded_bytes(); bytes.strip_prefix(prefix.as_bytes()).map(|s| { // SAFETY: - // - This came from `to_bytes` - // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie - unsafe { to_os_str_unchecked(s) } + // - This came from `as_encoded_bytes` + // - Since `prefix` is `&str`, any split will be along UTF-8 boundary + unsafe { OsStr::from_encoded_bytes_unchecked(s) } }) } fn starts_with(&self, prefix: &str) -> bool { - let bytes = to_bytes(self); + let bytes = self.as_encoded_bytes(); bytes.starts_with(prefix.as_bytes()) } @@ -222,13 +222,18 @@ impl OsStrExt for OsStr { fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> { let start = self.find(needle)?; let end = start + needle.len(); - let haystack = to_bytes(self); + let haystack = self.as_encoded_bytes(); let first = &haystack[0..start]; let second = &haystack[end..]; // SAFETY: - // - This came from `to_bytes` - // - Since `needle` is `&str`, any split will be along UTF-8 boundarie - unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) } + // - This came from `as_encoded_bytes` + // - Since `needle` is `&str`, any split will be along UTF-8 boundary + unsafe { + Some(( + OsStr::from_encoded_bytes_unchecked(first), + OsStr::from_encoded_bytes_unchecked(second), + )) + } } } @@ -238,45 +243,6 @@ mod private { impl Sealed for std::ffi::OsStr {} } -/// Allow access to raw bytes -/// -/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with -/// 7-bit ASCII or `&str` -/// -/// # Compatibility -/// -/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate -/// (since its dependent on rustc) -fn to_bytes(s: &OsStr) -> &[u8] { - // SAFETY: - // - Lifetimes are the same - // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) - // - The primary contract is that the encoding for invalid surrogate code points is not - // guaranteed which isn't a problem here - // - // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290) - // but its in limbo - unsafe { std::mem::transmute(s) } -} - -/// Restore raw bytes as `OsStr` -/// -/// # Safety -/// -/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary -/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries -unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr { - // SAFETY: - // - Lifetimes are the same - // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) - // - The primary contract is that the encoding for invalid surrogate code points is not - // guaranteed which isn't a problem here - // - // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290) - // but its in limbo - std::mem::transmute(s) -} - pub struct Split<'s, 'n> { haystack: Option<&'s OsStr>, needle: &'n str, @@ -309,7 +275,10 @@ impl<'s, 'n> Iterator for Split<'s, 'n> { /// /// `index` must be at a valid UTF-8 boundary pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) { - let bytes = to_bytes(os); + let bytes = os.as_encoded_bytes(); let (first, second) = bytes.split_at(index); - (to_os_str_unchecked(first), to_os_str_unchecked(second)) + ( + OsStr::from_encoded_bytes_unchecked(first), + OsStr::from_encoded_bytes_unchecked(second), + ) }