diff --git a/clap_lex/src/ext.rs b/clap_lex/src/ext.rs index 6d319988..4cd098c6 100644 --- a/clap_lex/src/ext.rs +++ b/clap_lex/src/ext.rs @@ -212,7 +212,8 @@ pub trait OsStrExt: private::Sealed { impl OsStrExt for OsStr { fn try_str(&self) -> Result<&str, std::str::Utf8Error> { - let bytes = to_bytes(self); + // SAFETY: Only interacting with `OsStr` as `&str + let bytes = unsafe { to_bytes(self) }; std::str::from_utf8(bytes) } @@ -221,17 +222,24 @@ impl OsStrExt for OsStr { } fn find(&self, needle: &str) -> Option { + // SAFETY: Only interacting with `OsStr` as `&str + let bytes = unsafe { to_bytes(self) }; (0..=self.len().checked_sub(needle.len())?) - .find(|&x| to_bytes(self)[x..].starts_with(needle.as_bytes())) + .find(|&x| bytes[x..].starts_with(needle.as_bytes())) } fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> { - to_bytes(self) - .strip_prefix(prefix.as_bytes()) - .map(to_os_str) + // SAFETY: Only interacting with `OsStr` as `&str + let bytes = unsafe { to_bytes(self) }; + bytes.strip_prefix(prefix.as_bytes()).map(|s| { + // SAFETY: Only interacting with `OsStr` as `&str + unsafe { to_os_str(s) } + }) } fn starts_with(&self, prefix: &str) -> bool { - to_bytes(self).starts_with(prefix.as_bytes()) + // SAFETY: Only interacting with `OsStr` as `&str + let bytes = unsafe { to_bytes(self) }; + bytes.starts_with(prefix.as_bytes()) } fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> { @@ -243,17 +251,24 @@ impl OsStrExt for OsStr { } fn split_at(&self, index: usize) -> (&OsStr, &OsStr) { - let (first, second) = to_bytes(self).split_at(index); - (to_os_str(first), to_os_str(second)) + // BUG: This is unsafe + unsafe { + let bytes = to_bytes(self); + let (first, second) = bytes.split_at(index); + (to_os_str(first), to_os_str(second)) + } } fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> { let start = self.find(needle)?; let end = start + needle.len(); - let haystack = to_bytes(self); - let first = &haystack[0..start]; - let second = &haystack[end..]; - Some((to_os_str(first), to_os_str(second))) + // SAFETY: Only interacting with `OsStr` as `&str + unsafe { + let haystack = to_bytes(self); + let first = &haystack[0..start]; + let second = &haystack[end..]; + Some((to_os_str(first), to_os_str(second))) + } } } @@ -265,14 +280,15 @@ mod private { /// Allow access to raw bytes /// -/// **Note:** the bytes only make sense when compared with ASCII or `&str` +/// # Safety /// -/// **Note:** This must never be serialized as there is no guarantee at how invalid UTF-8 will be -/// encoded, even within the same version of this crate (since its dependent on rustc version) -fn to_bytes(s: &OsStr) -> &[u8] { +/// - The bytes only make sense when compared with ASCII or `&str` +/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be +/// encoded, even within the same version of this crate (since its dependent on rustc version) +unsafe fn to_bytes(s: &OsStr) -> &[u8] { // SAFETY: // - Lifetimes are the same - // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`) + // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) // - The primary contract is that the encoding for invalid surrogate code points is not // guaranteed which isn't a problem here // @@ -282,10 +298,16 @@ fn to_bytes(s: &OsStr) -> &[u8] { } /// Restore raw bytes as `OsStr` -fn to_os_str(s: &[u8]) -> &OsStr { +/// +/// # Safety +/// +/// - The bytes only make sense when compared with ASCII or `&str` +/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be +/// encoded, even within the same version of this crate (since its dependent on rustc version) +unsafe fn to_os_str(s: &[u8]) -> &OsStr { // SAFETY: // - Lifetimes are the same - // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`) + // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) // - The primary contract is that the encoding for invalid surrogate code points is not // guaranteed which isn't a problem here //