doc(lex): Clarify safety of unsafe

This commit is contained in:
Ed Page 2023-03-27 23:26:41 -05:00
parent 53cb165b3f
commit 56dc953617

View file

@ -212,7 +212,8 @@ pub trait OsStrExt: private::Sealed {
impl OsStrExt for OsStr {
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
let bytes = to_bytes(self);
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
std::str::from_utf8(bytes)
}
@ -221,17 +222,24 @@ impl OsStrExt for OsStr {
}
fn find(&self, needle: &str) -> Option<usize> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
(0..=self.len().checked_sub(needle.len())?)
.find(|&x| to_bytes(self)[x..].starts_with(needle.as_bytes()))
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
}
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
to_bytes(self)
.strip_prefix(prefix.as_bytes())
.map(to_os_str)
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
// SAFETY: Only interacting with `OsStr` as `&str
unsafe { to_os_str(s) }
})
}
fn starts_with(&self, prefix: &str) -> bool {
to_bytes(self).starts_with(prefix.as_bytes())
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
bytes.starts_with(prefix.as_bytes())
}
fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
@ -243,18 +251,25 @@ impl OsStrExt for OsStr {
}
fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
let (first, second) = to_bytes(self).split_at(index);
// BUG: This is unsafe
unsafe {
let bytes = to_bytes(self);
let (first, second) = bytes.split_at(index);
(to_os_str(first), to_os_str(second))
}
}
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
let start = self.find(needle)?;
let end = start + needle.len();
// SAFETY: Only interacting with `OsStr` as `&str
unsafe {
let haystack = to_bytes(self);
let first = &haystack[0..start];
let second = &haystack[end..];
Some((to_os_str(first), to_os_str(second)))
}
}
}
mod private {
@ -265,14 +280,15 @@ mod private {
/// Allow access to raw bytes
///
/// **Note:** the bytes only make sense when compared with ASCII or `&str`
/// # Safety
///
/// **Note:** This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
fn to_bytes(s: &OsStr) -> &[u8] {
unsafe fn to_bytes(s: &OsStr) -> &[u8] {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
// - The primary contract is that the encoding for invalid surrogate code points is not
// guaranteed which isn't a problem here
//
@ -282,10 +298,16 @@ fn to_bytes(s: &OsStr) -> &[u8] {
}
/// Restore raw bytes as `OsStr`
fn to_os_str(s: &[u8]) -> &OsStr {
///
/// # Safety
///
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
unsafe fn to_os_str(s: &[u8]) -> &OsStr {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
// - The primary contract is that the encoding for invalid surrogate code points is not
// guaranteed which isn't a problem here
//