mirror of
https://github.com/clap-rs/clap
synced 2024-12-13 06:12:40 +00:00
Merge pull request #4802 from epage/osstr
fix(lex): Deprecate unsound `OsStrExt::split_at`
This commit is contained in:
commit
4b180f8cd0
2 changed files with 60 additions and 23 deletions
|
@ -193,6 +193,7 @@ pub trait OsStrExt: private::Sealed {
|
||||||
/// assert_eq!("Per", first);
|
/// assert_eq!("Per", first);
|
||||||
/// assert_eq!(" Martin-Löf", last);
|
/// assert_eq!(" Martin-Löf", last);
|
||||||
/// ```
|
/// ```
|
||||||
|
#[deprecated(since = "4.1.0", note = "This is not sound for all `index`")]
|
||||||
fn split_at(&self, index: usize) -> (&OsStr, &OsStr);
|
fn split_at(&self, index: usize) -> (&OsStr, &OsStr);
|
||||||
/// Splits the string on the first occurrence of the specified delimiter and
|
/// Splits the string on the first occurrence of the specified delimiter and
|
||||||
/// returns prefix before delimiter and suffix after delimiter.
|
/// returns prefix before delimiter and suffix after delimiter.
|
||||||
|
@ -212,7 +213,8 @@ pub trait OsStrExt: private::Sealed {
|
||||||
|
|
||||||
impl OsStrExt for OsStr {
|
impl OsStrExt for OsStr {
|
||||||
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
|
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
|
||||||
let bytes = to_bytes(self);
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
|
let bytes = unsafe { to_bytes(self) };
|
||||||
std::str::from_utf8(bytes)
|
std::str::from_utf8(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -221,17 +223,24 @@ impl OsStrExt for OsStr {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find(&self, needle: &str) -> Option<usize> {
|
fn find(&self, needle: &str) -> Option<usize> {
|
||||||
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
|
let bytes = unsafe { to_bytes(self) };
|
||||||
(0..=self.len().checked_sub(needle.len())?)
|
(0..=self.len().checked_sub(needle.len())?)
|
||||||
.find(|&x| to_bytes(self)[x..].starts_with(needle.as_bytes()))
|
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
|
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
|
||||||
to_bytes(self)
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
.strip_prefix(prefix.as_bytes())
|
let bytes = unsafe { to_bytes(self) };
|
||||||
.map(to_os_str)
|
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
|
||||||
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
|
unsafe { to_os_str(s) }
|
||||||
|
})
|
||||||
}
|
}
|
||||||
fn starts_with(&self, prefix: &str) -> bool {
|
fn starts_with(&self, prefix: &str) -> bool {
|
||||||
to_bytes(self).starts_with(prefix.as_bytes())
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
|
let bytes = unsafe { to_bytes(self) };
|
||||||
|
bytes.starts_with(prefix.as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
|
fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
|
||||||
|
@ -243,17 +252,24 @@ impl OsStrExt for OsStr {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
|
fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
|
||||||
let (first, second) = to_bytes(self).split_at(index);
|
// BUG: This is unsafe and has been deprecated
|
||||||
(to_os_str(first), to_os_str(second))
|
unsafe {
|
||||||
|
let bytes = to_bytes(self);
|
||||||
|
let (first, second) = bytes.split_at(index);
|
||||||
|
(to_os_str(first), to_os_str(second))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
|
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
|
||||||
let start = self.find(needle)?;
|
let start = self.find(needle)?;
|
||||||
let end = start + needle.len();
|
let end = start + needle.len();
|
||||||
let haystack = to_bytes(self);
|
// SAFETY: Only interacting with `OsStr` as `&str`
|
||||||
let first = &haystack[0..start];
|
unsafe {
|
||||||
let second = &haystack[end..];
|
let haystack = to_bytes(self);
|
||||||
Some((to_os_str(first), to_os_str(second)))
|
let first = &haystack[0..start];
|
||||||
|
let second = &haystack[end..];
|
||||||
|
Some((to_os_str(first), to_os_str(second)))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,33 +281,40 @@ mod private {
|
||||||
|
|
||||||
/// Allow access to raw bytes
|
/// Allow access to raw bytes
|
||||||
///
|
///
|
||||||
/// **Note:** the bytes only make sense when compared with ASCII or `&str`
|
/// # Safety
|
||||||
///
|
///
|
||||||
/// **Note:** This must never be serialized as there is no guarantee at how invalid UTF-8 will be
|
/// - The bytes only make sense when compared with ASCII or `&str`
|
||||||
/// encoded, even within the same version of this crate (since its dependent on rustc version)
|
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
|
||||||
fn to_bytes(s: &OsStr) -> &[u8] {
|
/// encoded, even within the same version of this crate (since it's dependent on the rustc version)
|
||||||
|
unsafe fn to_bytes(s: &OsStr) -> &[u8] {
|
||||||
// SAFETY:
|
// SAFETY:
|
||||||
// - Lifetimes are the same
|
// - Lifetimes are the same
|
||||||
// - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
|
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
|
||||||
// - The primary contract is that the encoding for invalid surrogate code points is not
|
// - The primary contract is that the encoding for invalid surrogate code points is not
|
||||||
// guaranteed which isn't a problem here
|
// guaranteed which isn't a problem here
|
||||||
//
|
//
|
||||||
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
|
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
|
||||||
// but it's in limbo
|
// but it's in limbo
|
||||||
unsafe { std::mem::transmute(s) }
|
std::mem::transmute(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Restore raw bytes as `OsStr`
|
/// Restore raw bytes as `OsStr`
|
||||||
fn to_os_str(s: &[u8]) -> &OsStr {
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// - The bytes only make sense when compared with ASCII or `&str`
|
||||||
|
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
|
||||||
|
/// encoded, even within the same version of this crate (since it's dependent on the rustc version)
|
||||||
|
unsafe fn to_os_str(s: &[u8]) -> &OsStr {
|
||||||
// SAFETY:
|
// SAFETY:
|
||||||
// - Lifetimes are the same
|
// - Lifetimes are the same
|
||||||
// - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
|
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
|
||||||
// - The primary contract is that the encoding for invalid surrogate code points is not
|
// - The primary contract is that the encoding for invalid surrogate code points is not
|
||||||
// guaranteed which isn't a problem here
|
// guaranteed which isn't a problem here
|
||||||
//
|
//
|
||||||
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
|
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
|
||||||
// but it's in limbo
|
// but it's in limbo
|
||||||
unsafe { std::mem::transmute(s) }
|
std::mem::transmute(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Split<'s, 'n> {
|
pub struct Split<'s, 'n> {
|
||||||
|
@ -319,3 +342,14 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Split an `OsStr`
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// `index` must be at a valid UTF-8 boundary
|
||||||
|
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
|
||||||
|
let bytes = to_bytes(os);
|
||||||
|
let (first, second) = bytes.split_at(index);
|
||||||
|
(to_os_str(first), to_os_str(second))
|
||||||
|
}
|
||||||
|
|
|
@ -433,7 +433,9 @@ impl<'s> ShortFlags<'s> {
|
||||||
if let Some((index, _)) = self.utf8_prefix.next() {
|
if let Some((index, _)) = self.utf8_prefix.next() {
|
||||||
self.utf8_prefix = "".char_indices();
|
self.utf8_prefix = "".char_indices();
|
||||||
self.invalid_suffix = None;
|
self.invalid_suffix = None;
|
||||||
return Some(self.inner.split_at(index).1);
|
// SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary
|
||||||
|
let remainder = unsafe { ext::split_at(self.inner, index).1 };
|
||||||
|
return Some(remainder);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(suffix) = self.invalid_suffix {
|
if let Some(suffix) = self.invalid_suffix {
|
||||||
|
@ -457,7 +459,8 @@ fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
|
||||||
match b.try_str() {
|
match b.try_str() {
|
||||||
Ok(s) => (s, None),
|
Ok(s) => (s, None),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let (valid, after_valid) = b.split_at(err.valid_up_to());
|
// SAFETY: `err.valid_up_to()` comes from `str::from_utf8` and is always at a valid UTF-8 boundary
|
||||||
|
let (valid, after_valid) = unsafe { ext::split_at(b, err.valid_up_to()) };
|
||||||
let valid = valid.try_str().unwrap();
|
let valid = valid.try_str().unwrap();
|
||||||
(valid, Some(after_valid))
|
(valid, Some(after_valid))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue