Merge pull request #4794 from epage/osstr

perf!(lex): Build faster by removing `os_str_bytes`
This commit is contained in:
Ed Page 2023-03-27 16:22:46 -05:00 committed by GitHub
commit 627a94f502
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 433 additions and 165 deletions

4
Cargo.lock generated
View file

@ -230,7 +230,6 @@ dependencies = [
"clap 4.1.13",
"clap_lex 0.3.3",
"is_executable",
"os_str_bytes",
"pathdiff",
"shlex",
"snapbox",
@ -269,9 +268,6 @@ dependencies = [
[[package]]
name = "clap_lex"
version = "0.3.3"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "clap_mangen"

View file

@ -1,6 +1,6 @@
use std::cmp::Ordering;
use clap_lex::RawOsStr;
use clap_lex::OsStrExt as _;
use crate::builder::OsStr;
use crate::builder::ValueRange;
@ -841,16 +841,16 @@ fn assert_defaults<'d>(
for default_os in defaults {
let value_parser = arg.get_value_parser();
let assert_cmd = Command::new("assert");
if let Some(delim) = arg.get_value_delimiter() {
let default_os = RawOsStr::new(default_os);
for part in default_os.split(delim) {
if let Err(err) = value_parser.parse_ref(&assert_cmd, Some(arg), &part.to_os_str())
{
if let Some(val_delim) = arg.get_value_delimiter() {
let mut val_delim_buffer = [0; 4];
let val_delim = val_delim.encode_utf8(&mut val_delim_buffer);
for part in default_os.split(val_delim) {
if let Err(err) = value_parser.parse_ref(&assert_cmd, Some(arg), part) {
panic!(
"Argument `{}`'s {}={:?} failed validation: {}",
arg.get_id(),
field,
part.to_str_lossy(),
part.to_string_lossy(),
err
);
}

View file

@ -4,9 +4,7 @@ use std::{
ffi::{OsStr, OsString},
};
// Third Party
use clap_lex::RawOsStr;
use clap_lex::RawOsString;
use clap_lex::OsStrExt as _;
// Internal
use crate::builder::{Arg, Command};
@ -93,9 +91,8 @@ impl<'cmd> Parser<'cmd> {
}
debug!(
"Parser::get_matches_with: Begin parsing '{:?}' ({:?})",
"Parser::get_matches_with: Begin parsing '{:?}'",
arg_os.to_value_os(),
arg_os.to_value_os().as_raw_bytes()
);
// Has the user already passed '--'? Meaning only positional args follow
@ -291,7 +288,7 @@ impl<'cmd> Parser<'cmd> {
} else {
let trailing_values = false;
let arg_values = matcher.pending_values_mut(id, None, trailing_values);
arg_values.push(arg_os.to_value_os().to_os_str().into_owned());
arg_values.push(arg_os.to_value_os().to_owned());
if matcher.needs_more_vals(arg) {
ParseResult::Opt(arg.get_id().clone())
} else {
@ -411,7 +408,7 @@ impl<'cmd> Parser<'cmd> {
Some(Identifier::Index),
trailing_values,
);
arg_values.push(arg_os.to_value_os().to_os_str().into_owned());
arg_values.push(arg_os.to_value_os().to_owned());
}
// Only increment the positional counter if it doesn't allow multiples
@ -548,7 +545,7 @@ impl<'cmd> Parser<'cmd> {
// Checks if the arg matches a subcommand name, or any of its aliases (if defined)
fn possible_subcommand(
&self,
arg: Result<&str, &RawOsStr>,
arg: Result<&str, &OsStr>,
valid_arg_found: bool,
) -> Option<&str> {
debug!("Parser::possible_subcommand: arg={:?}", arg);
@ -723,8 +720,8 @@ impl<'cmd> Parser<'cmd> {
fn parse_long_arg(
&mut self,
matcher: &mut ArgMatcher,
long_arg: Result<&str, &RawOsStr>,
long_value: Option<&RawOsStr>,
long_arg: &str,
long_value: Option<&OsStr>,
parse_state: &ParseState,
pos_counter: usize,
valid_arg_found: &mut bool,
@ -741,14 +738,6 @@ impl<'cmd> Parser<'cmd> {
}
debug!("Parser::parse_long_arg: Does it contain '='...");
let long_arg = match long_arg {
Ok(long_arg) => long_arg,
Err(long_arg) => {
return Ok(ParseResult::NoMatchingArg {
arg: long_arg.to_str_lossy().into_owned(),
});
}
};
if long_arg.is_empty() {
debug_assert!(
long_value.is_some(),
@ -805,7 +794,7 @@ impl<'cmd> Parser<'cmd> {
used.push(arg.get_id().clone());
Ok(ParseResult::UnneededAttachedValue {
rest: rest.to_str_lossy().into_owned(),
rest: rest.to_string_lossy().into_owned(),
used,
arg: arg.to_string(),
})
@ -902,7 +891,7 @@ impl<'cmd> Parser<'cmd> {
Ok(c) => c,
Err(rest) => {
return Ok(ParseResult::NoMatchingArg {
arg: format!("-{}", rest.to_str_lossy()),
arg: format!("-{}", rest.to_string_lossy()),
});
}
};
@ -938,8 +927,8 @@ impl<'cmd> Parser<'cmd> {
// Cloning the iterator, so we rollback if it isn't there.
let val = short_arg.clone().next_value_os().unwrap_or_default();
debug!(
"Parser::parse_short_arg:iter:{}: val={:?} (bytes), val={:?} (ascii), short_arg={:?}",
c, val, val.as_raw_bytes(), short_arg
"Parser::parse_short_arg:iter:{}: val={:?}, short_arg={:?}",
c, val, short_arg
);
let val = Some(val).filter(|v| !v.is_empty());
@ -950,7 +939,7 @@ impl<'cmd> Parser<'cmd> {
//
// e.g. `-xvf`, when require_equals && x.min_vals == 0, we don't
// consume the `vf`, even if it's provided as value.
let (val, has_eq) = if let Some(val) = val.and_then(|v| v.strip_prefix('=')) {
let (val, has_eq) = if let Some(val) = val.and_then(|v| v.strip_prefix("=")) {
(Some(val), true)
} else {
(val, false)
@ -991,7 +980,7 @@ impl<'cmd> Parser<'cmd> {
fn parse_opt_value(
&self,
ident: Identifier,
attached_value: Option<&RawOsStr>,
attached_value: Option<&OsStr>,
arg: &Arg,
matcher: &mut ArgMatcher,
has_eq: bool,
@ -1032,7 +1021,7 @@ impl<'cmd> Parser<'cmd> {
})
}
} else if let Some(v) = attached_value {
let arg_values = vec![v.to_os_str().into_owned()];
let arg_values = vec![v.to_owned()];
let trailing_idx = None;
let react_result = ok!(self.react(
Some(ident),
@ -1054,13 +1043,8 @@ impl<'cmd> Parser<'cmd> {
}
}
fn check_terminator(&self, arg: &Arg, val: &RawOsStr) -> Option<ParseResult> {
if Some(val)
== arg
.terminator
.as_ref()
.map(|s| RawOsStr::from_str(s.as_str()))
{
fn check_terminator(&self, arg: &Arg, val: &OsStr) -> Option<ParseResult> {
if Some(val) == arg.terminator.as_ref().map(|s| OsStr::new(s.as_str())) {
debug!("Parser::check_terminator: terminator={:?}", arg.terminator);
Some(ParseResult::ValuesDone)
} else {
@ -1156,17 +1140,17 @@ impl<'cmd> Parser<'cmd> {
if self.cmd.is_dont_delimit_trailing_values_set() && trailing_idx == Some(0) {
// Nothing to do
} else {
let mut val_delim_buffer = [0; 4];
let val_delim = val_delim.encode_utf8(&mut val_delim_buffer);
let mut split_raw_vals = Vec::with_capacity(raw_vals.len());
for (i, raw_val) in raw_vals.into_iter().enumerate() {
let raw_val = RawOsString::new(raw_val);
if !raw_val.contains(val_delim)
|| (self.cmd.is_dont_delimit_trailing_values_set()
&& trailing_idx == Some(i))
{
split_raw_vals.push(raw_val.into_os_string());
split_raw_vals.push(raw_val);
} else {
split_raw_vals
.extend(raw_val.split(val_delim).map(|x| x.to_os_str().into_owned()));
split_raw_vals.extend(raw_val.split(val_delim).map(|x| x.to_owned()));
}
}
raw_vals = split_raw_vals

View file

@ -35,7 +35,6 @@ bench = false
clap = { path = "../", version = "4.1.0", default-features = false, features = ["std"] }
clap_lex = { path = "../clap_lex", version = "0.3.0", optional = true }
is_executable = { version = "1.0.1", optional = true }
os_str_bytes = { version = "6.0.0", default-features = false, features = ["raw_os_str"], optional = true }
pathdiff = { version = "0.2.1", optional = true }
shlex = { version = "1.1.0", optional = true }
unicode-xid = { version = "0.2.2", optional = true }
@ -52,5 +51,5 @@ required-features = ["unstable-dynamic"]
[features]
default = []
unstable-dynamic = ["dep:clap_lex", "dep:shlex", "dep:unicode-xid", "dep:os_str_bytes", "clap/derive", "dep:is_executable", "dep:pathdiff"]
unstable-dynamic = ["dep:clap_lex", "dep:shlex", "dep:unicode-xid", "clap/derive", "dep:is_executable", "dep:pathdiff"]
debug = ["clap/debug"]

View file

@ -2,9 +2,11 @@
/// Complete commands within bash
pub mod bash {
use std::ffi::OsStr;
use std::ffi::OsString;
use std::io::Write;
use clap_lex::OsStrExt as _;
use unicode_xid::UnicodeXID;
#[derive(clap::Subcommand)]
@ -320,11 +322,7 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
return complete_arg(&arg, current_cmd, current_dir, pos_index, is_escaped);
}
debug!(
"complete::next: Begin parsing '{:?}' ({:?})",
arg.to_value_os(),
arg.to_value_os().as_raw_bytes()
);
debug!("complete::next: Begin parsing '{:?}'", arg.to_value_os(),);
if let Ok(value) = arg.to_value() {
if let Some(next_cmd) = current_cmd.find_subcommand(value) {
@ -370,28 +368,23 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
if !is_escaped {
if let Some((flag, value)) = arg.to_long() {
if let Ok(flag) = flag {
if let Some(value) = value {
if let Some(arg) = cmd.get_arguments().find(|a| a.get_long() == Some(flag))
{
completions.extend(
complete_arg_value(value.to_str().ok_or(value), arg, current_dir)
.into_iter()
.map(|os| {
// HACK: Need better `OsStr` manipulation
format!("--{}={}", flag, os.to_string_lossy()).into()
}),
)
}
} else {
if let Some(value) = value {
if let Some(arg) = cmd.get_arguments().find(|a| a.get_long() == Some(flag)) {
completions.extend(
crate::generator::utils::longs_and_visible_aliases(cmd)
complete_arg_value(value.to_str().ok_or(value), arg, current_dir)
.into_iter()
.filter_map(|f| {
f.starts_with(flag).then(|| format!("--{}", f).into())
.map(|os| {
// HACK: Need better `OsStr` manipulation
format!("--{}={}", flag, os.to_string_lossy()).into()
}),
);
)
}
} else {
completions.extend(
crate::generator::utils::longs_and_visible_aliases(cmd)
.into_iter()
.filter_map(|f| f.starts_with(flag).then(|| format!("--{}", f).into())),
);
}
} else if arg.is_escape() || arg.is_stdio() || arg.is_empty() {
// HACK: Assuming knowledge of is_escape / is_stdio
@ -408,7 +401,7 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
crate::generator::utils::shorts_and_visible_aliases(cmd)
.into_iter()
// HACK: Need better `OsStr` manipulation
.map(|f| format!("{}{}", arg.to_value_os().to_str_lossy(), f).into()),
.map(|f| format!("{}{}", arg.to_value_os().to_string_lossy(), f).into()),
);
}
}
@ -428,7 +421,7 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
}
fn complete_arg_value(
value: Result<&str, &clap_lex::RawOsStr>,
value: Result<&str, &OsStr>,
arg: &clap::Arg,
current_dir: Option<&std::path::Path>,
) -> Vec<OsString> {
@ -444,7 +437,7 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
}
} else {
let value_os = match value {
Ok(value) => clap_lex::RawOsStr::from_str(value),
Ok(value) => OsStr::new(value),
Err(value_os) => value_os,
};
match arg.get_value_hint() {
@ -485,7 +478,7 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
}
fn complete_path(
value_os: &clap_lex::RawOsStr,
value_os: &OsStr,
current_dir: Option<&std::path::Path>,
is_wanted: impl Fn(&std::path::Path) -> bool,
) -> Vec<OsString> {
@ -499,10 +492,11 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
}
};
let (existing, prefix) = value_os
.split_once('\\')
.unwrap_or((clap_lex::RawOsStr::from_str(""), value_os));
let root = current_dir.join(existing.to_os_str());
.split_once("\\")
.unwrap_or((OsStr::new(""), value_os));
let root = current_dir.join(existing);
debug!("complete_path: root={:?}, prefix={:?}", root, prefix);
let prefix = prefix.to_string_lossy();
for entry in std::fs::read_dir(&root)
.ok()
@ -510,8 +504,8 @@ complete OPTIONS -F _clap_complete_NAME EXECUTABLES
.flatten()
.filter_map(Result::ok)
{
let raw_file_name = clap_lex::RawOsString::new(entry.file_name());
if !raw_file_name.starts_with_os(prefix) {
let raw_file_name = OsString::from(entry.file_name());
if !raw_file_name.starts_with(&prefix) {
continue;
}

View file

@ -28,6 +28,3 @@ pre-release-replacements = [
[lib]
bench = false
[dependencies]
os_str_bytes = { version = "6.0.0", default-features = false, features = ["raw_os_str"] }

321
clap_lex/src/ext.rs Normal file
View file

@ -0,0 +1,321 @@
use std::ffi::OsStr;
/// String-like operations for [`OsStr`], modelled on the equivalently named methods of [`str`].
///
/// Patterns are always given as `&str`; results are `OsStr` slices (or byte offsets into them).
///
/// The trait has `private::Sealed` as a supertrait, and in this file that is only implemented
/// for `OsStr`.
pub trait OsStrExt: private::Sealed {
/// Converts to a string slice.
///
/// # Errors
///
/// Returns the [`std::str::Utf8Error`] describing the first invalid sequence when the content
/// is not valid UTF-8.
fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
/// Returns `true` if the given pattern matches a sub-slice of
/// this string slice.
///
/// Returns `false` if it does not.
///
/// # Examples
///
/// ```rust
/// use clap_lex::OsStrExt as _;
/// let bananas = std::ffi::OsStr::new("bananas");
///
/// assert!(bananas.contains("nana"));
/// assert!(!bananas.contains("apples"));
/// ```
fn contains(&self, needle: &str) -> bool;
/// Returns the byte index of the first character of this string slice that
/// matches the pattern.
///
/// Returns [`None`] if the pattern doesn't match.
///
/// # Examples
///
/// ```rust
/// use clap_lex::OsStrExt as _;
/// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
///
/// assert_eq!(s.find("L"), Some(0));
/// assert_eq!(s.find("é"), Some(14));
/// assert_eq!(s.find("par"), Some(17));
/// ```
///
/// Not finding the pattern:
///
/// ```rust
/// use clap_lex::OsStrExt as _;
/// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
///
/// assert_eq!(s.find("1"), None);
/// ```
fn find(&self, needle: &str) -> Option<usize>;
/// Returns an `OsStr` slice with the prefix removed.
///
/// If the string starts with the pattern `prefix`, returns the substring after the prefix,
/// wrapped in `Some`.
///
/// If the string does not start with `prefix`, returns `None`.
///
/// # Examples
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
/// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
/// ```
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
/// Returns `true` if the given pattern matches a prefix of this
/// string slice.
///
/// Returns `false` if it does not.
///
/// # Examples
///
/// ```
/// use clap_lex::OsStrExt as _;
/// let bananas = std::ffi::OsStr::new("bananas");
///
/// assert!(bananas.starts_with("bana"));
/// assert!(!bananas.starts_with("nana"));
/// ```
fn starts_with(&self, prefix: &str) -> bool;
/// An iterator over substrings of this string slice, separated by
/// occurrences of the `needle` pattern.
///
/// # Panics
///
/// Panics if `needle` is the empty string.
///
/// # Examples
///
/// Simple patterns:
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
/// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
///
/// let v: Vec<_> = OsStr::new("").split("X").collect();
/// assert_eq!(v, [OsStr::new("")]);
///
/// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
/// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
///
/// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
/// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
/// ```
///
/// If a string contains multiple contiguous separators, you will end up
/// with empty strings in the output:
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let x = OsStr::new("||||a||b|c");
/// let d: Vec<_> = x.split("|").collect();
///
/// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
/// ```
///
/// Contiguous separators are separated by the empty string.
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let x = OsStr::new("(///)");
/// let d: Vec<_> = x.split("/").collect();
///
/// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
/// ```
///
/// Separators at the start or end of a string are neighbored
/// by empty strings.
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let d: Vec<_> = OsStr::new("010").split("0").collect();
/// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
/// ```
///
/// When the empty string is used as a separator, it panics (the assertion below is never
/// reached):
///
/// ```should_panic
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let f: Vec<_> = OsStr::new("rust").split("").collect();
/// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
/// ```
///
/// Contiguous separators can lead to possibly surprising behavior
/// when whitespace is used as the separator. This code is correct:
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// let x = OsStr::new("    a  b c");
/// let d: Vec<_> = x.split(" ").collect();
///
/// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
/// ```
///
/// It does _not_ give you:
///
/// ```,ignore
/// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
/// ```
///
/// For `str`, [`split_whitespace`] provides that behavior; this trait declares no equivalent.
///
/// [`split_whitespace`]: str::split_whitespace
fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
/// Divide one string slice into two at an index.
///
/// The argument, `index`, should be a byte offset from the start of the
/// string. It must also be on the boundary of a UTF-8 code point.
///
/// The two slices returned go from the start of the string slice to `index`,
/// and from `index` to the end of the string slice.
///
/// # Panics
///
/// Panics if `index` is past the end of the string slice.
///
/// NOTE(review): the `OsStr` implementation in this file only bounds-checks `index`; it does
/// not verify that `index` falls on a code point boundary, so callers have to uphold that
/// requirement themselves.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use clap_lex::OsStrExt as _;
/// let s = std::ffi::OsStr::new("Per Martin-Löf");
///
/// let (first, last) = s.split_at(3);
///
/// assert_eq!("Per", first);
/// assert_eq!(" Martin-Löf", last);
/// ```
fn split_at(&self, index: usize) -> (&OsStr, &OsStr);
/// Splits the string on the first occurrence of the specified delimiter and
/// returns the prefix before the delimiter and the suffix after it.
///
/// Returns `None` when the delimiter does not occur.
///
/// # Examples
///
/// ```
/// use std::ffi::OsStr;
/// use clap_lex::OsStrExt as _;
/// assert_eq!(OsStr::new("cfg").split_once("="), None);
/// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
/// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
/// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
/// ```
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
}
/// Byte-oriented implementation: every operation works on the raw bytes exposed by `to_bytes`
/// and compares them against the UTF-8 bytes of the `&str` pattern.
impl OsStrExt for OsStr {
    fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
        std::str::from_utf8(to_bytes(self))
    }

    fn contains(&self, needle: &str) -> bool {
        self.find(needle).is_some()
    }

    fn find(&self, needle: &str) -> Option<usize> {
        let haystack = to_bytes(self);
        let pattern = needle.as_bytes();
        // A needle longer than the haystack can never match; `checked_sub` turns that into `None`.
        let last_start = haystack.len().checked_sub(pattern.len())?;
        (0..=last_start).find(|&offset| haystack[offset..].starts_with(pattern))
    }

    fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
        let rest = to_bytes(self).strip_prefix(prefix.as_bytes())?;
        Some(to_os_str(rest))
    }

    fn starts_with(&self, prefix: &str) -> bool {
        let bytes = to_bytes(self);
        bytes.starts_with(prefix.as_bytes())
    }

    fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
        // An empty needle would match at every position without consuming input.
        assert_ne!(needle, "");
        let haystack = Some(self);
        Split { haystack, needle }
    }

    fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
        // Only the slice bounds check applies here; `index` is not validated any further.
        let (head, tail) = to_bytes(self).split_at(index);
        (to_os_str(head), to_os_str(tail))
    }

    fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
        let start = self.find(needle)?;
        let (before, rest) = to_bytes(self).split_at(start);
        // `rest` begins with the needle (that is what `find` matched), so skip over it.
        let after = &rest[needle.len()..];
        Some((to_os_str(before), to_os_str(after)))
    }
}
// Support module for sealing `OsStrExt`: the extension trait requires `private::Sealed` as a
// supertrait, and the only implementation provided here is for `OsStr`.
mod private {
// Marker trait with no methods; it exists purely as a supertrait bound.
pub trait Sealed {}
impl Sealed for std::ffi::OsStr {}
}
/// Allow access to raw bytes
///
/// **Note:** the bytes only make sense when compared with ASCII or `&str`
///
/// **Note:** This must never be serialized as there is no guarantee of how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since it's dependent on rustc version)
fn to_bytes(s: &OsStr) -> &[u8] {
    let raw = s as *const OsStr as *const [u8];
    // SAFETY:
    // - The pointer comes from a live reference, and the returned borrow carries the same
    //   lifetime as `s`
    // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed, which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    unsafe { &*raw }
}
/// Restore raw bytes as `OsStr`
///
/// The input is expected to be bytes previously obtained from `to_bytes` (or a sub-slice of
/// them); nothing is validated here.
fn to_os_str(s: &[u8]) -> &OsStr {
    let raw = s as *const [u8] as *const OsStr;
    // SAFETY:
    // - The pointer comes from a live reference, and the returned borrow carries the same
    //   lifetime as `s`
    // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed, which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    unsafe { &*raw }
}
/// Iterator over the pieces of an `OsStr` separated by a `&str` needle.
///
/// Built by `OsStrExt::split`; yields `&OsStr` items borrowed from the original string.
pub struct Split<'s, 'n> {
// Input that has not been yielded yet; set to `None` once the final piece has been returned.
haystack: Option<&'s OsStr>,
// Separator searched for on every step.
needle: &'n str,
}
impl<'s, 'n> Iterator for Split<'s, 'n> {
    type Item = &'s OsStr;

    /// Yields the text before the next occurrence of the needle, or the whole remainder when
    /// the needle no longer occurs; after that the iterator is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let remaining = self.haystack?;
        if let Some((piece, rest)) = remaining.split_once(self.needle) {
            // Sanity check: each step must consume input, otherwise iteration would never end.
            if !remaining.is_empty() {
                debug_assert_ne!(remaining, rest);
            }
            self.haystack = Some(rest);
            Some(piece)
        } else {
            // No separator left: hand out the tail and mark the iterator as finished.
            self.haystack = None;
            Some(remaining)
        }
    }
}

View file

@ -8,6 +8,7 @@
//!
//! ```rust
//! use std::path::PathBuf;
//! use std::ffi::OsStr;
//!
//! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
//!
@ -26,7 +27,7 @@
//! }
//!
//! impl Color {
//! fn parse(s: Option<&clap_lex::RawOsStr>) -> Result<Self, BoxedError> {
//! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> {
//! let s = s.map(|s| s.to_str().ok_or(s));
//! match s {
//! Some(Ok("always")) | Some(Ok("")) | None => {
@ -64,13 +65,13 @@
//! args.paths.push(PathBuf::from("-"));
//! } else if let Some((long, value)) = arg.to_long() {
//! match long {
//! Ok("verbose") => {
//! "verbose" => {
//! if let Some(value) = value {
//! return Err(format!("`--verbose` does not take a value, got `{:?}`", value).into());
//! }
//! args.verbosity += 1;
//! }
//! Ok("color") => {
//! "color" => {
//! args.color = Color::parse(value)?;
//! }
//! _ => {
@ -93,12 +94,12 @@
//! return Err(format!("Unexpected flag: -{}", c).into());
//! }
//! Err(e) => {
//! return Err(format!("Unexpected flag: -{}", e.to_str_lossy()).into());
//! return Err(format!("Unexpected flag: -{}", e.to_string_lossy()).into());
//! }
//! }
//! }
//! } else {
//! args.paths.push(PathBuf::from(arg.to_value_os().to_os_str().into_owned()));
//! args.paths.push(PathBuf::from(arg.to_value_os().to_owned()));
//! }
//! }
//!
@ -109,13 +110,14 @@
//! println!("{:?}", args);
//! ```
mod ext;
use std::ffi::OsStr;
use std::ffi::OsString;
pub use std::io::SeekFrom;
pub use os_str_bytes::RawOsStr;
pub use os_str_bytes::RawOsString;
pub use ext::OsStrExt;
/// Command-line arguments
#[derive(Default, Clone, Debug, PartialEq, Eq)]
@ -275,30 +277,27 @@ impl ArgCursor {
/// Command-line Argument
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParsedArg<'s> {
inner: std::borrow::Cow<'s, RawOsStr>,
utf8: Option<&'s str>,
inner: &'s OsStr,
}
impl<'s> ParsedArg<'s> {
fn new(inner: &'s OsStr) -> Self {
let utf8 = inner.to_str();
let inner = RawOsStr::new(inner);
Self { inner, utf8 }
Self { inner }
}
/// Argument is length of 0
pub fn is_empty(&self) -> bool {
self.inner.as_ref().is_empty()
self.inner.is_empty()
}
/// Does the argument look like a stdio argument (`-`)
pub fn is_stdio(&self) -> bool {
self.inner.as_ref() == "-"
self.inner == "-"
}
/// Does the argument look like an argument escape (`--`)
pub fn is_escape(&self) -> bool {
self.inner.as_ref() == "--"
self.inner == "--"
}
/// Does the argument look like a number
@ -309,56 +308,38 @@ impl<'s> ParsedArg<'s> {
}
/// Treat as a long-flag
pub fn to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)> {
if let Some(raw) = self.utf8 {
let remainder = raw.strip_prefix("--")?;
if remainder.is_empty() {
debug_assert!(self.is_escape());
return None;
}
let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
(p0, Some(p1))
} else {
(remainder, None)
};
let flag = Ok(flag);
let value = value.map(RawOsStr::from_str);
Some((flag, value))
} else {
let raw = self.inner.as_ref();
let remainder = raw.strip_prefix("--")?;
if remainder.is_empty() {
debug_assert!(self.is_escape());
return None;
}
let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
(p0, Some(p1))
} else {
(remainder, None)
};
let flag = flag.to_str().ok_or(flag);
Some((flag, value))
pub fn to_long(&self) -> Option<(&str, Option<&OsStr>)> {
let raw = self.inner;
let remainder = raw.strip_prefix("--")?;
if remainder.is_empty() {
debug_assert!(self.is_escape());
return None;
}
let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=") {
(p0, Some(p1))
} else {
(remainder, None)
};
let flag = flag.to_str()?;
Some((flag, value))
}
/// Can treat as a long-flag
pub fn is_long(&self) -> bool {
self.inner.as_ref().starts_with("--") && !self.is_escape()
self.inner.starts_with("--") && !self.is_escape()
}
/// Treat as a short-flag
pub fn to_short(&self) -> Option<ShortFlags<'_>> {
if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') {
if remainder_os.starts_with('-') {
if let Some(remainder_os) = self.inner.strip_prefix("-") {
if remainder_os.starts_with("-") {
None
} else if remainder_os.is_empty() {
debug_assert!(self.is_stdio());
None
} else {
let remainder = self.utf8.map(|s| &s[1..]);
Some(ShortFlags::new(remainder_os, remainder))
Some(ShortFlags::new(remainder_os))
}
} else {
None
@ -367,48 +348,42 @@ impl<'s> ParsedArg<'s> {
/// Can treat as a short-flag
pub fn is_short(&self) -> bool {
self.inner.as_ref().starts_with('-')
&& !self.is_stdio()
&& !self.inner.as_ref().starts_with("--")
self.inner.starts_with("-") && !self.is_stdio() && !self.inner.starts_with("--")
}
/// Treat as a value
///
/// **NOTE:** May return a flag or an escape.
pub fn to_value_os(&self) -> &RawOsStr {
self.inner.as_ref()
pub fn to_value_os(&self) -> &OsStr {
self.inner
}
/// Treat as a value
///
/// **NOTE:** May return a flag or an escape.
pub fn to_value(&self) -> Result<&str, &RawOsStr> {
self.utf8.ok_or_else(|| self.inner.as_ref())
pub fn to_value(&self) -> Result<&str, &OsStr> {
self.inner.to_str().ok_or(self.inner)
}
/// Safely print an argument that may contain non-UTF8 content
///
/// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
pub fn display(&self) -> impl std::fmt::Display + '_ {
self.inner.to_str_lossy()
self.inner.to_string_lossy()
}
}
/// Walk through short flags within a [`ParsedArg`]
#[derive(Clone, Debug)]
pub struct ShortFlags<'s> {
inner: &'s RawOsStr,
inner: &'s OsStr,
utf8_prefix: std::str::CharIndices<'s>,
invalid_suffix: Option<&'s RawOsStr>,
invalid_suffix: Option<&'s OsStr>,
}
impl<'s> ShortFlags<'s> {
fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 {
(utf8, None)
} else {
split_nonutf8_once(inner)
};
fn new(inner: &'s OsStr) -> Self {
let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner);
let utf8_prefix = utf8_prefix.char_indices();
Self {
inner,
@ -440,7 +415,7 @@ impl<'s> ShortFlags<'s> {
/// Advance the iterator, returning the next short flag on success
///
/// On error, returns the invalid-UTF8 value
pub fn next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>> {
pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> {
if let Some((_, flag)) = self.utf8_prefix.next() {
return Some(Ok(flag));
}
@ -454,11 +429,11 @@ impl<'s> ShortFlags<'s> {
}
/// Advance the iterator, returning everything left as a value
pub fn next_value_os(&mut self) -> Option<&'s RawOsStr> {
pub fn next_value_os(&mut self) -> Option<&'s OsStr> {
if let Some((index, _)) = self.utf8_prefix.next() {
self.utf8_prefix = "".char_indices();
self.invalid_suffix = None;
return Some(&self.inner[index..]);
return Some(self.inner.split_at(index).1);
}
if let Some(suffix) = self.invalid_suffix {
@ -471,19 +446,19 @@ impl<'s> ShortFlags<'s> {
}
impl<'s> Iterator for ShortFlags<'s> {
type Item = Result<char, &'s RawOsStr>;
type Item = Result<char, &'s OsStr>;
fn next(&mut self) -> Option<Self::Item> {
self.next_flag()
}
}
fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
match std::str::from_utf8(b.as_raw_bytes()) {
fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
match b.try_str() {
Ok(s) => (s, None),
Err(err) => {
let (valid, after_valid) = b.split_at(err.valid_up_to());
let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
let valid = valid.try_str().unwrap();
(valid, Some(after_valid))
}
}

View file

@ -1,3 +1,5 @@
use std::ffi::OsStr;
// Despite our design philosophy being to support completion generation, we aren't considering `-`
// the start of a long because there is no valid value to return.
#[test]
@ -34,7 +36,7 @@ fn to_long_no_value() {
assert!(next.is_long());
let (key, value) = next.to_long().unwrap();
assert_eq!(key, Ok("long"));
assert_eq!(key, "long");
assert_eq!(value, None);
}
@ -48,8 +50,8 @@ fn to_long_with_empty_value() {
assert!(next.is_long());
let (key, value) = next.to_long().unwrap();
assert_eq!(key, Ok("long"));
assert_eq!(value, Some(clap_lex::RawOsStr::from_str("")));
assert_eq!(key, "long");
assert_eq!(value, Some(OsStr::new("")));
}
#[test]
@ -62,8 +64,8 @@ fn to_long_with_value() {
assert!(next.is_long());
let (key, value) = next.to_long().unwrap();
assert_eq!(key, Ok("long"));
assert_eq!(value, Some(clap_lex::RawOsStr::from_str("hello")));
assert_eq!(key, "long");
assert_eq!(value, Some(OsStr::new("hello")));
}
#[test]

View file

@ -37,7 +37,7 @@ fn next_value_os() {
let next = raw.next(&mut cursor).unwrap();
let mut shorts = next.to_short().unwrap();
let actual = shorts.next_value_os().unwrap().to_str_lossy();
let actual = shorts.next_value_os().unwrap().to_string_lossy();
assert_eq!(actual, "short");
}
@ -51,7 +51,7 @@ fn next_flag_with_value() {
let mut shorts = next.to_short().unwrap();
assert_eq!(shorts.next_flag().unwrap().unwrap(), 's');
let actual = shorts.next_value_os().unwrap().to_str_lossy();
let actual = shorts.next_value_os().unwrap().to_string_lossy();
assert_eq!(actual, "hort");
}