mirror of
https://github.com/clap-rs/clap
synced 2024-12-13 22:32:33 +00:00
refactor(lex): Expand lexer design
In considering the design for this, we want:
- Ability to modify the argument list while maintaining the `Cursor` for replacements
- Allow picking up subcommand parsing in the middle of short flags
- Ability to peek at the next item to determine if we want to treat it as a flag or as a value
- Ability to detect started short and long arguments for completions

Longer term, we also want to consider:
- Allowing users to customize the lexer to support different syntaxes
This commit is contained in:
parent
c58928b6bd
commit
6e05b8075b
3 changed files with 195 additions and 6 deletions
|
@ -665,7 +665,7 @@ impl<'help> App<'help> {
|
||||||
// to display
|
// to display
|
||||||
// the full path when displaying help messages and such
|
// the full path when displaying help messages and such
|
||||||
if !self.settings.is_set(AppSettings::NoBinaryName) {
|
if !self.settings.is_set(AppSettings::NoBinaryName) {
|
||||||
if let Some(name) = raw_args.next(&mut cursor) {
|
if let Some(name) = raw_args.next_os(&mut cursor) {
|
||||||
let p = Path::new(name);
|
let p = Path::new(name);
|
||||||
|
|
||||||
if let Some(f) = p.file_name() {
|
if let Some(f) = p.file_name() {
|
||||||
|
|
|
@ -3,6 +3,8 @@ use std::ffi::OsString;
|
||||||
|
|
||||||
pub use std::io::SeekFrom;
|
pub use std::io::SeekFrom;
|
||||||
|
|
||||||
|
use os_str_bytes::RawOsStr;
|
||||||
|
|
||||||
#[derive(Default, Clone, Debug, PartialEq, Eq)]
|
#[derive(Default, Clone, Debug, PartialEq, Eq)]
|
||||||
pub(crate) struct RawArgs {
|
pub(crate) struct RawArgs {
|
||||||
items: Vec<OsString>,
|
items: Vec<OsString>,
|
||||||
|
@ -13,13 +15,21 @@ impl RawArgs {
|
||||||
ArgCursor::new()
|
ArgCursor::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
|
pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
|
||||||
|
self.next_os(cursor).map(ParsedArg::new)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
|
||||||
let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
|
let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
|
||||||
cursor.cursor = cursor.cursor.saturating_add(1);
|
cursor.cursor = cursor.cursor.saturating_add(1);
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn peek(&self, cursor: &ArgCursor) -> Option<&OsStr> {
|
pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
|
||||||
|
self.peek_os(cursor).map(ParsedArg::new)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
|
||||||
self.items.get(cursor.cursor).map(|s| s.as_os_str())
|
self.items.get(cursor.cursor).map(|s| s.as_os_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +70,7 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Default, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
pub(crate) struct ArgCursor {
|
pub(crate) struct ArgCursor {
|
||||||
cursor: usize,
|
cursor: usize,
|
||||||
}
|
}
|
||||||
|
@ -70,3 +80,182 @@ impl ArgCursor {
|
||||||
Default::default()
|
Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub(crate) struct ParsedArg<'s> {
|
||||||
|
inner: std::borrow::Cow<'s, RawOsStr>,
|
||||||
|
utf8: Option<&'s str>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> ParsedArg<'s> {
|
||||||
|
fn new(inner: &'s OsStr) -> Self {
|
||||||
|
let utf8 = inner.to_str();
|
||||||
|
let inner = RawOsStr::new(inner);
|
||||||
|
Self { inner, utf8 }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_stdio(&self) -> bool {
|
||||||
|
self.inner.as_ref() == "-"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_escape(&self) -> bool {
|
||||||
|
self.inner.as_ref() == "--"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_number(&self) -> bool {
|
||||||
|
self.to_value()
|
||||||
|
.map(|s| s.parse::<f64>().is_ok())
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Treat as a long-flag
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return an empty flag. Check [`ParsedArg::is_escape`] to separately detect `--`.
|
||||||
|
pub fn to_long(&self) -> Option<(&RawOsStr, Option<&RawOsStr>)> {
|
||||||
|
let remainder = self.inner.as_ref().strip_prefix("--")?;
|
||||||
|
let parts = if let Some((p0, p1)) = remainder.split_once("=") {
|
||||||
|
(p0, Some(p1))
|
||||||
|
} else {
|
||||||
|
(remainder, None)
|
||||||
|
};
|
||||||
|
Some(parts)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Can treat as a long-flag
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return an empty flag. Check [`ParsedArg::is_escape`] to separately detect `--`.
|
||||||
|
pub fn is_long(&self) -> bool {
|
||||||
|
self.inner.as_ref().starts_with("--")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Treat as a short-flag
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return an empty flag. Check [`ParsedArg::is_stdio`] to separately detect
|
||||||
|
/// `-`.
|
||||||
|
pub fn to_short(&self) -> Option<ShortFlags<'_>> {
|
||||||
|
if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') {
|
||||||
|
if remainder_os.starts_with('-') {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let remainder = self.utf8.map(|s| &s[1..]);
|
||||||
|
Some(ShortFlags::new(remainder_os, remainder))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Can treat as a short-flag
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return an empty flag. Check [`ParsedArg::is_stdio`] to separately detect
|
||||||
|
/// `-`.
|
||||||
|
pub fn is_short(&self) -> bool {
|
||||||
|
self.inner.as_ref().starts_with('-') && !self.is_long()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Treat as a value
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return a flag or an escape.
|
||||||
|
pub fn to_value_os(&self) -> &RawOsStr {
|
||||||
|
self.inner.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Treat as a value
|
||||||
|
///
|
||||||
|
/// **NOTE:** May return a flag or an escape.
|
||||||
|
pub fn to_value(&self) -> Option<&str> {
|
||||||
|
self.utf8
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Safely print an argument that may contain non-UTF8 content
|
||||||
|
///
|
||||||
|
/// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the argument, please use `Debug` instead.
|
||||||
|
pub fn display(&self) -> impl std::fmt::Display + '_ {
|
||||||
|
self.inner.to_str_lossy()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct ShortFlags<'s> {
|
||||||
|
inner: &'s RawOsStr,
|
||||||
|
utf8_prefix: std::str::CharIndices<'s>,
|
||||||
|
invalid_suffix: Option<&'s RawOsStr>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> ShortFlags<'s> {
|
||||||
|
fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
|
||||||
|
let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 {
|
||||||
|
(utf8, None)
|
||||||
|
} else {
|
||||||
|
split_nonutf8_once(inner)
|
||||||
|
};
|
||||||
|
let utf8_prefix = utf8_prefix.char_indices();
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
utf8_prefix,
|
||||||
|
invalid_suffix,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
|
||||||
|
for i in 0..n {
|
||||||
|
self.next().ok_or(i)?.map_err(|_| i)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_number(&self) -> bool {
|
||||||
|
self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next(&mut self) -> Option<Result<char, &'s RawOsStr>> {
|
||||||
|
if let Some((_, flag)) = self.utf8_prefix.next() {
|
||||||
|
return Some(Ok(flag));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(suffix) = self.invalid_suffix {
|
||||||
|
self.invalid_suffix = None;
|
||||||
|
return Some(Err(suffix));
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn value_os(&mut self) -> Option<&'s RawOsStr> {
|
||||||
|
if let Some((index, _)) = self.utf8_prefix.next() {
|
||||||
|
self.utf8_prefix = "".char_indices();
|
||||||
|
self.invalid_suffix = None;
|
||||||
|
return Some(&self.inner[index..]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(suffix) = self.invalid_suffix {
|
||||||
|
self.invalid_suffix = None;
|
||||||
|
return Some(suffix);
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for ShortFlags<'s> {
|
||||||
|
type Item = Result<char, &'s RawOsStr>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
|
||||||
|
match std::str::from_utf8(b.as_raw_bytes()) {
|
||||||
|
Ok(s) => (s, None),
|
||||||
|
Err(err) => {
|
||||||
|
let (valid, after_valid) = b.split_at(err.valid_up_to());
|
||||||
|
let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
|
||||||
|
(valid, Some(after_valid))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -90,7 +90,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> {
|
||||||
// If any arg sets .last(true)
|
// If any arg sets .last(true)
|
||||||
let contains_last = self.cmd.get_arguments().any(|x| x.is_last_set());
|
let contains_last = self.cmd.get_arguments().any(|x| x.is_last_set());
|
||||||
|
|
||||||
while let Some(arg_os) = raw_args.next(&mut args_cursor) {
|
while let Some(arg_os) = raw_args.next_os(&mut args_cursor) {
|
||||||
// Recover the replaced items if any.
|
// Recover the replaced items if any.
|
||||||
if let Some(replaced_items) = arg_os.to_str().and_then(|a| self.cmd.get_replacement(a))
|
if let Some(replaced_items) = arg_os.to_str().and_then(|a| self.cmd.get_replacement(a))
|
||||||
{
|
{
|
||||||
|
@ -140,7 +140,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> {
|
||||||
);
|
);
|
||||||
|
|
||||||
if low_index_mults || missing_pos {
|
if low_index_mults || missing_pos {
|
||||||
let skip_current = if let Some(n) = raw_args.peek(&args_cursor) {
|
let skip_current = if let Some(n) = raw_args.peek_os(&args_cursor) {
|
||||||
if let Some(p) = self
|
if let Some(p) = self
|
||||||
.cmd
|
.cmd
|
||||||
.get_positionals()
|
.get_positionals()
|
||||||
|
|
Loading…
Reference in a new issue