refactor(lex): Expand lexer design

In considering the design for this, we want:
- Ability to modify the argument list while maintaining the `Cursor` for
  replacements
- Allow picking up subcommand parsing in the middle of short flags
- Ability to peek at the next item to determine if we want to treat it
  as a flag or as a value
- Ability to detect started short and long arguments for completions

Longer term, we also want to consider:
- Allowing users to customize the lexer to support different syntaxes
This commit is contained in:
Ed Page 2022-04-12 13:52:21 -05:00
parent c58928b6bd
commit 6e05b8075b
3 changed files with 195 additions and 6 deletions

View file

@ -665,7 +665,7 @@ impl<'help> App<'help> {
// to display
// the full path when displaying help messages and such
if !self.settings.is_set(AppSettings::NoBinaryName) {
if let Some(name) = raw_args.next(&mut cursor) {
if let Some(name) = raw_args.next_os(&mut cursor) {
let p = Path::new(name);
if let Some(f) = p.file_name() {

View file

@ -3,6 +3,8 @@ use std::ffi::OsString;
pub use std::io::SeekFrom;
use os_str_bytes::RawOsStr;
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub(crate) struct RawArgs {
items: Vec<OsString>,
@ -13,13 +15,21 @@ impl RawArgs {
ArgCursor::new()
}
pub fn next(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
/// Advance `cursor` and return the argument it pointed at, wrapped in a [`ParsedArg`].
///
/// Returns `None` when the cursor is already past the last argument.
pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
    let raw = self.next_os(cursor)?;
    Some(ParsedArg::new(raw))
}
/// Advance `cursor` and return the raw argument it pointed at.
///
/// The cursor is moved forward (saturating at `usize::MAX`) even when there is no argument left
/// to return.
pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
    // Remember where we were before stepping: the item at the *old* position is the one returned.
    let index = cursor.cursor;
    cursor.cursor = index.saturating_add(1);
    let item = self.items.get(index)?;
    Some(item.as_os_str())
}
pub fn peek(&self, cursor: &ArgCursor) -> Option<&OsStr> {
/// Return the argument `cursor` points at, wrapped in a [`ParsedArg`], without advancing.
///
/// Returns `None` when the cursor is past the last argument.
pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
    let raw = self.peek_os(cursor)?;
    Some(ParsedArg::new(raw))
}
/// Return the raw argument `cursor` points at without advancing the cursor.
///
/// Returns `None` when the cursor is past the last argument.
pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
    let item = self.items.get(cursor.cursor)?;
    Some(item.as_os_str())
}
@ -60,7 +70,7 @@ where
}
}
#[derive(Default, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Default, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// Position within a `RawArgs` item list.
///
/// `RawArgs::next_os` / `RawArgs::peek_os` read the item at this position; `next_os` then moves
/// it forward by one.
pub(crate) struct ArgCursor {
/// Index into `RawArgs::items` of the item that will be read next.
cursor: usize,
}
@ -70,3 +80,182 @@ impl ArgCursor {
Default::default()
}
}
/// A single command-line argument, held both in raw OS-string form and, when possible, as UTF-8.
///
/// Built by `ParsedArg::new` from a borrowed `OsStr`; the `to_*` / `is_*` methods classify it as
/// a long flag, short flag(s), escape, stdio marker, or plain value.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct ParsedArg<'s> {
/// The argument as produced by `RawOsStr::new`, used for prefix checks and splitting.
inner: std::borrow::Cow<'s, RawOsStr>,
/// The argument as `&str`, i.e. the result of `OsStr::to_str`; `None` if it is not valid UTF-8.
utf8: Option<&'s str>,
}
impl<'s> ParsedArg<'s> {
    /// Wrap a raw argument, caching its UTF-8 view (if it has one) alongside the raw form.
    fn new(inner: &'s OsStr) -> Self {
        Self {
            utf8: inner.to_str(),
            inner: RawOsStr::new(inner),
        }
    }

    /// Is this exactly `-` (conventionally stdin/stdout)?
    pub fn is_stdio(&self) -> bool {
        self.to_value_os() == "-"
    }

    /// Is this exactly `--` (the escape that ends flag parsing)?
    pub fn is_escape(&self) -> bool {
        self.to_value_os() == "--"
    }

    /// Does the whole argument parse as an `f64`?
    ///
    /// Arguments that are not valid UTF-8 are never numbers.
    pub fn is_number(&self) -> bool {
        match self.to_value() {
            Some(text) => text.parse::<f64>().is_ok(),
            None => false,
        }
    }

    /// Treat as a long-flag
    ///
    /// Returns the flag name (without the leading `--`) and, if the argument contains `=`, the
    /// text after the first `=`. Returns `None` when the argument does not start with `--`.
    ///
    /// **NOTE:** May return an empty flag. Check [`ParsedArg::is_escape`] to separately detect `--`.
    pub fn to_long(&self) -> Option<(&RawOsStr, Option<&RawOsStr>)> {
        let remainder = self.to_value_os().strip_prefix("--")?;
        Some(match remainder.split_once("=") {
            Some((flag, value)) => (flag, Some(value)),
            None => (remainder, None),
        })
    }

    /// Can treat as a long-flag
    ///
    /// **NOTE:** May return an empty flag. Check [`ParsedArg::is_escape`] to separately detect `--`.
    pub fn is_long(&self) -> bool {
        self.to_value_os().starts_with("--")
    }

    /// Treat as a short-flag
    ///
    /// Returns an iterator over everything after the single leading `-`, or `None` when the
    /// argument does not start with `-` or starts with `--`.
    ///
    /// **NOTE:** May return an empty flag. Check [`ParsedArg::is_stdio`] to separately detect
    /// `-`.
    pub fn to_short(&self) -> Option<ShortFlags<'_>> {
        let remainder_os = self.inner.as_ref().strip_prefix('-')?;
        if remainder_os.starts_with('-') {
            // Two leading dashes: this is a long flag (or the escape), not short flags.
            return None;
        }
        // The argument starts with the one-byte `-`, so byte offset 1 is a char boundary.
        let remainder = self.utf8.map(|s| &s[1..]);
        Some(ShortFlags::new(remainder_os, remainder))
    }

    /// Can treat as a short-flag
    ///
    /// **NOTE:** May return an empty flag. Check [`ParsedArg::is_stdio`] to separately detect
    /// `-`.
    pub fn is_short(&self) -> bool {
        let raw = self.to_value_os();
        raw.starts_with('-') && !raw.starts_with("--")
    }

    /// Treat as a value
    ///
    /// **NOTE:** May return a flag or an escape.
    pub fn to_value_os(&self) -> &RawOsStr {
        self.inner.as_ref()
    }

    /// Treat as a value
    ///
    /// Returns `None` when the argument is not valid UTF-8.
    ///
    /// **NOTE:** May return a flag or an escape.
    pub fn to_value(&self) -> Option<&str> {
        self.utf8
    }

    /// Safely print an argument that may contain non-UTF8 content
    ///
    /// This may perform lossy conversion, depending on the platform. If you would like an
    /// implementation which escapes the path please use Debug instead.
    pub fn display(&self) -> impl std::fmt::Display + '_ {
        self.inner.to_str_lossy()
    }
}
/// Iterator-like view over the short flags packed into one argument (the text after the `-`).
///
/// The text is split into a leading valid-UTF-8 part, walked one `char` at a time, and an
/// optional trailing part starting at the first invalid byte, which is handed out as one piece.
#[derive(Clone, Debug)]
pub(crate) struct ShortFlags<'s> {
/// The complete text passed to `ShortFlags::new`; `value_os` slices into it.
inner: &'s RawOsStr,
/// Remaining characters of the valid-UTF-8 prefix, with byte offsets relative to `inner`.
utf8_prefix: std::str::CharIndices<'s>,
/// The not-yet-consumed non-UTF-8 tail of `inner`, if there is one.
invalid_suffix: Option<&'s RawOsStr>,
}
impl<'s> ShortFlags<'s> {
    /// Build from the text following the leading `-`.
    ///
    /// `utf8` is the same text as `inner` when it is entirely valid UTF-8; otherwise pass `None`
    /// and the valid prefix / invalid suffix are computed from `inner`.
    fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
        let (valid, invalid_suffix) = match utf8 {
            Some(all_valid) => (all_valid, None),
            None => split_nonutf8_once(inner),
        };
        Self {
            inner,
            utf8_prefix: valid.char_indices(),
            invalid_suffix,
        }
    }

    /// Skip `n` flags.
    ///
    /// Returns `Err(k)` with the number of flags successfully skipped if the flags run out, or
    /// the non-UTF-8 suffix is hit, before `n` were consumed.
    pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
        for consumed in 0..n {
            match self.next() {
                Some(Ok(_)) => {}
                Some(Err(_)) | None => return Err(consumed),
            }
        }
        Ok(())
    }

    /// Has everything (valid flags and any invalid suffix) been consumed?
    pub fn is_empty(&self) -> bool {
        self.utf8_prefix.as_str().is_empty() && self.invalid_suffix.is_none()
    }

    /// Does the remaining text parse as an `f64`?
    ///
    /// Never true while a non-UTF-8 suffix is pending.
    pub fn is_number(&self) -> bool {
        let remaining = self.utf8_prefix.as_str();
        self.invalid_suffix.is_none() && remaining.parse::<f64>().is_ok()
    }

    /// Take the next flag.
    ///
    /// Yields `Ok(char)` for each character of the valid prefix, then a single `Err(suffix)` if
    /// there is a non-UTF-8 suffix, then `None`.
    pub fn next(&mut self) -> Option<Result<char, &'s RawOsStr>> {
        match self.utf8_prefix.next() {
            Some((_, flag)) => Some(Ok(flag)),
            // Valid flags are exhausted; hand out the invalid tail exactly once.
            None => self.invalid_suffix.take().map(Err),
        }
    }

    /// Take everything that is left as a single value, leaving the iterator empty.
    ///
    /// Returns `None` if nothing is left.
    pub fn value_os(&mut self) -> Option<&'s RawOsStr> {
        // Either way the suffix is consumed: it is part of the slice below, or it is the result.
        let pending_suffix = self.invalid_suffix.take();
        match self.utf8_prefix.next() {
            Some((start, _)) => {
                self.utf8_prefix = "".char_indices();
                // `start` is a byte offset into `inner`, so this covers the rest of the valid
                // prefix plus any invalid suffix.
                Some(&self.inner[start..])
            }
            None => pending_suffix,
        }
    }
}
impl<'s> Iterator for ShortFlags<'s> {
type Item = Result<char, &'s RawOsStr>;
fn next(&mut self) -> Option<Self::Item> {
self.next()
}
}
/// Split `b` into its longest valid-UTF-8 prefix and whatever follows it.
///
/// The second element is `None` when all of `b` is valid UTF-8.
fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
    let valid_len = match std::str::from_utf8(b.as_raw_bytes()) {
        Ok(all_valid) => return (all_valid, None),
        Err(err) => err.valid_up_to(),
    };
    let (valid, after_valid) = b.split_at(valid_len);
    // `valid_up_to` marks the end of the bytes already verified as UTF-8, so this cannot fail.
    let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
    (valid, Some(after_valid))
}

View file

@ -90,7 +90,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> {
// If any arg sets .last(true)
let contains_last = self.cmd.get_arguments().any(|x| x.is_last_set());
while let Some(arg_os) = raw_args.next(&mut args_cursor) {
while let Some(arg_os) = raw_args.next_os(&mut args_cursor) {
// Recover the replaced items if any.
if let Some(replaced_items) = arg_os.to_str().and_then(|a| self.cmd.get_replacement(a))
{
@ -140,7 +140,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> {
);
if low_index_mults || missing_pos {
let skip_current = if let Some(n) = raw_args.peek(&args_cursor) {
let skip_current = if let Some(n) = raw_args.peek_os(&args_cursor) {
if let Some(p) = self
.cmd
.get_positionals()