mirror of
https://github.com/clap-rs/clap
synced 2024-12-14 14:52:33 +00:00
fix(UsageParser): Handle non-ascii names / options.
This should also speeds up the parser (except maybe for short options). Multi-codepoint characters still can't be used as short options, but people shouldn't non-ASCII options nyway. Closes #664
This commit is contained in:
parent
5c7a2190cc
commit
1d6a7c6e7e
1 changed files with 67 additions and 49 deletions
|
@ -52,14 +52,12 @@ impl<'a> UsageParser<'a> {
|
|||
loop {
|
||||
debugln!("iter; pos={};", self.pos);
|
||||
self.stop_at(token);
|
||||
if self.pos < self.usage.len() {
|
||||
if let Some(c) = self.usage.chars().nth(self.pos) {
|
||||
match c {
|
||||
'-' => self.short_or_long(&mut arg),
|
||||
'.' => self.multiple(&mut arg),
|
||||
'\'' => self.help(&mut arg),
|
||||
_ => self.name(&mut arg),
|
||||
}
|
||||
if let Some(&c) = self.usage.as_bytes().get(self.pos) {
|
||||
match c {
|
||||
b'-' => self.short_or_long(&mut arg),
|
||||
b'.' => self.multiple(&mut arg),
|
||||
b'\'' => self.help(&mut arg),
|
||||
_ => self.name(&mut arg),
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
|
@ -82,7 +80,7 @@ impl<'a> UsageParser<'a> {
|
|||
|
||||
fn name(&mut self, arg: &mut Arg<'a, 'a>) {
|
||||
debugln!("fn=name;");
|
||||
if self.usage.chars().nth(self.pos).expect(INTERNAL_ERROR_MSG) == '<' &&
|
||||
if *self.usage.as_bytes().get(self.pos).expect(INTERNAL_ERROR_MSG) == b'<' &&
|
||||
!self.explicit_name_set {
|
||||
arg.setb(ArgSettings::Required);
|
||||
}
|
||||
|
@ -113,23 +111,17 @@ impl<'a> UsageParser<'a> {
|
|||
}
|
||||
|
||||
fn stop_at<F>(&mut self, f: F)
|
||||
where F: Fn(u32) -> bool
|
||||
where F: Fn(u8) -> bool
|
||||
{
|
||||
debugln!("fn=stop_at;");
|
||||
self.start = self.pos;
|
||||
for c in self.usage[self.start..].chars() {
|
||||
if f(c as u32) {
|
||||
self.pos += 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
self.pos += self.usage[self.start..].bytes().take_while(|&b| f(b) ).count();
|
||||
}
|
||||
|
||||
fn short_or_long(&mut self, arg: &mut Arg<'a, 'a>) {
|
||||
debugln!("fn=short_or_long;");
|
||||
self.pos += 1;
|
||||
if self.usage.chars().nth(self.pos).expect(INTERNAL_ERROR_MSG) == '-' {
|
||||
if *self.usage.as_bytes().get(self.pos).expect(INTERNAL_ERROR_MSG) == b'-' {
|
||||
self.pos += 1;
|
||||
self.long(arg);
|
||||
return;
|
||||
|
@ -141,7 +133,7 @@ impl<'a> UsageParser<'a> {
|
|||
debugln!("fn=long;");
|
||||
self.stop_at(long_end);
|
||||
let name = &self.usage[self.start..self.pos];
|
||||
if arg.name.is_empty() || (self.prev == UsageToken::Short && arg.name.len() == 1) {
|
||||
if !self.explicit_name_set {
|
||||
debugln!("setting name: {}", name);
|
||||
arg.name = name;
|
||||
}
|
||||
|
@ -152,41 +144,41 @@ impl<'a> UsageParser<'a> {
|
|||
|
||||
fn short(&mut self, arg: &mut Arg<'a, 'a>) {
|
||||
debugln!("fn=short;");
|
||||
let name = &self.usage[self.pos..self.pos + 1];
|
||||
debugln!("setting short: {}", name);
|
||||
arg.short = Some(name.chars().nth(0).expect(INTERNAL_ERROR_MSG));
|
||||
let start = &self.usage[self.pos..];
|
||||
let short = start.chars().nth(0).expect(INTERNAL_ERROR_MSG);
|
||||
debugln!("setting short: {}", short);
|
||||
arg.short = Some(short);
|
||||
if arg.name.is_empty() {
|
||||
// --long takes precedence but doesn't set self.explicit_name_set
|
||||
let name = &start[..short.len_utf8()];
|
||||
debugln!("setting name: {}", name);
|
||||
arg.name = name;
|
||||
}
|
||||
self.prev = UsageToken::Short;
|
||||
}
|
||||
|
||||
// "something..."
|
||||
fn multiple(&mut self, arg: &mut Arg) {
|
||||
debugln!("fn=multiple;");
|
||||
let mut dot_counter = 1;
|
||||
let start = self.pos;
|
||||
for c in self.usage[start..].chars() {
|
||||
match c {
|
||||
'.' => {
|
||||
dot_counter += 1;
|
||||
self.pos += 1;
|
||||
if dot_counter == 3 {
|
||||
debugln!("setting multiple");
|
||||
arg.setb(ArgSettings::Multiple);
|
||||
if arg.settings.is_set(ArgSettings::TakesValue) {
|
||||
arg.setb(ArgSettings::UseValueDelimiter);
|
||||
arg.unsetb(ArgSettings::ValueDelimiterNotSet);
|
||||
if arg.val_delim.is_none() {
|
||||
arg.val_delim = Some(',');
|
||||
}
|
||||
}
|
||||
self.prev = UsageToken::Multiple;
|
||||
self.pos += 1;
|
||||
break;
|
||||
let mut bytes = self.usage[start..].bytes();
|
||||
while bytes.next() == Some(b'.') {
|
||||
dot_counter += 1;
|
||||
self.pos += 1;
|
||||
if dot_counter == 3 {
|
||||
debugln!("setting multiple");
|
||||
arg.setb(ArgSettings::Multiple);
|
||||
if arg.settings.is_set(ArgSettings::TakesValue) {
|
||||
arg.setb(ArgSettings::UseValueDelimiter);
|
||||
arg.unsetb(ArgSettings::ValueDelimiterNotSet);
|
||||
if arg.val_delim.is_none() {
|
||||
arg.val_delim = Some(',');
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
self.prev = UsageToken::Multiple;
|
||||
self.pos += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -204,26 +196,26 @@ impl<'a> UsageParser<'a> {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
fn name_end(b: u32) -> bool {
|
||||
fn name_end(b: u8) -> bool {
|
||||
// 93(]), 62(>)
|
||||
b > b']' as u32 || b < b'>' as u32 || (b > b'>' as u32 && b < b']' as u32)
|
||||
b > b']' || b < b'>' || (b > b'>' && b < b']')
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn token(b: u32) -> bool {
|
||||
fn token(b: u8) -> bool {
|
||||
// 39('), 45(-), 46(.), 60(<), 91([)
|
||||
b < 39 || b > 91 || (b > 46 && b < 91 && b != b'<' as u32) || (b > 39 && b < 45)
|
||||
b < 39 || b > 91 || (b > 46 && b < 91 && b != b'<') || (b > 39 && b < 45)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn long_end(b: u32) -> bool {
|
||||
// 39('), 46(.), 60(<), 61(=), 91([)
|
||||
(b < 39 && (b > 13 && b != b' ' as u32)) || b > 91 || (b > 61 && b < 91) ||
|
||||
fn long_end(b: u8) -> bool {
|
||||
// 39('), 46(.), 60(<), 61(=), 91([), 32( )
|
||||
(b < 39 && (b > 13 && b != b' ')) || b > 91 || (b > 61 && b < 91) ||
|
||||
(b > 39 && b < 60 && b != 46)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn help_start(b: u32) -> bool {
|
||||
fn help_start(b: u8) -> bool {
|
||||
// 39(')
|
||||
b < 39 || b > 39
|
||||
}
|
||||
|
@ -1220,4 +1212,30 @@ mod test {
|
|||
assert!(c.val_names.is_none());
|
||||
assert!(c.num_vals.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nonascii() {
|
||||
let a = Arg::from_usage("<ASCII> 'üñíčöĐ€'");
|
||||
assert_eq!(a.name, "ASCII");
|
||||
assert_eq!(a.help, Some("üñíčöĐ€"));
|
||||
let a = Arg::from_usage("<üñíčöĐ€> 'ASCII'");
|
||||
assert_eq!(a.name, "üñíčöĐ€");
|
||||
assert_eq!(a.help, Some("ASCII"));
|
||||
let a = Arg::from_usage("<üñíčöĐ€> 'üñíčöĐ€'");
|
||||
assert_eq!(a.name, "üñíčöĐ€");
|
||||
assert_eq!(a.help, Some("üñíčöĐ€"));
|
||||
let a = Arg::from_usage("-ø 'ø'");
|
||||
assert_eq!(a.name, "ø");
|
||||
assert_eq!(a.short, Some('ø'));
|
||||
assert_eq!(a.help, Some("ø"));
|
||||
let a = Arg::from_usage("--üñíčöĐ€ 'Nōṫ ASCII'");
|
||||
assert_eq!(a.name, "üñíčöĐ€");
|
||||
assert_eq!(a.long, Some("üñíčöĐ€"));
|
||||
assert_eq!(a.help, Some("Nōṫ ASCII"));
|
||||
let a = Arg::from_usage("[ñämê] --ôpt=[üñíčöĐ€] 'hælp'");
|
||||
assert_eq!(a.name, "ñämê");
|
||||
assert_eq!(a.long, Some("ôpt"));
|
||||
assert_eq!(a.val_names.unwrap().values().collect::<Vec<_>>(), [&"üñíčöĐ€"]);
|
||||
assert_eq!(a.help, Some("hælp"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue