fix: Detect when AllowInvalidUtf8 is needed

Fixes #36
This commit is contained in:
Ed Page 2021-11-29 13:16:03 -06:00
parent c5f5c49766
commit b8c34c3b57
7 changed files with 147 additions and 11 deletions

View file

@ -132,6 +132,7 @@ impl ArgMatcher {
let ma = self.entry(id).or_insert(MatchedArg::new());
ma.set_ty(ValueType::CommandLine);
ma.set_ignore_case(arg.is_set(ArgSettings::IgnoreCase));
ma.invalid_utf8_allowed(arg.is_set(ArgSettings::AllowInvalidUtf8));
ma.occurs += 1;
}
@ -150,7 +151,7 @@ impl ArgMatcher {
}
}
pub(crate) fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
// We will manually inc occurrences later(for flexibility under
// specific circumstances, like only add one occurrence for flag
// when we met: `--flag=one,two`).
@ -159,17 +160,17 @@ impl ArgMatcher {
ma.push_val(val);
}
pub(crate) fn new_val_group(&mut self, arg: &Id) {
let ma = self.entry(arg).or_default();
ma.new_val_group();
}
pub(crate) fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
let ma = self.entry(arg).or_default();
ma.set_ty(ty);
ma.append_val(val);
}
pub(crate) fn new_val_group(&mut self, arg: &Id) {
let ma = self.entry(arg).or_default();
ma.new_val_group();
}
pub(crate) fn add_index_to(&mut self, arg: &Id, idx: usize, ty: ValueType) {
let ma = self.entry(arg).or_default();
ma.set_ty(ty);

View file

@ -161,6 +161,7 @@ impl ArgMatches {
/// [`occurrences_of`]: crate::ArgMatches::occurrences_of()
pub fn value_of<T: Key>(&self, id: T) -> Option<&str> {
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_str().expect(INVALID_UTF8))
}
@ -208,6 +209,7 @@ impl ArgMatches {
/// [`Arg::values_of_lossy`]: ArgMatches::values_of_lossy()
pub fn value_of_lossy<T: Key>(&self, id: T) -> Option<Cow<'_, str>> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_string_lossy())
}
@ -256,6 +258,7 @@ impl ArgMatches {
/// [`ArgMatches::values_of_os`]: ArgMatches::values_of_os()
pub fn value_of_os<T: Key>(&self, id: T) -> Option<&OsStr> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.as_os_str())
}
@ -292,6 +295,7 @@ impl ArgMatches {
/// [`Iterator`]: std::iter::Iterator
pub fn values_of<T: Key>(&self, id: T) -> Option<Values> {
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &str {
o.to_str().expect(INVALID_UTF8)
}
@ -305,6 +309,7 @@ impl ArgMatches {
#[cfg(feature = "unstable-grouped")]
pub fn grouped_values_of<T: Key>(&self, id: T) -> Option<GroupedValues> {
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = GroupedValues {
iter: arg
.vals()
@ -351,6 +356,7 @@ impl ArgMatches {
/// ```
pub fn values_of_lossy<T: Key>(&self, id: T) -> Option<Vec<String>> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg
.vals_flatten()
.map(|v| v.to_string_lossy().into_owned())
@ -402,6 +408,7 @@ impl ArgMatches {
/// [`String`]: std::string::String
pub fn values_of_os<T: Key>(&self, id: T) -> Option<OsValues> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &OsStr {
o
}
@ -1230,6 +1237,24 @@ impl<'a> Default for Indices<'a> {
}
}
#[track_caller]
#[inline]
fn assert_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(false)),
"Must use `_os` lookups with `Arg::allow_invalid_utf8`"
);
}
#[track_caller]
#[inline]
fn assert_no_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(true)),
"Must use `Arg::allow_invalid_utf8` with `_os` lookups"
);
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -25,6 +25,7 @@ pub(crate) struct MatchedArg {
indices: Vec<usize>,
vals: Vec<Vec<OsString>>,
ignore_case: bool,
invalid_utf8_allowed: Option<bool>,
}
impl Default for MatchedArg {
@ -41,6 +42,7 @@ impl MatchedArg {
indices: Vec::new(),
vals: Vec::new(),
ignore_case: false,
invalid_utf8_allowed: None,
}
}
@ -143,6 +145,14 @@ impl MatchedArg {
pub(crate) fn set_ignore_case(&mut self, yes: bool) {
self.ignore_case = yes;
}
pub(crate) fn invalid_utf8_allowed(&mut self, yes: bool) {
self.invalid_utf8_allowed = Some(yes);
}
pub(crate) fn is_invalid_utf8_allowed(&self) -> Option<bool> {
self.invalid_utf8_allowed
}
}
#[cfg(test)]

View file

@ -706,9 +706,9 @@ impl<'help, 'app> Parser<'help, 'app> {
let mut sc_m = ArgMatcher::new(self.app);
while let Some((v, _)) = it.next() {
if !self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands)
&& v.to_str().is_none()
{
let allow_invalid_utf8 =
self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands);
if !allow_invalid_utf8 && v.to_str().is_none() {
return Err(ClapError::invalid_utf8(
self.app,
Usage::new(self).create_usage_with_title(&[]),
@ -720,6 +720,9 @@ impl<'help, 'app> Parser<'help, 'app> {
ValueType::CommandLine,
false,
);
sc_m.get_mut(&Id::empty_hash())
.expect("just inserted")
.invalid_utf8_allowed(allow_invalid_utf8);
}
matcher.subcommand(SubCommand {

View file

@ -884,6 +884,7 @@ fn allow_ext_sc_when_sc_required() {
let res = App::new("clap-test")
.version("v1.4.8")
.setting(AppSettings::AllowExternalSubcommands)
.setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands)
.setting(AppSettings::SubcommandRequiredElseHelp)
.try_get_matches_from(vec!["clap-test", "external-cmd", "foo"]);
@ -903,6 +904,7 @@ fn external_subcommand_looks_like_built_in() {
let res = App::new("cargo")
.version("1.26.0")
.setting(AppSettings::AllowExternalSubcommands)
.setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands)
.subcommand(App::new("install"))
.try_get_matches_from(vec!["cargo", "install-update", "foo"]);
assert!(res.is_ok());

View file

@ -539,6 +539,7 @@ fn multiple_defaults() {
Arg::new("files")
.long("files")
.number_of_values(2)
.allow_invalid_utf8(true)
.default_values(&["old", "new"]),
)
.try_get_matches_from(vec![""]);
@ -555,6 +556,7 @@ fn multiple_defaults_override() {
Arg::new("files")
.long("files")
.number_of_values(2)
.allow_invalid_utf8(true)
.default_values(&["old", "new"]),
)
.try_get_matches_from(vec!["", "--files", "other", "mine"]);

View file

@ -1,6 +1,6 @@
#![cfg(not(windows))]
use clap::{App, AppSettings, Arg, ErrorKind};
use clap::{arg, App, AppSettings, Arg, ErrorKind};
use std::ffi::OsString;
use std::os::unix::ffi::OsStringExt;
@ -387,3 +387,96 @@ fn allow_invalid_utf8_subcommand_args_with_allow_external_subcommands() {
]
);
}
#[test]
fn allow_validated_utf8_value_of() {
let a = App::new("test").arg(arg!(--name <NAME>));
let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap();
let _ = m.value_of("name");
}
#[test]
#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups"]
fn panic_validated_utf8_value_of_os() {
let a = App::new("test").arg(arg!(--name <NAME>));
let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap();
let _ = m.value_of_os("name");
}
#[test]
fn ignore_validated_utf8_with_defaults() {
// For now, we don't check the correct call is used with defaults (due to pain of piping it
// through the code) but we need to make sure we don't erroneously panic.
let a = App::new("test").arg(arg!(--value <VALUE>).required(false).default_value("foo"));
let m = a.try_get_matches_from(["test"]).unwrap();
let _ = m.value_of("value");
let _ = m.value_of_os("value");
}
#[test]
fn allow_invalid_utf8_value_of_os() {
let a = App::new("test").arg(arg!(--name <NAME>).allow_invalid_utf8(true));
let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap();
let _ = m.value_of_os("name");
}
#[test]
#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8`"]
fn panic_invalid_utf8_value_of() {
let a = App::new("test").arg(arg!(--name <NAME>).allow_invalid_utf8(true));
let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap();
let _ = m.value_of("name");
}
#[test]
fn ignore_invalid_utf8_with_defaults() {
// For now, we don't check the correct call is used with defaults (due to pain of piping it
// through the code) but we need to make sure we don't erroneously panic.
let a = App::new("test").arg(
arg!(--value <VALUE>)
.required(false)
.default_value("foo")
.allow_invalid_utf8(true),
);
let m = a.try_get_matches_from(["test"]).unwrap();
let _ = m.value_of("value");
let _ = m.value_of_os("value");
}
#[test]
fn allow_validated_utf8_external_subcommand_values_of() {
let a = App::new("test").setting(AppSettings::AllowExternalSubcommands);
let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap();
let (_ext, args) = m.subcommand().unwrap();
let _ = args.values_of("");
}
#[test]
#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups"]
fn panic_validated_utf8_external_subcommand_values_of_os() {
let a = App::new("test").setting(AppSettings::AllowExternalSubcommands);
let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap();
let (_ext, args) = m.subcommand().unwrap();
let _ = args.values_of_os("");
}
#[test]
fn allow_invalid_utf8_external_subcommand_values_of_os() {
let a = App::new("test")
.setting(AppSettings::AllowExternalSubcommands)
.setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands);
let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap();
let (_ext, args) = m.subcommand().unwrap();
let _ = args.values_of_os("");
}
#[test]
#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8`"]
fn panic_invalid_utf8_external_subcommand_values_of() {
let a = App::new("test")
.setting(AppSettings::AllowExternalSubcommands)
.setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands);
let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap();
let (_ext, args) = m.subcommand().unwrap();
let _ = args.values_of("");
}