feat(UTF-8): adds support for invalid utf8 in values

Closes #269
This commit is contained in:
Kevin K 2016-01-22 12:58:56 -05:00
parent c3e96232c9
commit e874a0d5e0
11 changed files with 434 additions and 170 deletions

View file

@ -1,5 +1,6 @@
macro_rules! remove_overriden {
($me:ident, $name:expr) => ({
debugln!("macro=remove_overriden!;");
if let Some(ref o) = $me.opts.iter().filter(|o| o.name == *$name).next() {
if let Some(ref ora) = o.requires {
for a in ora {
@ -55,6 +56,7 @@ macro_rules! remove_overriden {
macro_rules! arg_post_processing(
($me:ident, $arg:ident, $matcher:ident) => ({
use args::AnyArg;
debugln!("macro=arg_post_processing!;");
// Handle POSIX overrides
debug!("Is '{}' in overrides...", $arg.to_string());
if $me.overrides.contains(&$arg.name()) {
@ -78,10 +80,10 @@ macro_rules! arg_post_processing(
} else { sdebugln!("No"); }
// Handle conflicts
debugln!("Does '{}' have conflicts...", $arg.to_string());
debug!("Does '{}' have conflicts...", $arg.to_string());
if let Some(bl) = $arg.blacklist() {
for name in bl {
sdebugln!("\tYes '{}'", name);
sdebugln!("\n\tYes '{}'", name);
$me.blacklist.push(name);
vec_remove!($me.overrides, name);
vec_remove!($me.required, name);
@ -109,6 +111,7 @@ macro_rules! arg_post_processing(
macro_rules! _handle_group_reqs{
($me:ident, $arg:ident) => ({
use args::AnyArg;
debugln!("macro=_handle_group_reqs!;");
for grp in $me.groups.values() {
let mut found = false;
for name in grp.args.iter() {
@ -142,6 +145,7 @@ macro_rules! _handle_group_reqs{
macro_rules! validate_multiples {
($_self:ident, $a:ident, $m:ident) => {
debugln!("macro=validate_multiples!;");
if $m.contains(&$a.name) && !$a.settings.is_set(ArgSettings::Multiple) {
// Not the first time, and we don't allow multiples
return Err(Error::unexpected_multiple_usage($a, &*$_self.create_current_usage($m)))

View file

@ -3,6 +3,8 @@ use std::slice::Iter;
use std::io::{self, BufWriter, Write};
use std::ffi::{OsStr, OsString};
use std::fmt::Display;
#[cfg(feature = "debug")]
use std::os::unix::ffi::OsStrExt;
use vec_map::VecMap;
@ -67,6 +69,28 @@ impl<'a, 'b> Default for Parser<'a, 'b> {
}
}
// Records `$arg_os` as a value of the positional `$p` and updates the caller's
// positional bookkeeping (`$pos_only`, `$pos_counter`).
//
// Must be expanded inside a function that returns a `Result`: a failure from
// `validate_multiples!` or `add_val_to_arg` is returned to the caller immediately.
macro_rules! parse_positional {
    ($_self:ident, $p:ident, $arg_os:ident, $pos_only:ident, $pos_counter:ident, $matcher:ident) => {
        debugln!("macro=parse_positional!;");
        validate_multiples!($_self, $p, $matcher);

        match $_self.add_val_to_arg($p, &$arg_os, $matcher) {
            Ok(_) => {}
            Err(e) => return Err(e),
        }

        // With `TrailingVarArg`, once the last declared positional has been reached
        // every following argument is treated as positional-only.
        if !$pos_only && $_self.settings.is_set(AppSettings::TrailingVarArg) {
            if $pos_counter == $_self.positionals.len() {
                $pos_only = true;
            }
        }

        arg_post_processing!($_self, $p, $matcher);

        // A positional that accepts multiple values keeps consuming arguments, so the
        // counter only moves on for single-value positionals.
        if !$p.settings.is_set(ArgSettings::Multiple) {
            $pos_counter += 1;
        }
    };
}
impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
pub fn with_name(n: String) -> Self {
Parser { meta: AppMeta::with_name(n), ..Default::default() }
@ -423,6 +447,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
where I: Iterator<Item = T>,
T: Into<OsString>
{
debugln!("fn=get_matches_with;");
// First we create the `--help` and `--version` arguments and add them if
// necessary
self.create_help_and_version();
@ -433,11 +458,15 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
let mut pos_counter = 1;
while let Some(arg) = it.next() {
let arg_os = arg.into();
debugln!("Begin parsing '{:?}' ({:?})", arg_os, &*arg_os.as_bytes());
// Is this a new argument, or values from a previous option?
debug!("Starts new arg...");
let starts_new_arg = if arg_os.starts_with(b"-") {
sdebugln!("Yes");
!(arg_os.len() == 1)
} else {
sdebugln!("No");
false
};
@ -454,7 +483,6 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
}
}
}
let mut skip = false;
if arg_os.starts_with(b"--") {
if arg_os.len() == 2 {
// The user has passed '--' which means only positional args follow no matter
@ -464,48 +492,34 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
}
needs_val_of = try!(self.parse_long_arg(matcher, &arg_os));
continue;
} else if arg_os.starts_with(b"-") && arg_os.len() != 1 {
needs_val_of = try!(self.parse_short_arg(matcher, &arg_os));
} else {
skip = true;
continue;
}
if !skip { continue; }
let arg_str = arg_os.to_str().expect(INVALID_UTF8);
// let arg_str = arg_os.to_str().expect(INVALID_UTF8);
if self.subcommands.iter().any(|s| &s.0.meta.name[..] == &*arg_os) {
if &*arg_os == "help" &&
self.settings.is_set(AppSettings::NeedsSubcommandHelp) {
return self._help();
}
subcmd_name = Some(arg_str.to_owned());
// subcommands only support valid UTF-8
subcmd_name = Some(arg_os.to_str().expect(INVALID_UTF8).to_owned());
break;
} else if let Some(candidate) = suggestions::did_you_mean(
arg_str,
&*arg_os.to_string_lossy(),
self.subcommands.iter().map(|s| &s.0.meta.name)) {
return Err(
Error::invalid_subcommand(arg_str,
Error::invalid_subcommand(arg_os.to_string_lossy().into_owned(),
candidate,
self.meta.bin_name.as_ref().unwrap_or(&self.meta.name),
&*self.create_current_usage(matcher)));
}
}
if let Some(p) = self.positionals.get(&pos_counter) {
validate_multiples!(self, p, matcher);
try!(self.add_val_to_arg(p, &arg_os, matcher));
if !pos_only &&
(self.settings.is_set(AppSettings::TrailingVarArg) &&
pos_counter == self.positionals.len()) {
pos_only = true;
}
arg_post_processing!(self, p, matcher);
// Only increment the positional counter if it doesn't allow multiples
if !p.settings.is_set(ArgSettings::Multiple) {
pos_counter += 1;
}
parse_positional!(self, p, arg_os, pos_only, pos_counter, matcher);
} else {
if self.settings.is_set(AppSettings::AllowExternalSubcommands) {
// let arg_str = arg_os.to_str().expect(INVALID_UTF8);
@ -567,51 +581,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
try!(self.validate_required(matcher));
}
if let Some(sc_name) = subcmd_name {
use std::fmt::Write;
let mut mid_string = String::new();
if !self.settings.is_set(AppSettings::SubcommandsNegateReqs) {
let mut hs: Vec<&str> = self.required.iter().map(|n| &**n).collect();
for k in matcher.arg_names() {
hs.push(k);
}
let reqs = self.get_required_from(&hs, Some(matcher));
for s in reqs.iter() {
write!(&mut mid_string, " {}", s).expect(INTERNAL_ERROR_MSG);
}
}
mid_string.push_str(" ");
if let Some(ref mut sc) = self.subcommands
.iter_mut()
.filter(|s| &s.0.meta.name[..] == &sc_name)
.next() {
let mut sc_matcher = ArgMatcher::new();
// bin_name should be parent's bin_name + [<reqs>] + the sc's name separated by
// a space
sc.0.meta.usage = Some(format!("{}{}{}",
self.meta.bin_name.as_ref().unwrap_or(&String::new()),
if self.meta.bin_name.is_some() {
&*mid_string
} else {
""
},
&*sc.0.meta.name));
sc.0.meta.bin_name = Some(format!("{}{}{}",
self.meta.bin_name.as_ref().unwrap_or(&String::new()),
if self.meta.bin_name.is_some() {
" "
} else {
""
},
&*sc.0.meta.name));
if let Err(e) = sc.0.get_matches_with(&mut sc_matcher, it) {
e.exit();
}
matcher.subcommand(SubCommand {
name: sc.0.meta.name.clone(),
matches: sc_matcher.into(),
});
}
try!(self.parse_subcommand(sc_name, matcher, it));
} else if self.is_set(AppSettings::SubcommandRequired) {
let bn = self.meta.bin_name.as_ref().unwrap_or(&self.meta.name);
return Err(Error::missing_subcommand(bn, &self.create_current_usage(matcher)));
@ -636,6 +606,60 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
}
Ok(())
}
/// Hands the remaining arguments in `it` over to the subcommand named `sc_name`.
///
/// Before delegating, the subcommand's `usage` and `bin_name` are rebuilt from the
/// parent's `bin_name` (when one is set) so that its help and error output show the
/// full invocation path. The subcommand's matches are then stored on `matcher`.
///
/// If no registered subcommand is called `sc_name`, nothing is parsed and `Ok(())` is
/// returned. A parse error raised by the subcommand is not returned to the caller;
/// `e.exit()` is invoked on it instead.
fn parse_subcommand<I, T>(&mut self, sc_name: String, matcher: &mut ArgMatcher<'a>, it: &mut I) -> ClapResult<()>
    where I: Iterator<Item = T>,
          T: Into<OsString>
{
    use std::fmt::Write;
    debugln!("fn=parse_subcommand;");

    // " <req> <req> ... " -- the parent's required arguments, shown between the
    // parent's bin_name and the subcommand name in the subcommand's usage string.
    let mut mid_string = String::new();
    if !self.settings.is_set(AppSettings::SubcommandsNegateReqs) {
        let mut names: Vec<&str> = self.required.iter().map(|n| &**n).collect();
        for used in matcher.arg_names() {
            names.push(used);
        }
        let required = self.get_required_from(&names, Some(matcher));
        for r in required.iter() {
            write!(&mut mid_string, " {}", r).expect(INTERNAL_ERROR_MSG);
        }
    }
    mid_string.push(' ');

    // Without a parent bin_name both derived strings are just the subcommand's name.
    let parent_bin = self.meta.bin_name.as_ref().map_or("", |b| &b[..]);
    let (usage_sep, bin_sep) = if self.meta.bin_name.is_some() {
        (&mid_string[..], " ")
    } else {
        ("", "")
    };

    if let Some(sc) = self.subcommands
                          .iter_mut()
                          .find(|s| &s.0.meta.name[..] == &sc_name) {
        // bin_name should be parent's bin_name + [<reqs>] + the sc's name separated by
        // a space
        sc.0.meta.usage = Some(format!("{}{}{}", parent_bin, usage_sep, &*sc.0.meta.name));
        sc.0.meta.bin_name = Some(format!("{}{}{}", parent_bin, bin_sep, &*sc.0.meta.name));

        let mut sc_matcher = ArgMatcher::new();
        if let Err(e) = sc.0.get_matches_with(&mut sc_matcher, it) {
            e.exit();
        }
        matcher.subcommand(SubCommand {
            name: sc.0.meta.name.clone(),
            matches: sc_matcher.into(),
        });
    }
    Ok(())
}
fn blacklisted_from(&self, name: &str, matcher: &ArgMatcher) -> Option<String> {
for k in matcher.arg_names() {
if let Some(f) = self.flags.iter().filter(|f| &f.name == &k).next() {
@ -780,6 +804,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
}
fn create_help_and_version(&mut self) {
debugln!("fn=create_help_and_version;");
// name is "hclap_help" because flags are sorted by name
if !self.flags.iter().any(|a| a.long.is_some() && a.long.unwrap() == "help") {
if self.help_short.is_none() && !self.short_list.contains(&'h') {
@ -958,9 +983,12 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
debugln!("Found valid short opt -{} in '{}'", c, arg);
// Check for trailing concatenated value
let p: Vec<_> = arg.splitn(2, c).collect();
let i = p[0].as_bytes().len();
let val = if i != 0 {
Some(full_arg.split_at(i + 1).1)
debugln!("arg: {:?}, arg_os: {:?}, full_arg: {:?}", arg, arg_os, full_arg);
debugln!("p[0]: {:?}, p[1]: {:?}", p[0].as_bytes(), p[1].as_bytes());
let i = p[0].as_bytes().len() + 1;
let val = if p[1].as_bytes().len() > 0 {
debugln!("setting val: {:?} (bytes), {:?} (ascii)", arg_os.split_at(i).1.as_bytes(), arg_os.split_at(i).1);
Some(arg_os.split_at(i).1)
} else {
None
};
@ -999,7 +1027,8 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
validate_multiples!(self, opt, matcher);
debug!("Checking for val...");
if let Some(v) = val {
if let Some(mut v) = val {
v = v.trim_left_matches(b'=');
if !opt.is_set(ArgSettings::EmptyValues) && v.len() == 0 {
sdebugln!("Found Empty - Error");
return Err(Error::empty_value(opt, &*self.create_current_usage(matcher)));
@ -1037,9 +1066,13 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
fn validate_value<A>(&self, arg: &A, val: &OsStr, matcher: &ArgMatcher<'a>) -> ClapResult<Option<&'a str>>
where A: AnyArg<'a, 'b> {
debugln!("fn=validate_value; val={:?}", val);
if self.is_set(AppSettings::StrictUtf8) && val.to_str().is_none() {
return Err(Error::invalid_utf8(&*self.create_current_usage(matcher)));
}
if let Some(ref p_vals) = arg.possible_vals() {
let val_str = val.to_str().expect(INVALID_UTF8);
if !p_vals.contains(&val_str.into()) {
let val_str = val.to_string_lossy();
if !p_vals.contains(&&*val_str) {
return Err(
Error::invalid_value(val_str,
p_vals,
@ -1048,22 +1081,12 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
}
}
if !arg.is_set(ArgSettings::EmptyValues) &&
val == "" && // .is_empty() doesn't exist for OsStr
val.is_empty() &&
matcher.contains(&*arg.name()) {
return Err(Error::empty_value(arg, &*self.create_current_usage(matcher)));
}
if let Some(ref vtor) = arg.validator() {
let v = if !self.settings.is_set(AppSettings::StrictUtf8) {
val.to_string_lossy().into_owned()
} else {
match val.to_str() {
Some(s) => s.to_owned(),
None => {
return Err(Error::invalid_utf8(&*self.create_current_usage(matcher)));
}
}
};
if let Err(e) = vtor(v) {
if let Err(e) = vtor(val.to_string_lossy().into_owned()) {
return Err(Error::value_validation(e));
}
}
@ -1140,8 +1163,10 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
fn validate_blacklist(&self, matcher: &mut ArgMatcher) -> ClapResult<()> {
debugln!("fn=validate_blacklist;");
macro_rules! build_err {
($me:ident, $name:expr, $matcher:ident) => ({
debugln!("macro=build_err;");
let c_with = $me.blacklisted_from($name, &$matcher);
debugln!("'{:?}' conflicts with '{}'", c_with, $name);
let usg = $me.create_current_usage($matcher);
@ -1256,14 +1281,16 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
},
&*self.create_current_usage(matcher)));
}
}
if let Some(max) = pos.max_vals {
} else if let Some(max) = pos.max_vals {
if (ma.vals.len() as u8) > max {
return Err(Error::too_many_values(
ma.vals.get(&ma.vals.keys()
.last()
.expect(INTERNAL_ERROR_MSG))
.expect(INTERNAL_ERROR_MSG).to_str().expect(INVALID_UTF8),
return Err(
Error::too_many_values(
ma.vals.get(&ma.vals.keys()
.last()
.expect(INTERNAL_ERROR_MSG))
.expect(INTERNAL_ERROR_MSG)
.to_string_lossy()
.into_owned(),
pos,
&*self.create_current_usage(matcher)));
}
@ -1365,6 +1392,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b {
// after all arguments were parsed, but before any subcommands have been parsed
// (so as to give subcommands their own usage recursively)
fn create_usage(&self, used: &[&str]) -> String {
debugln!("fn=create_usage;");
let mut usage = String::with_capacity(75);
usage.push_str("USAGE:\n\t");
if let Some(u) = self.meta.usage_str {

View file

@ -29,7 +29,7 @@ pub struct AppFlags(Flags);
impl AppFlags {
pub fn new() -> Self {
AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_STRICT)
AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_NONE)
}
pub fn set(&mut self, s: AppSettings) {
@ -329,7 +329,61 @@ pub enum AppSettings {
/// }
/// ```
AllowExternalSubcommands,
/// Specifies that any invalid UTF-8 code points should be treated as an error and fail
/// with an `ErrorKind::InvalidUtf8` error.
///
/// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands
/// only allow valid UTF-8 code points.
///
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, AppSettings, ErrorKind};
/// use std::ffi::OsString;
/// use std::os::unix::ffi::OsStringExt;
///
/// let m = App::new("myprog")
///     .setting(AppSettings::StrictUtf8)
///     .arg_from_usage("<arg> 'some positional arg'")
///     .get_matches_from_safe(
///         vec![
///             OsString::from("myprog"),
///             OsString::from_vec(vec![0xe9])]);
///
/// assert!(m.is_err());
/// assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
/// ```
StrictUtf8,
/// Specifies that any invalid UTF-8 code points should *not* be treated as an error. This is
/// the default behavior of `clap`.
///
/// **NOTE:** Using argument values with invalid UTF-8 code points requires using either
/// `ArgMatches::os_value(s)_of` or `ArgMatches::lossy_value(s)_of` for those particular
/// arguments which may have invalid UTF-8 values.
///
/// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands
/// only allow valid UTF-8 code points.
///
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, AppSettings};
/// use std::ffi::OsString;
/// use std::os::unix::ffi::{OsStrExt, OsStringExt};
///
/// let r = App::new("myprog")
///     .setting(AppSettings::AllowInvalidUtf8)
///     .arg_from_usage("<arg> 'some positional arg'")
///     .get_matches_from_safe(
///         vec![
///             OsString::from("myprog"),
///             OsString::from_vec(vec![0xe9])]);
///
/// assert!(r.is_ok());
/// let m = r.unwrap();
/// assert_eq!(m.os_value_of("arg").unwrap().as_bytes(), &[0xe9]);
/// ```
AllowInvalidUtf8,
#[doc(hidden)]
NeedsLongVersion,

View file

@ -2,6 +2,7 @@ use std::ffi::{OsString, OsStr};
use std::collections::HashMap;
use std::iter::Map;
use std::slice;
use std::borrow::Cow;
use vec_map;
@ -119,6 +120,15 @@ impl<'a> ArgMatches<'a> {
None
}
/// Returns the first value recorded for the argument `name`, converted with
/// `to_string_lossy`, so a value that is not valid UTF-8 is still returned as text
/// rather than rejected.
///
/// Returns `None` when no argument called `name` was matched or when it holds no
/// values.
pub fn lossy_value_of<S: AsRef<str>>(&'a self, name: S) -> Option<Cow<'a, str>> {
    self.args
        .get(name.as_ref())
        .and_then(|arg| arg.vals.values().nth(0))
        .map(|v| v.to_string_lossy())
}
/// Returns the first value recorded for the argument `name` as an `OsStr`, without
/// any UTF-8 conversion.
///
/// Returns `None` when no argument called `name` was matched or when it holds no
/// values.
pub fn os_value_of<S: AsRef<str>>(&self, name: S) -> Option<&OsStr> {
    self.args
        .get(name.as_ref())
        .and_then(|arg| arg.vals.values().nth(0))
        .map(|v| v.as_os_str())
}

View file

@ -234,9 +234,6 @@ pub enum ErrorKind {
/// Occurs when the user provides a value containing invalid UTF-8 for an argument and
/// `AppSettings::StrictUtf8` is set.
///
/// **Note:** This is the default setting and behavior. If you wish to *allow* invalid UTF-8 in
/// argument values, use `AppSettings::AllowInvalidUtf8`
///
/// # Platform Specific
///
/// Non-Windows platforms only (such as Linux, Unix, OSX, etc.)
@ -244,16 +241,17 @@ pub enum ErrorKind {
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, ErrorKind};
/// # use clap::{App, Arg, ErrorKind, AppSettings};
/// # use std::os::unix::ffi::OsStringExt;
/// # use std::ffi::OsString;
/// let result = App::new("myprog")
/// .arg(Arg::with_name("debug")
/// .setting(AppSettings::StrictUtf8)
/// .arg(Arg::with_name("utf8")
/// .short("u")
/// .takes_value(true))
/// .get_matches_from_safe(vec![OsString::from("myprog"),
/// OsString::from("-u"),
/// OsString::from_vec(vec![0x20, 0xE9])]);
/// OsString::from_vec(vec![0xE9])]);
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().kind, ErrorKind::InvalidUtf8);
/// ```

View file

@ -65,6 +65,7 @@ macro_rules! load_yaml {
// used in src/args/arg_builder/option.rs
macro_rules! print_opt_help {
($opt:ident, $spc:expr, $w:ident) => {
debugln!("macro=print_opt_help!;");
if let Some(h) = $opt.help {
if h.contains("{n}") {
let mut hel = h.split("{n}");
@ -96,6 +97,7 @@ macro_rules! print_opt_help {
// src/app/mod.rs
macro_rules! write_spaces {
($num:expr, $w:ident) => ({
debugln!("macro=write_spaces!;");
for _ in 0..$num {
try!(write!($w, " "));
}
@ -105,6 +107,7 @@ macro_rules! write_spaces {
// convenience macro for remove an item from a vec
macro_rules! vec_remove {
($vec:expr, $to_rem:ident) => {
debugln!("macro=write_spaces!;");
{
let mut ix = None;
$vec.dedup();
@ -127,6 +130,7 @@ macro_rules! vec_remove {
// item.
macro_rules! for_match {
($it:ident, $($p:pat => $($e:expr);+),*) => {
debugln!("macro=for_match!;");
for i in $it {
match i {
$(

View file

@ -8,6 +8,7 @@ pub trait OsStrExt2 {
fn trim_left_matches(&self, b: u8) -> &OsStr;
fn len(&self) -> usize;
fn contains_byte(&self, b: u8) -> bool;
fn is_empty(&self) -> bool;
}
impl OsStrExt2 for OsStr {
@ -21,6 +22,10 @@ impl OsStrExt2 for OsStr {
return true;
}
// Reports whether this `OsStr` contains zero bytes.
fn is_empty(&self) -> bool {
self.as_bytes().is_empty()
}
fn contains_byte(&self, byte: u8) -> bool {
for b in self.as_bytes() {
if b == &byte { return true; }

View file

@ -1,35 +1 @@
// use std::ffi::OsStr;
// use std::borrow::Cow;
//
// pub trait Utf8Rule { type Out; fn into(&OsStr) -> <Self as Utf8Rule>::Out; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct Strict<'a>;
// impl<'a> Utf8Rule for Strict<'a> { type Out = &'a str; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct Lossy<'a>;
// impl<'a> Utf8Rule for Lossy<'a> { type Out = Cow<'a, str>; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct AllowInvalid<'a>;
// impl<'a> Utf8Rule for AllowInvalid<'a> { type Out = &'a OsStr; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub enum Utf8 {
// Strict,
// Lossy,
// AllowInvalid,
// }
//
// impl Utf8 {
// pub fn into<U: UtfRule>(&self) -> U::Out {
// match *self {
// Utf::Strict => Strict::,
// Utf::Lossy =>,
// Utf::AllowInvalid =>,
// }
// }
// }
pub const INVALID_UTF8: &'static str = "unexpected invalid UTF-8 code point";

View file

@ -116,9 +116,11 @@ fn conflict_overriden_2() {
.arg(Arg::from_usage("-c, --color 'third flag'")
.mutually_overrides_with("flag"))
.get_matches_from_safe(vec!["myprog", "-f", "-d", "-c"]);
assert!(result.is_err());
let err = result.err().unwrap();
assert_eq!(err.kind, ErrorKind::ArgumentConflict);
assert!(result.is_ok());
let m = result.unwrap();
assert!(m.is_present("color"));
assert!(m.is_present("debug"));
assert!(!m.is_present("flag"));
}
#[test]

View file

@ -1,30 +0,0 @@
#![cfg(not(windows))]
extern crate clap;
use std::ffi::OsString;
use std::os::unix::ffi::OsStringExt;
use clap::{App, Arg, AppSettings, ErrorKind};
#[test]
fn invalid_unicode_safe() {
let m = App::new("bad_unicode")
.arg(Arg::from_usage("<arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from_vec(vec![0x20]),
OsString::from_vec(vec![0xe9])]);
assert!(m.is_err());
if let Err(err) = m {
assert_eq!(err.kind, ErrorKind::InvalidUtf8);
}
}
#[test]
fn invalid_unicode_lossy() {
let m = App::new("bad_unicode")
.arg(Arg::from_usage("<arg> 'some arg'"))
.setting(AppSettings::AllowInvalidUtf8)
.get_matches_from(vec![OsString::from_vec(vec![0x20]),
OsString::from_vec(vec![0xe9])]);
assert!(m.is_present("arg"));
assert_eq!(m.value_of("arg").unwrap(), "\u{FFFD}");
}

223
tests/utf8.rs Normal file
View file

@ -0,0 +1,223 @@
#![cfg(not(windows))]
extern crate clap;
use std::ffi::OsString;
use std::os::unix::ffi::OsStringExt;
use clap::{App, Arg, AppSettings, ErrorKind};
#[test]
fn invalid_utf8_strict_positional() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("<arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_strict_option_short_space() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("-a"),
OsString::from_vec(vec![0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_strict_option_short_equals() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_strict_option_short_no_space() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_strict_option_long_space() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("--arg"),
OsString::from_vec(vec![0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_strict_option_long_equals() {
let m = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.setting(AppSettings::StrictUtf8)
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]);
assert!(m.is_err());
assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
}
#[test]
fn invalid_utf8_lossy_positional() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("<arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_lossy_option_short_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("-a"),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_lossy_option_short_equals() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_lossy_option_short_no_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_lossy_option_long_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("--arg"),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_lossy_option_long_equals() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}");
}
#[test]
fn invalid_utf8_positional() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("<arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}
#[test]
fn invalid_utf8_option_short_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("-a"),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}
#[test]
fn invalid_utf8_option_short_equals() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}
#[test]
fn invalid_utf8_option_short_no_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x61, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}
#[test]
fn invalid_utf8_option_long_space() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from("--arg"),
OsString::from_vec(vec![0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}
#[test]
fn invalid_utf8_option_long_equals() {
let r = App::new("bad_utf8")
.arg(Arg::from_usage("-a, --arg <arg> 'some arg'"))
.get_matches_from_safe(vec![OsString::from(""),
OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]);
assert!(r.is_ok());
let m = r.unwrap();
assert!(m.is_present("arg"));
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9]));
}