feat(Unicode): allows non-panicing on invalid unicode characters

This commit is contained in:
Kevin K 2015-09-21 21:58:25 -04:00
parent 3d0199da88
commit c5bf7ddc8c
2 changed files with 222 additions and 26 deletions

View file

@ -4,6 +4,8 @@ use std::io::{self, BufRead, BufWriter, Write};
use std::path::Path;
use std::process;
use std::error::Error;
use std::ffi::OsStr;
use std::borrow::Borrow;
#[cfg(feature = "yaml")]
use yaml_rust::Yaml;
@ -1879,6 +1881,10 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands
///
/// # Panics
///
/// If any arguments contain invalid unicode characters. If this is not desired it is
/// recommended to use the `*_safe()` or `*_lossy()` versions of this method.
///
/// # Example
///
@ -1893,6 +1899,23 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
self.get_matches_from(env::args())
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
/// `U+FFFD REPLACEMENT CHARACTER`
///
/// # Examples
///
/// ```no_run
/// # use clap::{App, Arg};
/// let matches = App::new("myprog")
/// // Args and options go here...
/// .get_matches();
/// ```
pub fn get_matches_lossy(self) -> ArgMatches<'ar, 'ar> {
// Start the parsing
self.get_matches_from_lossy(env::args_os())
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands
///
@ -1911,7 +1934,29 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
/// ```
pub fn get_matches_safe(self) -> Result<ArgMatches<'ar, 'ar>, ClapError> {
// Start the parsing
self.get_matches_from_safe(env::args())
self.get_matches_from_safe(env::args_os())
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
/// `U+FFFD REPLACEMENT CHARACTER`
///
/// **NOTE:** This method should only be used when is absolutely necessary to handle errors
/// manually.
///
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// let matches = App::new("myprog")
/// // Args and options go here...
/// .get_matches_safe()
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
/// ```
pub fn get_matches_safe_lossy(self) -> Result<ArgMatches<'ar, 'ar>, ClapError> {
// Start the parsing
self.get_matches_from_safe_lossy(env::args_os())
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
@ -1938,8 +1983,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
itr: I)
-> ArgMatches<'ar, 'ar>
where I: IntoIterator<Item = T>,
T: AsRef<str>
{
T: AsRef<OsStr> {
match self.get_matches_from_safe_borrow(itr) {
Ok(m) => return m,
Err(e) => {
@ -1955,6 +1999,47 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
}
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
/// `U+FFFD REPLACEMENT CHARACTER`
///
/// **NOTE:** The first argument will be parsed as the binary name.
///
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
/// `App::get_matches()`
///
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
///
/// let matches = App::new("myprog")
/// // Args and options go here...
/// .get_matches_from(arg_vec);
/// ```
pub fn get_matches_from_lossy<I, T>(mut self,
itr: I)
-> ArgMatches<'ar, 'ar>
where I: IntoIterator<Item = T>,
T: AsRef<OsStr> {
match self.get_matches_from_safe_borrow_lossy(itr) {
Ok(m) => return m,
Err(e) => {
wlnerr!("{}", e.error);
if self.wait_on_error {
wlnerr!("\nPress [ENTER] / [RETURN] to continue...");
let mut s = String::new();
let i = io::stdin();
i.lock().read_line(&mut s).unwrap();
}
process::exit(1);
}
}
}
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands
///
@ -1967,6 +2052,9 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
/// **NOTE:** This method should only be used when is absolutely necessary to handle errors
/// manually.
///
/// **NOTE:** Invalid unicode characters will result in an `Err` with type
/// `ClapErrorType::InvalidUnicode`
///
///
/// # Example
///
@ -1983,14 +2071,13 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
itr: I)
-> Result<ArgMatches<'ar, 'ar>, ClapError>
where I: IntoIterator<Item = T>,
T: AsRef<str>
{
T: AsRef<OsStr> {
self.get_matches_from_safe_borrow(itr)
}
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
/// consume `self`.
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
/// `U+FFFD REPLACEMENT CHARACTER`
///
/// **NOTE:** The first argument will be parsed as the binary name.
///
@ -2001,23 +2088,32 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
/// **NOTE:** This method should only be used when is absolutely necessary to handle errors
/// manually.
///
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
///
/// let mut app = App::new("myprog");
/// let matches = App::new("myprog")
/// // Args and options go here...
/// let matches = app.get_matches_from_safe_borrow(arg_vec)
/// .get_matches_from_safe(arg_vec)
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
/// ```
pub fn get_matches_from_safe_borrow<I, T>(&mut self,
itr: I)
pub fn get_matches_from_safe_lossy<I, T>(mut self,
itr: I)
-> Result<ArgMatches<'ar, 'ar>, ClapError>
where I: IntoIterator<Item = T>,
T: AsRef<OsStr> {
self._get_matches_from_safe_borrow(itr, true)
}
fn _get_matches_from_safe_borrow<I, T>(&mut self,
itr: I,
lossy: bool)
-> Result<ArgMatches<'ar, 'ar>, ClapError>
where I: IntoIterator<Item = T>,
T: AsRef<str>
{
T: AsRef<OsStr> {
// Verify all positional assertions pass
self.verify_positionals();
// If there are global arguments, we need to propgate them down to subcommands before
@ -2044,13 +2140,83 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
}
// do the real parsing
if let Err(e) = self.get_matches_with(&mut matches, &mut it) {
if let Err(e) = self.get_matches_with(&mut matches, &mut it, lossy) {
return Err(e);
}
Ok(matches)
}
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
/// consume `self`.
///
/// **NOTE:** The first argument will be parsed as the binary name.
///
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
/// `App::get_matches_safe()`
///
/// **NOTE:** This method should only be used when is absolutely necessary to handle errors
/// manually.
///
/// **NOTE:** Invalid unicode characters will result in an `Err` with type
/// `ClapErrorType::InvalidUnicode`
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
///
/// let mut app = App::new("myprog");
/// // Args and options go here...
/// let matches = app.get_matches_from_safe_borrow(arg_vec)
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
/// ```
pub fn get_matches_from_safe_borrow<I, T>(&mut self,
itr: I)
-> Result<ArgMatches<'ar, 'ar>, ClapError>
where I: IntoIterator<Item = T>,
T: AsRef<OsStr> {
self._get_matches_from_safe_borrow(itr, false)
}
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
/// consume `self`. Invalid unicode characters are replaced with `U+FFFD REPLACEMENT CHARACTER`
///
/// **NOTE:** The first argument will be parsed as the binary name.
///
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
/// `App::get_matches_safe()`
///
/// **NOTE:** This method should only be used when is absolutely necessary to handle errors
/// manually.
///
/// **NOTE:** Invalid unicode characters will result in an `Err` with type
/// `ClapErrorType::InvalidUnicode`
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
///
/// let mut app = App::new("myprog");
/// // Args and options go here...
/// let matches = app.get_matches_from_safe_borrow(arg_vec)
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
/// ```
pub fn get_matches_from_safe_borrow_lossy<I, T>(&mut self,
itr: I)
-> Result<ArgMatches<'ar, 'ar>, ClapError>
where I: IntoIterator<Item = T>,
T: AsRef<OsStr> {
self._get_matches_from_safe_borrow(itr, true)
}
fn verify_positionals(&mut self) {
// Because you must wait until all arguments have been supplied, this is the first chance
@ -2137,11 +2303,11 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
// The actual parsing function
fn get_matches_with<I, T>(&mut self,
matches: &mut ArgMatches<'ar, 'ar>,
it: &mut I)
it: &mut I,
lossy: bool)
-> Result<(), ClapError>
where I: Iterator<Item = T>,
T: AsRef<str>
{
T: AsRef<OsStr> {
// First we create the `--help` and `--version` arguments and add them if necessary
self.create_help_and_version();
@ -2151,7 +2317,20 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
let mut pos_counter = 1;
let mut val_counter = 0;
while let Some(arg) = it.next() {
let arg_slice = arg.as_ref();
let arg_cow = match arg.as_ref().to_str() {
Some(s) => s.into(),
None => {
if !lossy {
return Err(ClapError{
error: format!("{} Invalid unicode character in one or more arguments",
Format::Error("error:")),
error_type: ClapErrorType::InvalidUnicode
});
}
arg.as_ref().to_string_lossy()
}
};
let arg_slice: &str = arg_cow.borrow();
let mut skip = false;
// we need to know if we're parsing a new argument, or the value of previous argument,
@ -2186,7 +2365,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
if num == vals.len() as u8 && !opt.multiple {
return Err(self.report_error(format!("The argument '{}' \
was found, but '{}' only expects {} values",
Format::Warning(arg.as_ref()),
Format::Warning(arg_slice),
Format::Warning(opt.to_string()),
Format::Good(vals.len().to_string())),
ClapErrorType::InvalidValue,
@ -2335,7 +2514,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
format!("The subcommand '{}' isn't valid\n\tDid you mean '{}' ?\n\n\
If you received this message in error, try \
re-running with '{} {} {}'",
Format::Warning(arg.as_ref()),
Format::Warning(arg_slice),
Format::Good(candidate_subcommand),
self.bin_name.clone().unwrap_or(self.name.clone()),
Format::Good("--"),
@ -2350,7 +2529,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
if self.positionals_idx.is_empty() {
return Err(self.report_error(
format!("Found argument '{}', but {} wasn't expecting any",
Format::Warning(arg.as_ref()),
Format::Warning(arg_slice),
self.bin_name.clone().unwrap_or(self.name.clone())),
ClapErrorType::UnexpectedArgument,
App::get_args(matches)));
@ -2391,7 +2570,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
if vals.len() as u8 == num {
return Err(self.report_error(format!("The argument '{}' \
was found, but '{}' wasn't expecting any more values",
Format::Warning(arg.as_ref()),
Format::Warning(arg_slice),
Format::Warning(p.to_string())),
ClapErrorType::TooMuchValues,
App::get_args(matches)));
@ -2481,7 +2660,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
}
} else {
return Err(self.report_error(format!("The argument '{}' was found, but '{}' \
wasn't expecting any", Format::Warning(arg.as_ref()),
wasn't expecting any", Format::Warning(arg_slice),
self.bin_name.clone().unwrap_or(self.name.clone())),
ClapErrorType::UnexpectedArgument,
App::get_args(matches)));
@ -2575,7 +2754,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
""
},
sc.name.clone()));
if let Err(e) = sc.get_matches_with(&mut new_matches, it) {
if let Err(e) = sc.get_matches_with(&mut new_matches, it, lossy) {
e.exit();
}
matches.subcommand = Some(Box::new(SubCommand {
@ -3554,5 +3733,4 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
None => (String::new(), None),
}
}
}

View file

@ -178,6 +178,24 @@ pub enum ClapErrorType {
/// .get_matches_from_safe(vec!["", "--debug", "--debug"]);
/// ```
UnexpectedMultipleUsage,
/// Error occurs when argument contains invalid unicode characters
///
///
/// # Example
///
/// ```no_run
/// # use clap::{App, Arg};
/// # use std::os::unix::ffi::OsStringExt;
/// # use std::ffi::OsString;
/// let result = App::new("myprog")
/// .arg(Arg::with_name("debug")
/// .short("u")
/// .takes_value(true))
/// .get_matches_from_safe(vec![OsString::from_vec(vec![0x20]),
/// OsString::from_vec(vec![0xE9])]);
/// assert!(result.is_err());
/// ```
InvalidUnicode
}
/// Command line argument parser error