mirror of
https://github.com/uutils/coreutils
synced 2024-11-16 17:58:06 +00:00
Merge pull request #2621 from blyxxyz/filename-quoting
Implement proper quoting/escaping for filenames
This commit is contained in:
commit
a517671d55
11 changed files with 602 additions and 93 deletions
|
@ -1,3 +1,4 @@
|
|||
AFAICT
|
||||
arity
|
||||
autogenerate
|
||||
autogenerated
|
||||
|
|
|
@ -8,6 +8,7 @@ csh
|
|||
globstar
|
||||
inotify
|
||||
localtime
|
||||
mksh
|
||||
mountinfo
|
||||
mountpoint
|
||||
mtab
|
||||
|
|
|
@ -21,6 +21,7 @@ use lscolors::LsColors;
|
|||
use number_prefix::NumberPrefix;
|
||||
use once_cell::unsync::OnceCell;
|
||||
use quoting_style::{escape_name, QuotingStyle};
|
||||
use std::ffi::OsString;
|
||||
#[cfg(windows)]
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
use std::{
|
||||
|
@ -248,7 +249,7 @@ struct LongFormat {
|
|||
|
||||
impl Config {
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
fn from(options: clap::ArgMatches) -> UResult<Config> {
|
||||
fn from(options: &clap::ArgMatches) -> UResult<Config> {
|
||||
let (mut format, opt) = if let Some(format_) = options.value_of(options::FORMAT) {
|
||||
(
|
||||
match format_ {
|
||||
|
@ -428,11 +429,10 @@ impl Config {
|
|||
#[allow(clippy::needless_bool)]
|
||||
let show_control = if options.is_present(options::HIDE_CONTROL_CHARS) {
|
||||
false
|
||||
} else if options.is_present(options::SHOW_CONTROL_CHARS) || atty::is(atty::Stream::Stdout)
|
||||
{
|
||||
} else if options.is_present(options::SHOW_CONTROL_CHARS) {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
!atty::is(atty::Stream::Stdout)
|
||||
};
|
||||
|
||||
let quoting_style = if let Some(style) = options.value_of(options::QUOTING_STYLE) {
|
||||
|
@ -599,22 +599,19 @@ impl Config {
|
|||
|
||||
#[uucore_procs::gen_uumain]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let args = args
|
||||
.collect_str(InvalidEncodingHandling::Ignore)
|
||||
.accept_any();
|
||||
|
||||
let usage = usage();
|
||||
|
||||
let app = uu_app().usage(&usage[..]);
|
||||
|
||||
let matches = app.get_matches_from(args);
|
||||
|
||||
let config = Config::from(&matches)?;
|
||||
let locs = matches
|
||||
.values_of(options::PATHS)
|
||||
.map(|v| v.map(ToString::to_string).collect())
|
||||
.unwrap_or_else(|| vec![String::from(".")]);
|
||||
.values_of_os(options::PATHS)
|
||||
.map(|v| v.map(Path::new).collect())
|
||||
.unwrap_or_else(|| vec![Path::new(".")]);
|
||||
|
||||
list(locs, Config::from(matches)?)
|
||||
list(locs, config)
|
||||
}
|
||||
|
||||
pub fn uu_app() -> App<'static, 'static> {
|
||||
|
@ -1177,7 +1174,7 @@ struct PathData {
|
|||
md: OnceCell<Option<Metadata>>,
|
||||
ft: OnceCell<Option<FileType>>,
|
||||
// Name of the file - will be empty for . or ..
|
||||
display_name: String,
|
||||
display_name: OsString,
|
||||
// PathBuf that all above data corresponds to
|
||||
p_buf: PathBuf,
|
||||
must_dereference: bool,
|
||||
|
@ -1187,7 +1184,7 @@ impl PathData {
|
|||
fn new(
|
||||
p_buf: PathBuf,
|
||||
file_type: Option<std::io::Result<FileType>>,
|
||||
file_name: Option<String>,
|
||||
file_name: Option<OsString>,
|
||||
config: &Config,
|
||||
command_line: bool,
|
||||
) -> Self {
|
||||
|
@ -1195,16 +1192,13 @@ impl PathData {
|
|||
// For '..', the filename is None
|
||||
let display_name = if let Some(name) = file_name {
|
||||
name
|
||||
} else {
|
||||
let display_os_str = if command_line {
|
||||
p_buf.as_os_str()
|
||||
} else if command_line {
|
||||
p_buf.clone().into()
|
||||
} else {
|
||||
p_buf
|
||||
.file_name()
|
||||
.unwrap_or_else(|| p_buf.iter().next_back().unwrap())
|
||||
};
|
||||
|
||||
display_os_str.to_string_lossy().into_owned()
|
||||
.to_owned()
|
||||
};
|
||||
let must_dereference = match &config.dereference {
|
||||
Dereference::All => true,
|
||||
|
@ -1249,14 +1243,14 @@ impl PathData {
|
|||
}
|
||||
}
|
||||
|
||||
fn list(locs: Vec<String>, config: Config) -> UResult<()> {
|
||||
fn list(locs: Vec<&Path>, config: Config) -> UResult<()> {
|
||||
let mut files = Vec::<PathData>::new();
|
||||
let mut dirs = Vec::<PathData>::new();
|
||||
|
||||
let mut out = BufWriter::new(stdout());
|
||||
|
||||
for loc in &locs {
|
||||
let p = PathBuf::from(&loc);
|
||||
let p = PathBuf::from(loc);
|
||||
let path_data = PathData::new(p, None, None, &config, true);
|
||||
|
||||
if path_data.md().is_none() {
|
||||
|
@ -1286,6 +1280,7 @@ fn list(locs: Vec<String>, config: Config) -> UResult<()> {
|
|||
sort_entries(&mut dirs, &config);
|
||||
for dir in dirs {
|
||||
if locs.len() > 1 || config.recursive {
|
||||
// FIXME: This should use the quoting style and propagate errors
|
||||
let _ = writeln!(out, "\n{}:", dir.p_buf.display());
|
||||
}
|
||||
enter_directory(&dir, &config, &mut out);
|
||||
|
@ -1671,7 +1666,6 @@ fn get_inode(metadata: &Metadata) -> String {
|
|||
use std::sync::Mutex;
|
||||
#[cfg(unix)]
|
||||
use uucore::entries;
|
||||
use uucore::InvalidEncodingHandling;
|
||||
|
||||
#[cfg(unix)]
|
||||
fn cached_uid2usr(uid: u32) -> String {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use std::char::from_digit;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
// These are characters with special meaning in the shell (e.g. bash).
|
||||
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
||||
|
@ -255,19 +256,21 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
|
|||
(escaped_str, must_quote)
|
||||
}
|
||||
|
||||
pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String {
|
||||
pub(super) fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
||||
match style {
|
||||
QuotingStyle::Literal { show_control } => {
|
||||
if !show_control {
|
||||
name.chars()
|
||||
name.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||
.collect()
|
||||
} else {
|
||||
name.into()
|
||||
name.to_string_lossy().into_owned()
|
||||
}
|
||||
}
|
||||
QuotingStyle::C { quotes } => {
|
||||
let escaped_str: String = name
|
||||
.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_c(c, *quotes))
|
||||
.collect();
|
||||
|
@ -283,6 +286,7 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String {
|
|||
always_quote,
|
||||
show_control,
|
||||
} => {
|
||||
let name = name.to_string_lossy();
|
||||
let (quotes, must_quote) = if name.contains('"') {
|
||||
(Quotes::Single, true)
|
||||
} else if name.contains('\'') {
|
||||
|
@ -294,9 +298,9 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String {
|
|||
};
|
||||
|
||||
let (escaped_str, contains_quote_chars) = if *escape {
|
||||
shell_with_escape(name, quotes)
|
||||
shell_with_escape(&name, quotes)
|
||||
} else {
|
||||
shell_without_escape(name, quotes, *show_control)
|
||||
shell_without_escape(&name, quotes, *show_control)
|
||||
};
|
||||
|
||||
match (must_quote | contains_quote_chars, quotes) {
|
||||
|
@ -362,7 +366,7 @@ mod tests {
|
|||
fn check_names(name: &str, map: Vec<(&str, &str)>) {
|
||||
assert_eq!(
|
||||
map.iter()
|
||||
.map(|(_, style)| escape_name(name, &get_style(style)))
|
||||
.map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
|
||||
.collect::<Vec<String>>(),
|
||||
map.iter()
|
||||
.map(|(correct, _)| correct.to_string())
|
||||
|
|
|
@ -10,9 +10,10 @@ extern crate uucore;
|
|||
|
||||
use clap::{crate_version, App, Arg};
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use uucore::display::println_verbatim;
|
||||
use uucore::error::{FromIo, UResult};
|
||||
|
||||
static ABOUT: &str = "Display the full filename of the current working directory.";
|
||||
|
@ -57,6 +58,7 @@ fn logical_path() -> io::Result<PathBuf> {
|
|||
// POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pwd.html
|
||||
#[cfg(not(windows))]
|
||||
{
|
||||
use std::path::Path;
|
||||
fn looks_reasonable(path: &Path) -> bool {
|
||||
// First, check if it's an absolute path.
|
||||
if !path.has_root() {
|
||||
|
@ -148,30 +150,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
.map(Into::into)
|
||||
.unwrap_or(cwd);
|
||||
|
||||
print_path(&cwd).map_err_context(|| "failed to print current directory".to_owned())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_path(path: &Path) -> io::Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
// On Unix we print non-lossily.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
stdout.write_all(path.as_os_str().as_bytes())?;
|
||||
stdout.write_all(b"\n")?;
|
||||
}
|
||||
|
||||
// On other platforms we potentially mangle it.
|
||||
// There might be some clever way to do it correctly on Windows, but
|
||||
// invalid unicode in filenames is rare there.
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
writeln!(stdout, "{}", path.display())?;
|
||||
}
|
||||
println_verbatim(&cwd).map_err_context(|| "failed to print current directory".to_owned())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ use clap::{crate_version, App, Arg};
|
|||
use std::fs::{read_dir, remove_dir};
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use uucore::display::Quotable;
|
||||
use uucore::error::{set_exit_code, strip_errno, UResult};
|
||||
use uucore::util_name;
|
||||
|
||||
|
@ -77,27 +78,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
Ok(path.metadata()?.file_type().is_dir())
|
||||
}
|
||||
|
||||
let path = path.as_os_str().as_bytes();
|
||||
if error.raw_os_error() == Some(libc::ENOTDIR) && path.ends_with(b"/") {
|
||||
let bytes = path.as_os_str().as_bytes();
|
||||
if error.raw_os_error() == Some(libc::ENOTDIR) && bytes.ends_with(b"/") {
|
||||
// Strip the trailing slash or .symlink_metadata() will follow the symlink
|
||||
let path: &Path = OsStr::from_bytes(&path[..path.len() - 1]).as_ref();
|
||||
if is_symlink(path).unwrap_or(false)
|
||||
&& points_to_directory(path).unwrap_or(true)
|
||||
let no_slash: &Path = OsStr::from_bytes(&bytes[..bytes.len() - 1]).as_ref();
|
||||
if is_symlink(no_slash).unwrap_or(false)
|
||||
&& points_to_directory(no_slash).unwrap_or(true)
|
||||
{
|
||||
show_error!(
|
||||
"failed to remove '{}/': Symbolic link not followed",
|
||||
path.display()
|
||||
"failed to remove {}: Symbolic link not followed",
|
||||
path.quote()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
show_error!(
|
||||
"failed to remove '{}': {}",
|
||||
path.display(),
|
||||
strip_errno(&error)
|
||||
);
|
||||
show_error!("failed to remove {}: {}", path.quote(), strip_errno(&error));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,7 +122,7 @@ fn remove(mut path: &Path, opts: Opts) -> Result<(), Error<'_>> {
|
|||
|
||||
fn remove_single(path: &Path, opts: Opts) -> Result<(), Error<'_>> {
|
||||
if opts.verbose {
|
||||
println!("{}: removing directory, '{}'", util_name(), path.display());
|
||||
println!("{}: removing directory, {}", util_name(), path.quote());
|
||||
}
|
||||
remove_dir(path).map_err(|error| Error { error, path })
|
||||
}
|
||||
|
|
|
@ -24,6 +24,8 @@ use std::fs::{self, File};
|
|||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use uucore::display::{Quotable, Quoted};
|
||||
|
||||
/// The minimum character width for formatting counts when reading from stdin.
|
||||
const MINIMUM_WIDTH: usize = 7;
|
||||
|
||||
|
@ -122,10 +124,10 @@ impl Input {
|
|||
}
|
||||
}
|
||||
|
||||
fn path_display(&self) -> std::path::Display<'_> {
|
||||
fn path_display(&self) -> Quoted<'_> {
|
||||
match self {
|
||||
Input::Path(path) => path.display(),
|
||||
Input::Stdin(_) => Path::display("'standard input'".as_ref()),
|
||||
Input::Path(path) => path.maybe_quote(),
|
||||
Input::Stdin(_) => "standard input".maybe_quote(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -448,7 +450,10 @@ fn wc(inputs: Vec<Input>, settings: &Settings) -> Result<(), u32> {
|
|||
if let Err(err) = print_stats(settings, &result, max_width) {
|
||||
show_warning!(
|
||||
"failed to print result for {}: {}",
|
||||
result.title.unwrap_or_else(|| "<stdin>".as_ref()).display(),
|
||||
result
|
||||
.title
|
||||
.unwrap_or_else(|| "<stdin>".as_ref())
|
||||
.maybe_quote(),
|
||||
err
|
||||
);
|
||||
failure = true;
|
||||
|
@ -526,7 +531,7 @@ fn print_stats(
|
|||
}
|
||||
|
||||
if let Some(title) = result.title {
|
||||
writeln!(stdout_lock, " {}", title.display())?;
|
||||
writeln!(stdout_lock, " {}", title.maybe_quote())?;
|
||||
} else {
|
||||
writeln!(stdout_lock)?;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ mod parser; // string parsing modules
|
|||
// * cross-platform modules
|
||||
pub use crate::mods::backup_control;
|
||||
pub use crate::mods::coreopts;
|
||||
pub use crate::mods::display;
|
||||
pub use crate::mods::error;
|
||||
pub use crate::mods::os;
|
||||
pub use crate::mods::panic;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
pub mod backup_control;
|
||||
pub mod coreopts;
|
||||
pub mod display;
|
||||
pub mod error;
|
||||
pub mod os;
|
||||
pub mod panic;
|
||||
|
|
534
src/uucore/src/lib/mods/display.rs
Normal file
534
src/uucore/src/lib/mods/display.rs
Normal file
|
@ -0,0 +1,534 @@
|
|||
//! Utilities for printing paths, with special attention paid to special
//! characters and invalid unicode.
//!
//! For displaying paths in informational messages use `Quotable::quote`. This
//! will wrap quotes around the filename and add the necessary escapes to make
//! it copy/paste-able into a shell.
//!
//! For writing raw paths to stdout when the output should not be quoted or escaped,
//! use `println_verbatim`. This will preserve invalid unicode.
//!
//! # Examples
//! ```
//! use std::path::Path;
//! use uucore::display::{Quotable, println_verbatim};
//!
//! let path = Path::new("foo/bar.baz");
//!
//! println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
//! println_verbatim(path)?; // Prints "foo/bar.baz"
//! # Ok::<(), std::io::Error>(())
//! ```
|
||||
// spell-checker:ignore Fbar
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
#[cfg(any(unix, target_os = "wasi", windows))]
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::io::{self, Write as IoWrite};
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
#[cfg(target_os = "wasi")]
|
||||
use std::os::wasi::ffi::OsStrExt;
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
use std::str::from_utf8;
|
||||
|
||||
/// An extension trait for displaying filenames to users.
|
||||
pub trait Quotable {
|
||||
/// Returns an object that implements [`Display`] for printing filenames with
|
||||
/// proper quoting and escaping for the platform.
|
||||
///
|
||||
/// On Unix this corresponds to sh/bash syntax, on Windows Powershell syntax
|
||||
/// is used.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use std::path::Path;
|
||||
/// use uucore::display::Quotable;
|
||||
///
|
||||
/// let path = Path::new("foo/bar.baz");
|
||||
///
|
||||
/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
|
||||
/// ```
|
||||
fn quote(&self) -> Quoted<'_>;
|
||||
|
||||
/// Like `quote()`, but don't actually add quotes unless necessary because of
|
||||
/// whitespace or special characters.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use std::path::Path;
|
||||
/// use uucore::display::Quotable;
|
||||
/// use uucore::show_error;
|
||||
///
|
||||
/// let foo = Path::new("foo/bar.baz");
|
||||
/// let bar = Path::new("foo bar");
|
||||
///
|
||||
/// show_error!("{}: Not found", foo.maybe_quote()); // Prints "util: foo/bar.baz: Not found"
|
||||
/// show_error!("{}: Not found", bar.maybe_quote()); // Prints "util: 'foo bar': Not found"
|
||||
/// ```
|
||||
fn maybe_quote(&self) -> Quoted<'_> {
|
||||
let mut quoted = self.quote();
|
||||
quoted.force_quote = false;
|
||||
quoted
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_as_ref {
|
||||
($type: ty) => {
|
||||
impl Quotable for $type {
|
||||
fn quote(&self) -> Quoted<'_> {
|
||||
Quoted::new(self.as_ref())
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_as_ref!(&'_ str);
|
||||
impl_as_ref!(String);
|
||||
impl_as_ref!(&'_ std::path::Path);
|
||||
impl_as_ref!(std::path::PathBuf);
|
||||
impl_as_ref!(std::path::Component<'_>);
|
||||
impl_as_ref!(std::path::Components<'_>);
|
||||
impl_as_ref!(std::path::Iter<'_>);
|
||||
impl_as_ref!(&'_ std::ffi::OsStr);
|
||||
impl_as_ref!(std::ffi::OsString);
|
||||
|
||||
// Cow<'_, str> does not implement AsRef<OsStr> and this is unlikely to be fixed
|
||||
// for backward compatibility reasons. Otherwise we'd use a blanket impl.
|
||||
impl Quotable for Cow<'_, str> {
|
||||
fn quote(&self) -> Quoted<'_> {
|
||||
let text: &str = self.as_ref();
|
||||
Quoted::new(text.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrowed [`OsStr`] that is printed with quoting and escaping applied when
/// formatted through `Display`.
#[derive(Debug, Copy, Clone)]
pub struct Quoted<'a> {
    /// The text to print.
    text: &'a OsStr,
    /// When `false`, quotes are left out unless the text requires them.
    force_quote: bool,
}

impl<'a> Quoted<'a> {
    /// Wraps `text`; quoting is forced until a caller turns it off.
    fn new(text: &'a OsStr) -> Self {
        Self {
            force_quote: true,
            text,
        }
    }
}
|
||||
|
||||
impl Display for Quoted<'_> {
|
||||
#[cfg(any(windows, unix, target_os = "wasi"))]
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
// On Unix we emulate sh syntax. On Windows Powershell.
|
||||
// They're just similar enough to share some code.
|
||||
|
||||
/// Characters with special meaning outside quotes.
|
||||
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02
|
||||
// I don't know why % is in there, and GNU doesn't quote it either.
|
||||
// {} were used in a version elsewhere but seem unnecessary, GNU doesn't
|
||||
// quote them. They're used in function definitions but not in a way we
|
||||
// have to worry about.
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=";
|
||||
// FIXME: I'm not a PowerShell wizard and don't know if this is correct.
|
||||
// I just copied the Unix version, removed \, and added ,{} based on
|
||||
// experimentation.
|
||||
// I have noticed that ~?*[] only get expanded in some contexts, so watch
|
||||
// out for that if doing your own tests.
|
||||
// Get-ChildItem seems unwilling to quote anything so it doesn't help.
|
||||
// There's the additional wrinkle that Windows has stricter requirements
|
||||
// for filenames: I've been testing using a Linux build of PowerShell, but
|
||||
// this code doesn't even compile on Linux.
|
||||
#[cfg(windows)]
|
||||
const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\"'*?[]=,{}";
|
||||
|
||||
/// Characters with a special meaning at the beginning of a name.
|
||||
// ~ expands a home directory.
|
||||
// # starts a comment.
|
||||
// ! is a common extension for expanding the shell history.
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!'];
|
||||
// Same deal as before, this is possibly incomplete.
|
||||
// A single stand-alone exclamation mark seems to have some special meaning.
|
||||
#[cfg(windows)]
|
||||
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '!'];
|
||||
|
||||
/// Characters that are interpreted specially in a double-quoted string.
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$', b'\\'];
|
||||
#[cfg(windows)]
|
||||
const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$'];
|
||||
|
||||
let text = match self.text.to_str() {
|
||||
None => return write_escaped(f, self.text),
|
||||
Some(text) => text,
|
||||
};
|
||||
|
||||
let mut is_single_safe = true;
|
||||
let mut is_double_safe = true;
|
||||
let mut requires_quote = self.force_quote;
|
||||
|
||||
if let Some(first) = text.chars().next() {
|
||||
if SPECIAL_SHELL_CHARS_START.contains(&first) {
|
||||
requires_quote = true;
|
||||
}
|
||||
// Unlike in Unix, quoting an argument may stop it
|
||||
// from being recognized as an option. I like that very much.
|
||||
// But we don't want to quote "-" because that's a common
|
||||
// special argument and PowerShell doesn't mind it.
|
||||
#[cfg(windows)]
|
||||
if first == '-' && text.len() > 1 {
|
||||
requires_quote = true;
|
||||
}
|
||||
} else {
|
||||
// Empty string
|
||||
requires_quote = true;
|
||||
}
|
||||
|
||||
for ch in text.chars() {
|
||||
if ch.is_ascii() {
|
||||
let ch = ch as u8;
|
||||
if ch == b'\'' {
|
||||
is_single_safe = false;
|
||||
}
|
||||
if DOUBLE_UNSAFE.contains(&ch) {
|
||||
is_double_safe = false;
|
||||
}
|
||||
if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) {
|
||||
requires_quote = true;
|
||||
}
|
||||
if ch.is_ascii_control() {
|
||||
return write_escaped(f, self.text);
|
||||
}
|
||||
}
|
||||
if !requires_quote && ch.is_whitespace() {
|
||||
// This includes unicode whitespace.
|
||||
// We maybe don't have to escape it, we don't escape other lookalike
|
||||
// characters either, but it's confusing if it goes unquoted.
|
||||
requires_quote = true;
|
||||
}
|
||||
}
|
||||
|
||||
if !requires_quote {
|
||||
return f.write_str(text);
|
||||
} else if is_single_safe {
|
||||
return write_simple(f, text, '\'');
|
||||
} else if is_double_safe {
|
||||
return write_simple(f, text, '\"');
|
||||
} else {
|
||||
return write_single_escaped(f, text);
|
||||
}
|
||||
|
||||
fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result {
|
||||
f.write_char(quote)?;
|
||||
f.write_str(text)?;
|
||||
f.write_char(quote)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result {
|
||||
let mut iter = text.split('\'');
|
||||
if let Some(chunk) = iter.next() {
|
||||
if !chunk.is_empty() {
|
||||
write_simple(f, chunk, '\'')?;
|
||||
}
|
||||
}
|
||||
for chunk in iter {
|
||||
f.write_str("\\'")?;
|
||||
if !chunk.is_empty() {
|
||||
write_simple(f, chunk, '\'')?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write using the syntax described here:
|
||||
/// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
|
||||
///
|
||||
/// Supported by these shells:
|
||||
/// - bash
|
||||
/// - zsh
|
||||
/// - busybox sh
|
||||
/// - mksh
|
||||
///
|
||||
/// Not supported by these:
|
||||
/// - fish
|
||||
/// - dash
|
||||
/// - tcsh
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result {
|
||||
f.write_str("$'")?;
|
||||
for chunk in from_utf8_iter(text.as_bytes()) {
|
||||
match chunk {
|
||||
Ok(chunk) => {
|
||||
for ch in chunk.chars() {
|
||||
match ch {
|
||||
'\n' => f.write_str("\\n")?,
|
||||
'\t' => f.write_str("\\t")?,
|
||||
'\r' => f.write_str("\\r")?,
|
||||
// We could do \b, \f, \v, etc., but those are
|
||||
// rare enough to be confusing.
|
||||
// \0 doesn't work consistently because of the
|
||||
// octal \nnn syntax, and null bytes can't appear
|
||||
// in filenames anyway.
|
||||
ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?,
|
||||
'\\' | '\'' => {
|
||||
// '?' and '"' can also be escaped this way
|
||||
// but AFAICT there's no reason to do so
|
||||
f.write_char('\\')?;
|
||||
f.write_char(ch)?;
|
||||
}
|
||||
ch => {
|
||||
f.write_char(ch)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(unit) => write!(f, "\\x{:02X}", unit)?,
|
||||
}
|
||||
}
|
||||
f.write_char('\'')?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result {
|
||||
// Quotes in Powershell can be escaped by doubling them
|
||||
f.write_char('\'')?;
|
||||
let mut iter = text.split('\'');
|
||||
if let Some(chunk) = iter.next() {
|
||||
f.write_str(chunk)?;
|
||||
}
|
||||
for chunk in iter {
|
||||
f.write_str("''")?;
|
||||
f.write_str(chunk)?;
|
||||
}
|
||||
f.write_char('\'')?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result {
|
||||
// ` takes the role of \ since \ is already used as the path separator.
|
||||
// Things are UTF-16-oriented, so we escape code units as "`u{1234}".
|
||||
use std::char::decode_utf16;
|
||||
use std::os::windows::ffi::OsStrExt;
|
||||
|
||||
f.write_char('"')?;
|
||||
for ch in decode_utf16(text.encode_wide()) {
|
||||
match ch {
|
||||
Ok(ch) => match ch {
|
||||
'\0' => f.write_str("`0")?,
|
||||
'\r' => f.write_str("`r")?,
|
||||
'\n' => f.write_str("`n")?,
|
||||
'\t' => f.write_str("`t")?,
|
||||
ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?,
|
||||
'`' => f.write_str("``")?,
|
||||
'$' => f.write_str("`$")?,
|
||||
'"' => f.write_str("\"\"")?,
|
||||
ch => f.write_char(ch)?,
|
||||
},
|
||||
Err(err) => write!(f, "`u{{{:04X}}}", err.unpaired_surrogate())?,
|
||||
}
|
||||
}
|
||||
f.write_char('"')?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(unix, target_os = "wasi", windows)))]
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
// As a fallback, we use Rust's own escaping rules.
|
||||
// This is reasonably sane and very easy to implement.
|
||||
// We use single quotes because that's hardcoded in a lot of tests.
|
||||
let text = self.text.to_string_lossy();
|
||||
if self.force_quote || !text.chars().all(|ch| ch.is_alphanumeric() || ch == '.') {
|
||||
write!(f, "'{}'", text.escape_debug())
|
||||
} else {
|
||||
f.write_str(&text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `bytes` into maximal runs of valid UTF-8 (`Ok`) and individual
/// bytes that are not part of valid UTF-8 (`Err`), in input order.
#[cfg(any(unix, target_os = "wasi"))]
fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator<Item = Result<&str, u8>> {
    std::iter::from_fn(move || {
        // Nothing left means the iterator is exhausted.
        let (&first, tail) = bytes.split_first()?;
        let item = match from_utf8(bytes) {
            // Everything that remains is valid: hand it out in one piece.
            Ok(text) => {
                bytes = &[];
                Ok(text)
            }
            Err(err) => match err.valid_up_to() {
                // The very first byte is bad: report it on its own.
                0 => {
                    bytes = tail;
                    Err(first)
                }
                // Hand out the valid prefix; the bad byte comes next.
                valid_len => {
                    let (valid, rest) = bytes.split_at(valid_len);
                    bytes = rest;
                    Ok(from_utf8(valid).unwrap())
                }
            },
        };
        Some(item)
    })
}
|
||||
|
||||
/// Writes a path (or anything else viewable as an `OsStr`) to stdout followed
/// by a newline, keeping the text intact even when its encoding is invalid.
///
/// Use this in commands whose whole purpose is to print paths and whose output
/// is likely to be captured, such as `pwd` and `basename`. Informational
/// messages should go through `Quotable::quote` instead.
///
/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level
/// API that takes UTF-16, without going through io::Write. This is not a big priority
/// because broken filenames are much rarer on Windows than on Unix.
pub fn println_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
    let text = text.as_ref();
    let handle = io::stdout();
    let mut out = handle.lock();

    // Here the raw bytes can be written exactly as they are.
    #[cfg(any(unix, target_os = "wasi"))]
    {
        let raw: &[u8] = text.as_bytes();
        let newline: &[u8] = b"\n";
        for part in &[raw, newline] {
            out.write_all(part)?;
        }
    }

    // Elsewhere the text goes through the lossy path display.
    #[cfg(not(any(unix, target_os = "wasi")))]
    {
        writeln!(out, "{}", std::path::Path::new(text).display())?;
    }

    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `quote()` renders each input as its paired expectation.
    fn verify_quote(cases: &[(impl Quotable, &str)]) {
        for (input, want) in cases.iter() {
            let rendered = input.quote().to_string();
            assert_eq!(rendered, *want);
        }
    }

    /// Asserts that `maybe_quote()` renders each input as its paired expectation.
    fn verify_maybe(cases: &[(impl Quotable, &str)]) {
        for (input, want) in cases.iter() {
            let rendered = input.maybe_quote().to_string();
            assert_eq!(rendered, *want);
        }
    }

    /// These expectations must be met on every platform, since other tests
    /// depend on them.
    #[test]
    fn test_basic() {
        let forced = [
            ("foo", "'foo'"),
            ("", "''"),
            ("foo/bar.baz", "'foo/bar.baz'"),
        ];
        verify_quote(&forced);

        let optional = [
            ("foo", "foo"),
            ("", "''"),
            ("foo bar", "'foo bar'"),
            ("$foo", "'$foo'"),
            ("-", "-"),
        ];
        verify_maybe(&optional);
    }

    /// Characters that are only special at the start of a name.
    #[cfg(any(unix, target_os = "wasi", windows))]
    #[test]
    fn test_common() {
        let optional = [
            ("a#b", "a#b"),
            ("#ab", "'#ab'"),
            ("a~b", "a~b"),
            ("!", "'!'"),
        ];
        verify_maybe(&optional);
    }

    /// sh-style quoting, including the escaped form for control characters
    /// and invalid UTF-8.
    #[cfg(any(unix, target_os = "wasi"))]
    #[test]
    fn test_unix() {
        let forced = [
            ("can't", r#""can't""#),
            (r#"can'"t"#, r#"'can'\''"t'"#),
            (r#"can'$t"#, r#"'can'\''$t'"#),
            ("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#),
            ("foo\x02", r#"$'foo\x02'"#),
            (r#"'$''"#, r#"\''$'\'\'"#),
        ];
        verify_quote(&forced);

        let invalid_utf8 = [(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)];
        verify_quote(&invalid_utf8);

        let optional = [
            ("-x", "-x"),
            ("a,b", "a,b"),
            ("a\\b", "'a\\b'"),
            ("}", "}"),
        ];
        verify_maybe(&optional);
    }

    /// PowerShell-style quoting, including the escaped form for control
    /// characters and unpaired surrogates.
    #[cfg(windows)]
    #[test]
    fn test_windows() {
        use std::ffi::OsString;
        use std::os::windows::ffi::OsStringExt;

        let forced = [
            (r#"foo\bar"#, r#"'foo\bar'"#),
            ("can't", r#""can't""#),
            (r#"can'"t"#, r#"'can''"t'"#),
            (r#"can'$t"#, r#"'can''$t'"#),
            ("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#),
            ("foo\x02", r#""foo`u{0002}""#),
            (r#"'$''"#, r#"'''$'''''"#),
        ];
        verify_quote(&forced);

        let unpaired_surrogate = [(
            OsString::from_wide(&[b'x' as u16, 0xD800]),
            r#""x`u{D800}""#,
        )];
        verify_quote(&unpaired_surrogate);

        let optional = [
            ("-x", "'-x'"),
            ("a,b", "'a,b'"),
            ("a\\b", "a\\b"),
            ("}", "'}'"),
        ];
        verify_maybe(&optional);
    }

    /// `from_utf8_iter` must yield valid runs whole and bad bytes one by one.
    #[cfg(any(unix, target_os = "wasi"))]
    #[test]
    fn test_utf8_iter() {
        fn check(input: &[u8], expected: &[Result<&str, u8>]) {
            assert_eq!(
                from_utf8_iter(input).collect::<Vec<_>>().as_slice(),
                expected
            );
        }

        check(b"", &[]);
        check(b"hello", &[Ok("hello")]);
        // Immediately invalid
        check(b"\xFF", &[Err(b'\xFF')]);
        // Incomplete UTF-8
        check(b"\xC2", &[Err(b'\xC2')]);
        check(b"\xF4\x8F", &[Err(b'\xF4'), Err(b'\x8F')]);
        check(b"\xFF\xFF", &[Err(b'\xFF'), Err(b'\xFF')]);
        check(b"hello\xC2", &[Ok("hello"), Err(b'\xC2')]);
        check(b"\xFFhello", &[Err(b'\xFF'), Ok("hello")]);
        check(b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]);
        check(b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]);
        check(
            b"foo\xF4\x8Fbar",
            &[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")],
        );
        check(
            b"foo\xFF\xC2bar",
            &[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")],
        );
    }
}
|
|
@ -354,6 +354,7 @@ fn test_ls_long_format() {
|
|||
at.mkdir(&at.plus_as_string("test-long-dir/test-long-dir"));
|
||||
|
||||
for arg in &["-l", "--long", "--format=long", "--format=verbose"] {
|
||||
#[allow(unused_variables)]
|
||||
let result = scene.ucmd().arg(arg).arg("test-long-dir").succeeds();
|
||||
// Assuming sane username do not have spaces within them.
|
||||
// A line of the output should be:
|
||||
|
@ -373,6 +374,7 @@ fn test_ls_long_format() {
|
|||
).unwrap());
|
||||
}
|
||||
|
||||
#[allow(unused_variables)]
|
||||
let result = scene.ucmd().arg("-lan").arg("test-long-dir").succeeds();
|
||||
// This checks for the line with the .. entry. The uname and group should be digits.
|
||||
#[cfg(not(windows))]
|
||||
|
@ -1416,6 +1418,7 @@ fn test_ls_quoting_style() {
|
|||
// Default is shell-escape
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--hide-control-chars")
|
||||
.arg("one\ntwo")
|
||||
.succeeds()
|
||||
.stdout_only("'one'$'\\n''two'\n");
|
||||
|
@ -1437,23 +1440,8 @@ fn test_ls_quoting_style() {
|
|||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg(arg)
|
||||
.arg("one\ntwo")
|
||||
.succeeds()
|
||||
.stdout_only(format!("{}\n", correct));
|
||||
}
|
||||
|
||||
for (arg, correct) in &[
|
||||
("--quoting-style=literal", "one?two"),
|
||||
("-N", "one?two"),
|
||||
("--literal", "one?two"),
|
||||
("--quoting-style=shell", "one?two"),
|
||||
("--quoting-style=shell-always", "'one?two'"),
|
||||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg(arg)
|
||||
.arg("--hide-control-chars")
|
||||
.arg(arg)
|
||||
.arg("one\ntwo")
|
||||
.succeeds()
|
||||
.stdout_only(format!("{}\n", correct));
|
||||
|
@ -1463,7 +1451,7 @@ fn test_ls_quoting_style() {
|
|||
("--quoting-style=literal", "one\ntwo"),
|
||||
("-N", "one\ntwo"),
|
||||
("--literal", "one\ntwo"),
|
||||
("--quoting-style=shell", "one\ntwo"),
|
||||
("--quoting-style=shell", "one\ntwo"), // FIXME: GNU ls quotes this case
|
||||
("--quoting-style=shell-always", "'one\ntwo'"),
|
||||
] {
|
||||
scene
|
||||
|
@ -1490,6 +1478,7 @@ fn test_ls_quoting_style() {
|
|||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--hide-control-chars")
|
||||
.arg(arg)
|
||||
.arg("one\\two")
|
||||
.succeeds()
|
||||
|
@ -1505,6 +1494,7 @@ fn test_ls_quoting_style() {
|
|||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--hide-control-chars")
|
||||
.arg(arg)
|
||||
.arg("one\n&two")
|
||||
.succeeds()
|
||||
|
@ -1535,6 +1525,7 @@ fn test_ls_quoting_style() {
|
|||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--hide-control-chars")
|
||||
.arg(arg)
|
||||
.arg("one two")
|
||||
.succeeds()
|
||||
|
@ -1558,6 +1549,7 @@ fn test_ls_quoting_style() {
|
|||
] {
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--hide-control-chars")
|
||||
.arg(arg)
|
||||
.arg("one")
|
||||
.succeeds()
|
||||
|
|
Loading…
Reference in a new issue