cp: Implement --sparse flag (#3766)

* cp: Refactor `reflink`/`sparse` handling to enable `--sparse` flag

`--sparse` and `--reflink` options have a lot of similarities:
 - They have similar options (`always`, `never`, `auto`)
 - Both need OS specific handling
 - They can be mutually exclusive

Prior to this change, `sparse` was defined as `CopyMode`, but `reflink`
wasn't. Given the similarities, it makes sense to handle them similarly.

The idea behind this change is to move all OS specific file copy
handling into the `copy_on_write_*` functions. Those functions then
dispatch to the correct logic depending on the arguments (at the moment,
the tuple `(reflink, sparse)`).

Also, move the handling of `--reflink=never` from `copy_file` to the
`copy_on_write_*` functions, at the cost of a bit of code duplication,
to allow `copy_on_write_*` to handle all cases (and later handle
`--reflink=never` with `--sparse`).

* cp: Implement `--sparse` flag

This begins to address #3362

At the moment, only the `--sparse=always` logic matches the requirement
from the GNU cp info page, i.e. always make holes in the destination when
possible.

Sparse copy is done by copying the source to the destination block by
block (blocks being of the destination's fs block size). If the block
only holds NUL bytes, we don't write to the destination.

About `--sparse=auto`: according to the GNU cp info page, the destination
file will be made sparse if the source file is sparse as well. The next
step is likely to use `lseek` with `SEEK_HOLE` to detect whether the
source file has holes. Currently, this has the same behaviour as
`--sparse=never`. This `SEEK_HOLE` logic can also be applied to
`--sparse=always` to improve performance when copying sparse files.

About `--sparse=never`: from my understanding, it is not guaranteed that
Rust's `fs::copy` will always produce a file with no holes, as
["platform-specific behavior may change in the
future"](https://doc.rust-lang.org/std/fs/fn.copy.html#platform-specific-behavior)

About other platforms:
 - `macos`: The solution may be to use `fcntl` command `F_PUNCHHOLE`.
 - `windows`: I only see `FSCTL_SET_SPARSE`.

This should pass the following GNU tests:
 - `tests/cp/sparse.sh`
 - `tests/cp/sparse-2.sh`
 - `tests/cp/sparse-extents.sh`
 - `tests/cp/sparse-extents-2.sh`

`sparse-perf.sh` needs `--sparse=auto`, and in particular a way to skip
holes in the source file.

Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
This commit is contained in:
Pierre Marsais 2022-08-04 12:22:59 +01:00 committed by GitHub
parent 90a9829287
commit e1991525af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 239 additions and 33 deletions

View file

@ -9,7 +9,7 @@
// For the full copyright and license information, please view the LICENSE file
// that was distributed with this source code.
// spell-checker:ignore (ToDO) ficlone linkgs lstat nlink nlinks pathbuf reflink strs xattrs symlinked
// spell-checker:ignore (ToDO) ficlone ftruncate linkgs lstat nlink nlinks pathbuf pwrite reflink strs xattrs symlinked
#[macro_use]
extern crate quick_error;
@ -165,6 +165,14 @@ pub enum ReflinkMode {
Never,
}
/// Possible arguments for `--sparse`.
///
/// `Options::from_matches` maps the command-line value onto this enum and
/// falls back to `Auto` when the flag is not given.
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum SparseMode {
/// `--sparse=always`: on Linux/Android the copy is done block by block and
/// blocks that contain only NUL bytes are not written to the destination.
Always,
/// `--sparse=auto`: the default. No dedicated sparse handling is performed
/// for this value in the copy routines; the reflink mode alone decides how
/// the data is copied.
Auto,
/// `--sparse=never`: rejected together with `--reflink=always`; otherwise
/// handled by the same code paths as `Auto`.
// NOTE(review): nothing here guarantees a hole-free destination — confirm
// whether `fs::copy` / `FICLONE` can still produce holes.
Never,
}
/// Specifies the expected file type of copy target
pub enum TargetType {
Directory,
@ -174,7 +182,6 @@ pub enum TargetType {
pub enum CopyMode {
Link,
SymLink,
Sparse,
Copy,
Update,
AttrOnly,
@ -206,6 +213,7 @@ pub struct Options {
one_file_system: bool,
overwrite: OverwriteMode,
parents: bool,
sparse_mode: SparseMode,
strip_trailing_slashes: bool,
reflink_mode: ReflinkMode,
preserve_attributes: Vec<Attribute>,
@ -439,17 +447,18 @@ pub fn uu_app<'a>() -> Command<'a> {
.short('x')
.long(options::ONE_FILE_SYSTEM)
.help("stay on this file system"))
// `--sparse=WHEN`: parsed into `SparseMode` by `Options::from_matches`.
// The flag is implemented, so its help text must not carry the
// "NotImplemented:" prefix used for the options listed further down.
.arg(Arg::new(options::SPARSE)
    .long(options::SPARSE)
    .takes_value(true)
    .value_name("WHEN")
    .possible_values(["never", "auto", "always"])
    .help("control creation of sparse files. See below"))
// TODO: implement the following args
.arg(Arg::new(options::COPY_CONTENTS)
.long(options::COPY_CONTENTS)
.overrides_with(options::ATTRIBUTES_ONLY)
.help("NotImplemented: copy contents of special files when recursive"))
.arg(Arg::new(options::SPARSE)
.long(options::SPARSE)
.takes_value(true)
.value_name("WHEN")
.help("NotImplemented: control creation of sparse files. See below"))
.arg(Arg::new(options::CONTEXT)
.long(options::CONTEXT)
.takes_value(true)
@ -545,8 +554,6 @@ impl CopyMode {
Self::Link
} else if matches.contains_id(options::SYMBOLIC_LINK) {
Self::SymLink
} else if matches.contains_id(options::SPARSE) {
Self::Sparse
} else if matches.contains_id(options::UPDATE) {
Self::Update
} else if matches.contains_id(options::ATTRIBUTES_ONLY) {
@ -601,7 +608,6 @@ impl Options {
fn from_matches(matches: &ArgMatches) -> CopyResult<Self> {
let not_implemented_opts = vec![
options::COPY_CONTENTS,
options::SPARSE,
#[cfg(not(any(windows, unix)))]
options::ONE_FILE_SYSTEM,
options::CONTEXT,
@ -710,6 +716,18 @@ impl Options {
}
}
},
sparse_mode: match matches.value_of(options::SPARSE) {
Some("always") => SparseMode::Always,
Some("auto") => SparseMode::Auto,
Some("never") => SparseMode::Never,
Some(val) => {
return Err(Error::InvalidArgument(format!(
"invalid argument {} for \'sparse\'",
val
)));
}
None => SparseMode::Auto,
},
backup: backup_mode,
backup_suffix,
overwrite,
@ -1376,7 +1394,6 @@ fn copy_file(
CopyMode::SymLink => {
symlink_file(&source, &dest, context, symlinked_files)?;
}
CopyMode::Sparse => return Err(Error::NotImplemented(options::SPARSE.to_string())),
CopyMode::Update => {
if dest.exists() {
let src_metadata = fs::symlink_metadata(&source)?;
@ -1461,18 +1478,33 @@ fn copy_helper(
copy_fifo(dest, options.overwrite)?;
} else if source_is_symlink {
copy_link(source, dest, symlinked_files)?;
} else if options.reflink_mode != ReflinkMode::Never {
#[cfg(not(any(target_os = "linux", target_os = "android", target_os = "macos")))]
return Err("--reflink is only supported on linux and macOS"
.to_string()
.into());
#[cfg(target_os = "macos")]
copy_on_write_macos(source, dest, options.reflink_mode, context)?;
#[cfg(any(target_os = "linux", target_os = "android"))]
copy_on_write_linux(source, dest, options.reflink_mode, context)?;
} else {
fs::copy(source, dest).context(context)?;
#[cfg(target_os = "macos")]
copy_on_write_macos(
source,
dest,
options.reflink_mode,
options.sparse_mode,
context,
)?;
#[cfg(any(target_os = "linux", target_os = "android"))]
copy_on_write_linux(
source,
dest,
options.reflink_mode,
options.sparse_mode,
context,
)?;
#[cfg(not(any(target_os = "linux", target_os = "android", target_os = "macos")))]
copy_no_cow_fallback(
source,
dest,
options.reflink_mode,
options.sparse_mode,
context,
)?;
}
Ok(())
@ -1522,25 +1554,50 @@ fn copy_link(
symlink_file(&link, &dest, &context_for(&link, &dest), symlinked_files)
}
/// Plain-copy fallback used on targets that are neither Linux, Android nor macOS.
///
/// Only the combination `--reflink=never` with `--sparse=auto` is accepted and
/// results in a regular `fs::copy`. Any other reflink mode is reported as
/// unsupported first; failing that, any other sparse mode is reported as
/// unsupported.
#[cfg(not(any(target_os = "linux", target_os = "android", target_os = "macos")))]
fn copy_no_cow_fallback(
    source: &Path,
    dest: &Path,
    reflink_mode: ReflinkMode,
    sparse_mode: SparseMode,
    context: &str,
) -> CopyResult<()> {
    match (reflink_mode, sparse_mode) {
        // The one supported combination: an ordinary copy.
        (ReflinkMode::Never, SparseMode::Auto) => {
            fs::copy(source, dest).context(context)?;
            Ok(())
        }
        // Reflink is fine, so the sparse request is what cannot be honoured.
        (ReflinkMode::Never, _) => Err("--sparse is only supported on linux".to_string().into()),
        // Any reflink request takes precedence in the error reporting.
        (_, _) => Err("--reflink is only supported on linux and macOS"
            .to_string()
            .into()),
    }
}
/// Copies `source` to `dest` using copy-on-write if possible.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn copy_on_write_linux(
source: &Path,
dest: &Path,
mode: ReflinkMode,
reflink_mode: ReflinkMode,
sparse_mode: SparseMode,
context: &str,
) -> CopyResult<()> {
debug_assert!(mode != ReflinkMode::Never);
use std::os::unix::prelude::MetadataExt;
let src_file = File::open(source).context(context)?;
let mut src_file = File::open(source).context(context)?;
let dst_file = OpenOptions::new()
.write(true)
.truncate(true)
.create(true)
.open(dest)
.context(context)?;
match mode {
ReflinkMode::Always => unsafe {
match (reflink_mode, sparse_mode) {
(ReflinkMode::Always, SparseMode::Auto) => unsafe {
let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());
if result != 0 {
@ -1555,7 +1612,43 @@ fn copy_on_write_linux(
Ok(())
}
},
ReflinkMode::Auto => unsafe {
(ReflinkMode::Always, SparseMode::Always) | (ReflinkMode::Always, SparseMode::Never) => {
Err("`--reflink=always` can be used only with --sparse=auto".into())
}
// `--sparse=always` (with `--reflink=auto` or `--reflink=never`): copy the
// source one destination-block at a time and skip every block that holds only
// NUL bytes, so that those ranges remain holes in the destination.
(_, SparseMode::Always) => {
    use std::io::Read;

    let size: usize = src_file.metadata()?.size().try_into().unwrap();

    // Give the destination its final length up front; every range that is
    // never written below is then read back as zeros.
    // SAFETY: `dst_file` is an open `File` that outlives this call, so the
    // raw descriptor passed to `ftruncate` is valid.
    let truncated =
        unsafe { libc::ftruncate(dst_file.as_raw_fd(), size.try_into().unwrap()) };
    if truncated < 0 {
        return Err(format!(
            "failed to ftruncate {:?} to size {}: {}",
            dest,
            size,
            std::io::Error::last_os_error()
        )
        .into());
    }

    let blksize = dst_file.metadata()?.blksize();
    let mut buf: Vec<u8> = vec![0; blksize.try_into().unwrap()];
    let mut current_offset: usize = 0;

    while current_offset < size {
        let this_read = src_file.read(&mut buf)?;
        if this_read == 0 {
            // End of file before `size` bytes were seen (the source shrank
            // while being copied). Stop instead of looping forever.
            break;
        }

        // Only the bytes just read are meaningful; the tail of `buf` may
        // still hold data from the previous iteration.
        let chunk = &buf[..this_read];
        if chunk.iter().any(|&x| x != 0) {
            let mut written: usize = 0;
            // `pwrite` may write fewer bytes than requested, so keep going
            // until the whole chunk is on disk.
            while written < chunk.len() {
                let pending = &chunk[written..];
                // SAFETY: `pending` is a live slice, so the pointer is valid
                // for reads of `pending.len()` bytes, and `dst_file` keeps
                // the descriptor open for the duration of the call.
                let ret = unsafe {
                    libc::pwrite(
                        dst_file.as_raw_fd(),
                        pending.as_ptr() as *const libc::c_void,
                        pending.len(),
                        (current_offset + written).try_into().unwrap(),
                    )
                };
                // A negative value is an error; zero would mean no progress
                // and is treated as a failure too, to avoid spinning.
                if ret <= 0 {
                    return Err(format!(
                        "failed to write to {:?}: {}",
                        dest,
                        std::io::Error::last_os_error()
                    )
                    .into());
                }
                // `ret` is strictly positive here, so the cast is lossless.
                written += ret as usize;
            }
        }
        current_offset += this_read;
    }
    Ok(())
}
(ReflinkMode::Auto, SparseMode::Auto) | (ReflinkMode::Auto, SparseMode::Never) => unsafe {
let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());
if result != 0 {
@ -1563,7 +1656,10 @@ fn copy_on_write_linux(
}
Ok(())
},
ReflinkMode::Never => unreachable!(),
(ReflinkMode::Never, _) => {
fs::copy(source, dest).context(context)?;
Ok(())
}
}
}
@ -1572,10 +1668,13 @@ fn copy_on_write_linux(
fn copy_on_write_macos(
source: &Path,
dest: &Path,
mode: ReflinkMode,
reflink_mode: ReflinkMode,
sparse_mode: SparseMode,
context: &str,
) -> CopyResult<()> {
debug_assert!(mode != ReflinkMode::Never);
if sparse_mode != SparseMode::Auto {
return Err("--sparse is only supported on linux".to_string().into());
}
// Extract paths in a form suitable to be passed to a syscall.
// The unwrap() is safe because they come from the command-line and so contain non nul
@ -1612,14 +1711,14 @@ fn copy_on_write_macos(
if raw_pfn.is_null() || error != 0 {
// clonefile(2) is either not supported or it errored out (possibly because the FS does not
// support COW).
match mode {
match reflink_mode {
ReflinkMode::Always => {
return Err(
format!("failed to clone {:?} from {:?}: {}", source, dest, error).into(),
)
}
ReflinkMode::Auto => fs::copy(source, dest).context(context)?,
ReflinkMode::Never => unreachable!(),
ReflinkMode::Never => fs::copy(source, dest).context(context)?,
};
}

View file

@ -1388,6 +1388,113 @@ fn test_closes_file_descriptors() {
.succeeds();
}
/// `--sparse=never` on an all-zero source: the destination must have the same
/// contents and its 512-byte block count must account for every byte.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_never_empty() {
    const BUFFER_SIZE: usize = 4096 * 4;
    let source_name = "src_file1";
    let dest_name = "dst_file_non_sparse";
    let zeros = [0u8; BUFFER_SIZE];

    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);
    at.write_bytes(source_name, &zeros);

    ucmd.args(&["--sparse=never", source_name, dest_name])
        .succeeds();

    assert_eq!(at.read_bytes(dest_name), zeros);
    let allocated_bytes = at.metadata(dest_name).blocks() * 512;
    assert_eq!(allocated_bytes, zeros.len() as u64);
}
/// `--sparse=always` on an all-zero source: the destination must read back as
/// the same zeros while occupying no blocks at all.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_empty() {
    const BUFFER_SIZE: usize = 4096 * 4;
    let source_name = "src_file1";
    let dest_name = "dst_file_sparse";
    let zeros = [0u8; BUFFER_SIZE];

    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);
    at.write_bytes(source_name, &zeros);

    ucmd.args(&["--sparse=always", source_name, dest_name])
        .succeeds();

    assert_eq!(at.read_bytes(dest_name), zeros);
    let allocated_blocks = at.metadata(dest_name).blocks();
    assert_eq!(allocated_blocks, 0);
}
/// `--sparse=always` on a mostly-zero source with two non-NUL bytes: the
/// destination must match byte for byte, and its block count must equal the
/// number of marked offsets times the destination block size (in 512-byte units).
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_non_empty() {
    const BUFFER_SIZE: usize = 4096 * 16 + 3;
    let source_name = "src_file1";
    let dest_name = "dst_file_sparse";

    // Zero-filled payload with a single `x` at one third and at two thirds.
    let mut data = [0u8; BUFFER_SIZE];
    let dirty_offsets = [data.len() / 3, 2 * (data.len() / 3)];
    for &offset in &dirty_offsets {
        data[offset] = b'x';
    }

    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);
    at.write_bytes(source_name, &data);

    ucmd.args(&["--sparse=always", source_name, dest_name])
        .succeeds();

    let dest_blksize = at.metadata(dest_name).blksize();
    let expected_blocks = dirty_offsets.len() as u64 * dest_blksize / 512;

    assert_eq!(at.read_bytes(dest_name), data);
    assert_eq!(at.metadata(dest_name).blocks(), expected_blocks);
}
/// A value other than `never`, `auto` or `always` for `--sparse` must make the
/// command fail.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_invalid_option() {
    let source_name = "src_file1";
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);

    let cli_args = ["--sparse=invalid", source_name, "dst_file"];
    ucmd.args(&cli_args).fails();
}
/// `--sparse=always` combined with `--reflink=always` must be rejected.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_reflink_always() {
    let source_name = "src_file1";
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);

    let cli_args = [
        "--sparse=always",
        "--reflink=always",
        source_name,
        "dst_file",
    ];
    ucmd.args(&cli_args).fails();
}
/// `--sparse=never` combined with `--reflink=always` must be rejected.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_never_reflink_always() {
    let source_name = "src_file1";
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file(source_name);

    let cli_args = [
        "--sparse=never",
        "--reflink=always",
        source_name,
        "dst_file",
    ];
    ucmd.args(&cli_args).fails();
}
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_reflink_always_override() {