wc: count_fast windows optimization

This commit is contained in:
zhitkoff 2023-12-02 16:06:19 -05:00 committed by Yury Zhytkou
parent 9ff7b42d83
commit 54ac5a7e1a

View file

@ -20,6 +20,12 @@ use nix::sys::stat;
use std::io::{Seek, SeekFrom}; use std::io::{Seek, SeekFrom};
#[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(any(target_os = "linux", target_os = "android"))]
use std::os::unix::io::AsRawFd; use std::os::unix::io::AsRawFd;
#[cfg(windows)]
use std::os::windows::fs::MetadataExt;
/// Win32 `FILE_ATTRIBUTE_ARCHIVE` bit (`0x20`, decimal 32), tested against the
/// value returned by `MetadataExt::file_attributes`.
#[cfg(windows)]
const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20;
/// Win32 `FILE_ATTRIBUTE_NORMAL` bit (`0x80`, decimal 128), tested against the
/// value returned by `MetadataExt::file_attributes`.
#[cfg(windows)]
const FILE_ATTRIBUTE_NORMAL: u32 = 0x80;
#[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(any(target_os = "linux", target_os = "android"))]
use libc::S_IFIFO; use libc::S_IFIFO;
@ -76,6 +82,8 @@ fn count_bytes_using_splice(fd: &impl AsRawFd) -> Result<usize, usize> {
/// 1. On Unix, we can simply `stat` the file if it is regular. /// 1. On Unix, we can simply `stat` the file if it is regular.
/// 2. On Linux -- if the above did not work -- we can use splice to count /// 2. On Linux -- if the above did not work -- we can use splice to count
/// the number of bytes if the file is a FIFO. /// the number of bytes if the file is a FIFO.
/// 3. On Windows, we can use `std::os::windows::fs::MetadataExt` to get the file size
/// for regular files.
/// 3. Otherwise, we just read normally, but without the overhead of counting /// 4. Otherwise, we just read normally, but without the overhead of counting
/// other things such as lines and words. /// other things such as lines and words.
#[inline] #[inline]
@ -130,9 +138,12 @@ pub(crate) fn count_bytes_fast<T: WordCountable>(handle: &mut T) -> (usize, Opti
// with size that is NOT a multiple of system page size // with size that is NOT a multiple of system page size
return (stat.st_size as usize, None); return (stat.st_size as usize, None);
} else if let Some(file) = handle.inner_file() { } else if let Some(file) = handle.inner_file() {
// On some platforms `stat.st_blksize` and/or `st.st_size` is of i32 type, // On some platforms `stat.st_blksize` and `st.st_size`
// are of different types: i64 vs i32
// i.e. MacOS on Apple Silicon (aarch64-apple-darwin), // i.e. MacOS on Apple Silicon (aarch64-apple-darwin),
// as well as Debian Linux on ARM (aarch64-unknown-linux-gnu), etc. // Debian Linux on ARM (aarch64-unknown-linux-gnu),
// 32-bit i686 targets, etc.
// On other platforms they are of the same type.
#[allow(clippy::unnecessary_cast)] #[allow(clippy::unnecessary_cast)]
let offset = let offset =
stat.st_size as i64 - stat.st_size as i64 % (stat.st_blksize as i64 + 1); stat.st_size as i64 - stat.st_size as i64 % (stat.st_blksize as i64 + 1);
@ -156,6 +167,22 @@ pub(crate) fn count_bytes_fast<T: WordCountable>(handle: &mut T) -> (usize, Opti
} }
} }
#[cfg(windows)]
{
if let Some(file) = handle.inner_file() {
if let Ok(metadata) = file.metadata() {
let attributes = metadata.file_attributes();
let size = metadata.file_size();
if (attributes & FILE_ATTRIBUTE_ARCHIVE) != 0
|| (attributes & FILE_ATTRIBUTE_NORMAL) != 0
{
return (size as usize, None);
}
}
}
}
// Fall back on `read`, but without the overhead of counting words and lines. // Fall back on `read`, but without the overhead of counting words and lines.
let mut buf = [0_u8; BUF_SIZE]; let mut buf = [0_u8; BUF_SIZE];
loop { loop {