Merge pull request #6226 from mvo5/du-h-precision

du: give `-h` output the same precision as GNU coreutils
This commit is contained in:
Daniel Hofstetter 2024-04-24 17:42:02 +02:00 committed by GitHub
commit 7cee2c5a2b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 113 additions and 59 deletions

1
Cargo.lock generated
View file

@ -3325,6 +3325,7 @@ dependencies = [
"md-5",
"memchr",
"nix",
"number_prefix",
"once_cell",
"os_display",
"sha1",

View file

@ -19,7 +19,7 @@ chrono = { workspace = true }
# For the --exclude & --exclude-from options
glob = { workspace = true }
clap = { workspace = true }
uucore = { workspace = true }
uucore = { workspace = true, features = ["format"] }
[target.'cfg(target_os = "windows")'.dependencies]
windows-sys = { workspace = true, features = [

View file

@ -75,9 +75,6 @@ const ABOUT: &str = help_about!("du.md");
const AFTER_HELP: &str = help_section!("after help", "du.md");
const USAGE: &str = help_usage!("du.md");
// TODO: Support Z & Y (currently limited by size of u64)
const UNITS: [(char, u32); 6] = [('E', 6), ('P', 5), ('T', 4), ('G', 3), ('M', 2), ('K', 1)];
struct TraversalOptions {
all: bool,
separate_dirs: bool,
@ -117,7 +114,8 @@ enum Time {
#[derive(Clone)]
enum SizeFormat {
Human(u64),
HumanDecimal,
HumanBinary,
BlockSize(u64),
}
@ -549,18 +547,14 @@ impl StatPrinter {
return size.to_string();
}
match self.size_format {
SizeFormat::Human(multiplier) => {
if size == 0 {
return "0".to_string();
}
for &(unit, power) in &UNITS {
let limit = multiplier.pow(power);
if size >= limit {
return format!("{:.1}{}", (size as f64) / (limit as f64), unit);
}
}
format!("{size}B")
}
SizeFormat::HumanDecimal => uucore::format::human::human_readable(
size,
uucore::format::human::SizeFormat::Decimal,
),
SizeFormat::HumanBinary => uucore::format::human::human_readable(
size,
uucore::format::human::SizeFormat::Binary,
),
SizeFormat::BlockSize(block_size) => div_ceil(size, block_size).to_string(),
}
}
@ -688,9 +682,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
});
let size_format = if matches.get_flag(options::HUMAN_READABLE) {
SizeFormat::Human(1024)
SizeFormat::HumanBinary
} else if matches.get_flag(options::SI) {
SizeFormat::Human(1000)
SizeFormat::HumanDecimal
} else if matches.get_flag(options::BYTES) {
SizeFormat::BlockSize(1)
} else if matches.get_flag(options::BLOCK_SIZE_1K) {

View file

@ -26,6 +26,7 @@ lscolors = { workspace = true }
uucore = { workspace = true, features = [
"colors",
"entries",
"format",
"fs",
"fsxattr",
"quoting-style",

View file

@ -12,7 +12,6 @@ use clap::{
use glob::{MatchOptions, Pattern};
use lscolors::{LsColors, Style};
use number_prefix::NumberPrefix;
use std::{cell::OnceCell, num::IntErrorKind};
use std::{collections::HashSet, io::IsTerminal};
@ -37,6 +36,7 @@ use std::{
use term_grid::{Cell, Direction, Filling, Grid, GridOptions};
use unicode_width::UnicodeWidthStr;
use uucore::error::USimpleError;
use uucore::format::human::{human_readable, SizeFormat};
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
use uucore::fsxattr::has_acl;
#[cfg(any(
@ -313,13 +313,6 @@ enum Sort {
Width,
}
#[derive(PartialEq)]
enum SizeFormat {
Bytes,
Binary, // Powers of 1024, --human-readable, -h
Decimal, // Powers of 1000, --si
}
#[derive(PartialEq, Eq)]
enum Files {
All,
@ -3038,30 +3031,6 @@ fn display_date(metadata: &Metadata, config: &Config) -> String {
}
}
// There are a few peculiarities to how GNU formats the sizes:
// 1. One decimal place is given if and only if the size is smaller than 10
// 2. It rounds sizes up.
// 3. The human-readable format uses powers for 1024, but does not display the "i"
// that is commonly used to denote Kibi, Mebi, etc.
// 4. Kibi and Kilo are denoted differently ("k" and "K", respectively)
fn format_prefixed(prefixed: &NumberPrefix<f64>) -> String {
match prefixed {
NumberPrefix::Standalone(bytes) => bytes.to_string(),
NumberPrefix::Prefixed(prefix, bytes) => {
// Remove the "i" from "Ki", "Mi", etc. if present
let prefix_str = prefix.symbol().trim_end_matches('i');
// Check whether we get more than 10 if we round up to the first decimal
// because we want do display 9.81 as "9.9", not as "10".
if (10.0 * bytes).ceil() >= 100.0 {
format!("{:.0}{}", bytes.ceil(), prefix_str)
} else {
format!("{:.1}{}", (10.0 * bytes).ceil() / 10.0, prefix_str)
}
}
}
}
#[allow(dead_code)]
enum SizeOrDeviceId {
Size(String),
@ -3104,13 +3073,7 @@ fn display_len_or_rdev(metadata: &Metadata, config: &Config) -> SizeOrDeviceId {
}
fn display_size(size: u64, config: &Config) -> String {
// NOTE: The human-readable behavior deviates from the GNU ls.
// The GNU ls uses binary prefixes by default.
match config.size_format {
SizeFormat::Binary => format_prefixed(&NumberPrefix::binary(size as f64)),
SizeFormat::Decimal => format_prefixed(&NumberPrefix::decimal(size as f64)),
SizeFormat::Bytes => size.to_string(),
}
human_readable(size, config.size_format)
}
#[cfg(unix)]

View file

@ -20,6 +20,7 @@ path = "src/lib/lib.rs"
[dependencies]
clap = { workspace = true }
uucore_procs = { workspace = true }
number_prefix = { workspace = true }
dns-lookup = { version = "2.0.4", optional = true }
dunce = { version = "1.0.4", optional = true }
wild = "2.2"

View file

@ -0,0 +1,65 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore gnulibs sfmt
//! `human`-size formatting
//!
//! Format sizes like gnulibs human_readable() would
use number_prefix::NumberPrefix;
/// Selects how [`human_readable`] renders a byte count.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SizeFormat {
    /// Plain byte count: the number is printed as-is, without a unit.
    Bytes,
    /// Powers of 1024 (`--human-readable`, `-h`).
    Binary,
    /// Powers of 1000 (`--si`).
    Decimal,
}
// There are a few peculiarities to how GNU formats the sizes:
// 1. One decimal place is given if and only if the size is smaller than 10
// 2. It rounds sizes up.
// 3. The human-readable format uses powers for 1024, but does not display the "i"
//    that is commonly used to denote Kibi, Mebi, etc.
// 4. Kibi and Kilo are denoted differently ("K" and "k", respectively)
// 5. When rounding up reaches the base (1024 or 1000), the next larger unit is
//    used instead: 1048575 bytes are shown as "1.0M", not as "1024K".
fn format_prefixed(prefixed: &NumberPrefix<f64>) -> String {
    // Unit letters in ascending order, used to look up the unit that follows
    // the current one when the rounded value spills over.
    const UNIT_LETTERS: [&str; 8] = ["K", "M", "G", "T", "P", "E", "Z", "Y"];

    match prefixed {
        NumberPrefix::Standalone(bytes) => bytes.to_string(),
        NumberPrefix::Prefixed(prefix, bytes) => {
            let symbol = prefix.symbol();
            // Binary prefixes carry a trailing "i" ("Ki", "Mi", ...); decimal ones do not.
            let base = if symbol.ends_with('i') {
                1024.0
            } else {
                1000.0
            };
            // Remove the "i" from "Ki", "Mi", etc. if present
            let prefix_str = symbol.trim_end_matches('i');

            // Check whether we get more than 10 if we round up to the first decimal
            // because we want to display 9.81 as "9.9", not as "10".
            let tenths = (10.0 * bytes).ceil();
            if tenths < 100.0 {
                return format!("{:.1}{}", tenths / 10.0, prefix_str);
            }

            let rounded = bytes.ceil();
            if rounded >= base {
                // Rounding up reached a full unit of the next prefix; switch to it
                // when one exists (the lowercase decimal "k" matches "K" here).
                let next_unit = UNIT_LETTERS
                    .iter()
                    .position(|unit| unit.eq_ignore_ascii_case(prefix_str))
                    .and_then(|idx| UNIT_LETTERS.get(idx + 1));
                if let Some(unit) = next_unit {
                    return format!("1.0{}", unit);
                }
            }
            format!("{:.0}{}", rounded, prefix_str)
        }
    }
}
/// Formats `size`, a byte count, as text according to `sfmt`.
///
/// [`SizeFormat::Bytes`] returns the plain number. The other two variants let
/// `NumberPrefix` scale the value (by powers of 1024 or 1000 respectively)
/// and hand the result to `format_prefixed` for rounding and unit selection.
pub fn human_readable(size: u64, sfmt: SizeFormat) -> String {
    let scaled = match sfmt {
        // No scaling requested: nothing to round, nothing to suffix.
        SizeFormat::Bytes => return size.to_string(),
        SizeFormat::Binary => NumberPrefix::binary(size as f64),
        SizeFormat::Decimal => NumberPrefix::decimal(size as f64),
    };
    format_prefixed(&scaled)
}
// Checks `human_readable` for every `SizeFormat` variant, including values that
// stay unscaled and values that land exactly on the first unit.
#[cfg(test)]
#[test]
fn test_human_readable() {
    // (size in bytes, format, expected rendering)
    let test_cases = [
        (133456345, SizeFormat::Binary, "128M"),
        (12 * 1024 * 1024, SizeFormat::Binary, "12M"),
        (8500, SizeFormat::Binary, "8.4K"),
        (1023, SizeFormat::Binary, "1023"),
        (1024, SizeFormat::Binary, "1.0K"),
        (999, SizeFormat::Decimal, "999"),
        (1000, SizeFormat::Decimal, "1.0k"),
        (8500, SizeFormat::Decimal, "8.5k"),
        (8500, SizeFormat::Bytes, "8500"),
    ];
    for &(size, sfmt, expected_str) in &test_cases {
        assert_eq!(
            human_readable(size, sfmt),
            expected_str,
            "unexpected rendering for size {}",
            size
        );
    }
}

View file

@ -32,6 +32,7 @@
mod argument;
mod escape;
pub mod human;
pub mod num_format;
pub mod num_parser;
mod spec;

View file

@ -3,7 +3,7 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist testdir testfile
// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile
#[cfg(not(windows))]
use regex::Regex;
@ -543,6 +543,34 @@ fn test_du_h_flag_empty_file() {
.stdout_only("0\tempty.txt\n");
}
/// `du -h --apparent-size` must print file sizes with the expected precision
/// (one decimal below 10, none above, always rounded up).
#[test]
fn test_du_h_precision() {
    // (file length in bytes, expected size column)
    let cases = [
        (133456345, "128M"),
        (12 * 1024 * 1024, "12M"),
        (8500, "8.4K"),
    ];

    for &(len, size_column) in &cases {
        let (at, mut ucmd) = at_and_ucmd!();
        let target = at.plus("test.txt");

        // Create a file of exactly `len` bytes; the handle is closed again
        // before `du` runs.
        std::fs::File::create(&target)
            .expect("cannot create test file")
            .set_len(len)
            .expect("cannot truncate test len to size");

        let expected_stdout = format!("{}\t{}\n", size_column, &target.to_string_lossy());

        ucmd.arg("-h")
            .arg("--apparent-size")
            .arg(&target)
            .succeeds()
            .stdout_only(expected_stdout);
    }
}
#[cfg(feature = "touch")]
#[test]
fn test_du_time() {