numfmt: align format output values with GNU (#1745)

When converting to SI or IEC, produce values that align with the conventions used by GNU numfmt:

- values >= 10 are represented without a decimal place, so 10000 becomes 10K instead of 10.0K
- when truncating, take the ceiling of the value, so 100001 becomes 101K
- values < 10 are rounded up to the next tenth, so 1001 becomes 1.1K

Closes #1726
2024-11-16 09:48:03 +00:00 · 2021-03-06 12:26:05 -05:00 · 2021-03-06 12:26:05 -05:00 · d06f91fbe2
commit d06f91fbe2
parent c06967a45a
7 changed files with 341 additions and 41 deletions
--- a/src/uu/numfmt/src/numfmt.rs
+++ b/src/uu/numfmt/src/numfmt.rs
@ -50,8 +50,9 @@ fn get_usage() -> String {
    format!("{0} [OPTION]... [NUMBER]...", executable!())
 }

+/// Powers of 1000, from 1000^0 up to 1000^9; the element at index `i` is 1000^i.
+const SI_BASES: [f64; 10] = [1., 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24, 1e27];
+
 const IEC_BASES: [f64; 10] = [
-    //premature optimization
    1.,
    1_024.,
    1_048_576.,
@ -75,6 +76,7 @@ enum Unit {
    None,
 }

+#[derive(Clone, Copy, Debug)]
 enum RawSuffix {
    K,
    M,
@ -201,38 +203,74 @@ fn remove_suffix(i: f64, s: Option<Suffix>, u: &Unit) -> Result<f64> {

 fn transform_from(s: &str, opts: &Transform) -> Result<f64> {
    let (i, suffix) = parse_suffix(s)?;
-    remove_suffix(i, suffix, &opts.unit).map(|n| n.round())
+
+    // Round to a whole number away from zero: a negative value has its
+    // magnitude rounded up and the sign restored, any other value is
+    // rounded up directly.
+    remove_suffix(i, suffix, &opts.unit).map(|n| if n < 0.0 { -n.abs().ceil() } else { n.ceil() })
 }

-fn consider_suffix(i: f64, u: &Unit) -> Result<(f64, Option<Suffix>)> {
-    let j = i.abs();
-    match *u {
-        Unit::Si => match j {
-            _ if j < 1e3 => Ok((i, None)),
-            _ if j < 1e6 => Ok((i / 1e3, Some((RawSuffix::K, false)))),
-            _ if j < 1e9 => Ok((i / 1e6, Some((RawSuffix::M, false)))),
-            _ if j < 1e12 => Ok((i / 1e9, Some((RawSuffix::G, false)))),
-            _ if j < 1e15 => Ok((i / 1e12, Some((RawSuffix::T, false)))),
-            _ if j < 1e18 => Ok((i / 1e15, Some((RawSuffix::P, false)))),
-            _ if j < 1e21 => Ok((i / 1e18, Some((RawSuffix::E, false)))),
-            _ if j < 1e24 => Ok((i / 1e21, Some((RawSuffix::Z, false)))),
-            _ if j < 1e27 => Ok((i / 1e24, Some((RawSuffix::Y, false)))),
-            _ => Err("Number is too big and unsupported".to_owned()),
-        },
-        Unit::Iec(with_i) => match j {
-            _ if j < IEC_BASES[1] => Ok((i, None)),
-            _ if j < IEC_BASES[2] => Ok((i / IEC_BASES[1], Some((RawSuffix::K, with_i)))),
-            _ if j < IEC_BASES[3] => Ok((i / IEC_BASES[2], Some((RawSuffix::M, with_i)))),
-            _ if j < IEC_BASES[4] => Ok((i / IEC_BASES[3], Some((RawSuffix::G, with_i)))),
-            _ if j < IEC_BASES[5] => Ok((i / IEC_BASES[4], Some((RawSuffix::T, with_i)))),
-            _ if j < IEC_BASES[6] => Ok((i / IEC_BASES[5], Some((RawSuffix::P, with_i)))),
-            _ if j < IEC_BASES[7] => Ok((i / IEC_BASES[6], Some((RawSuffix::E, with_i)))),
-            _ if j < IEC_BASES[8] => Ok((i / IEC_BASES[7], Some((RawSuffix::Z, with_i)))),
-            _ if j < IEC_BASES[9] => Ok((i / IEC_BASES[8], Some((RawSuffix::Y, with_i)))),
-            _ => Err("Number is too big and unsupported".to_owned()),
-        },
-        Unit::Auto => Err("Unit 'auto' isn't supported with --to options".to_owned()),
-        Unit::None => Ok((i, None)),
+/// Divide `n` by `d`, rounding the magnitude of the quotient up.
+///
+/// The sign of the quotient is set aside, its absolute value is rounded
+/// up, and the sign is then reapplied, so results are rounded away from
+/// zero rather than truncated.
+///
+/// If the absolute value of the quotient is less than 10.0, it is rounded
+/// up to the next tenth.
+///
+/// Otherwise, it is rounded up to the next whole number.
+///
+/// Examples:
+///
+/// ```
+/// use uu_numfmt::div_ceil;
+///
+/// assert_eq!(div_ceil(1.01, 1.0), 1.1);
+/// assert_eq!(div_ceil(999.1, 1000.), 1.0);
+/// assert_eq!(div_ceil(1001., 10.), 101.);
+/// assert_eq!(div_ceil(9991., 10.), 1000.);
+/// assert_eq!(div_ceil(-12.34, 1.0), -13.0);
+/// assert_eq!(div_ceil(1000.0, -3.14), -319.0);
+/// assert_eq!(div_ceil(-271828.0, -271.0), 1004.0);
+/// ```
+pub fn div_ceil(n: f64, d: f64) -> f64 {
+    // Divide by a tenth of `d`, so `v` is the quotient expressed in tenths.
+    let v = n / (d / 10.0);
+    // Split off the sign so that `ceil` always rounds the magnitude up.
+    let (v, sign) = if v < 0.0 { (v.abs(), -1.0) } else { (v, 1.0) };
+
+    if v < 100.0 {
+        // Quotient below 10: round up at the tenths position.
+        v.ceil() / 10.0 * sign
+    } else {
+        // Quotient of 10 or more: round up to a whole number.
+        (v / 10.0).ceil() * sign
+    }
+}
+
+/// Scale `n` for output in the unit system `u` and pick the matching suffix.
+///
+/// Returns the value to print together with its suffix. The suffix is
+/// `None` when `n` is returned unscaled: for `Unit::None`, or when the
+/// magnitude of `n` is no greater than one below the first base.
+///
+/// Scaling goes through `div_ceil`, so the scaled magnitude is rounded up.
+/// If that rounding reaches the first base (e.g. 1000 for SI), the value
+/// is expressed in the next larger unit instead.
+///
+/// # Errors
+///
+/// Returns an error for `Unit::Auto`, when the magnitude of `n` is not
+/// below the largest base, and when rounding pushes a value that is
+/// already in the largest unit past the last available suffix.
+fn consider_suffix(n: f64, u: &Unit) -> Result<(f64, Option<Suffix>)> {
+    use RawSuffix::*;
+
+    let abs_n = n.abs();
+    let suffixes = [K, M, G, T, P, E, Z, Y];
+
+    let (bases, with_i) = match *u {
+        Unit::Si => (&SI_BASES, false),
+        Unit::Iec(with_i) => (&IEC_BASES, with_i),
+        Unit::Auto => return Err("Unit 'auto' isn't supported with --to options".to_owned()),
+        Unit::None => return Ok((n, None)),
+    };
+
+    // Magnitudes up to one below the first base are printed unscaled.
+    if abs_n <= bases[1] - 1.0 {
+        return Ok((n, None));
+    }
+
+    // Index of the base to divide by: the first `i >= 1` for which
+    // `abs_n < bases[i + 1]`.
+    let i = match bases[2..].iter().position(|&base| abs_n < base) {
+        Some(pos) => pos + 1,
+        None => return Err("Number is too big and unsupported".to_string()),
+    };
+
+    let v = div_ceil(n, bases[i]);
+
+    // check if rounding pushed us into the next base
+    if v.abs() >= bases[1] {
+        // `i` is one past the end of `suffixes` when `v` was already in the
+        // largest unit, so look the suffix up with a bounds check instead
+        // of indexing (which would panic).
+        match suffixes.get(i) {
+            Some(&suffix) => Ok((v / bases[1], Some((suffix, with_i)))),
+            None => Err("Number is too big and unsupported".to_string()),
+        }
+    } else {
+        Ok((v, Some((suffixes[i - 1], with_i))))
    }
 }

@ -240,7 +278,8 @@ fn transform_to(s: f64, opts: &Transform) -> Result<String> {
    let (i2, s) = consider_suffix(s, &opts.unit)?;
    Ok(match s {
        None => format!("{}", i2),
-        Some(s) => format!("{:.1}{}", i2, DisplayableSuffix(s)),
+        Some(s) if i2.abs() < 10.0 => format!("{:.1}{}", i2, DisplayableSuffix(s)),
+        Some(s) => format!("{:.0}{}", i2, DisplayableSuffix(s)),
    })
 }

--- a/tests/by-util/test_numfmt.rs
+++ b/tests/by-util/test_numfmt.rs
@ -15,16 +15,25 @@ fn test_from_iec() {
        .args(&["--from=iec"])
        .pipe_in("1024\n1.1M\n0.1G")
        .run()
-        .stdout_is("1024\n1153434\n107374182\n");
+        .stdout_is("1024\n1153434\n107374183\n");
 }

 #[test]
 fn test_from_iec_i() {
    new_ucmd!()
        .args(&["--from=iec-i"])
-        .pipe_in("1024\n1.1Mi\n0.1Gi")
+        .pipe_in("1.1Mi\n0.1Gi")
        .run()
-        .stdout_is("1024\n1153434\n107374182\n");
+        .stdout_is("1153434\n107374183\n");
+}
+
+// Expected behavior, currently ignored (see the FIXME below): `--from=iec-i`
+// given the input `1024`, which has no 'i' suffix, fails and reports the
+// missing suffix on stderr.
+#[test]
+#[ignore] // FIXME: GNU from iec-i requires suffix
+fn test_from_iec_i_requires_suffix() {
+    new_ucmd!()
+        .args(&["--from=iec-i", "1024"])
+        .fails()
+        .stderr_is("numfmt: missing 'i' suffix in input: ‘1024’ (e.g Ki/Mi/Gi)");
 }

 #[test]
@ -42,7 +51,7 @@ fn test_to_si() {
        .args(&["--to=si"])
        .pipe_in("1000\n1100000\n100000000")
        .run()
-        .stdout_is("1.0K\n1.1M\n100.0M\n");
+        .stdout_is("1.0K\n1.1M\n100M\n");
 }

 #[test]
@ -51,7 +60,7 @@ fn test_to_iec() {
        .args(&["--to=iec"])
        .pipe_in("1024\n1153434\n107374182")
        .run()
-        .stdout_is("1.0K\n1.1M\n102.4M\n");
+        .stdout_is("1.0K\n1.2M\n103M\n");
 }

 #[test]
@ -60,7 +69,7 @@ fn test_to_iec_i() {
        .args(&["--to=iec-i"])
        .pipe_in("1024\n1153434\n107374182")
        .run()
-        .stdout_is("1.0Ki\n1.1Mi\n102.4Mi\n");
+        .stdout_is("1.0Ki\n1.2Mi\n103Mi\n");
 }

 #[test]
@ -142,7 +151,7 @@ fn test_negative() {
        .args(&["--to=iec-i"])
        .pipe_in("-1024\n-1153434\n-107374182")
        .run()
-        .stdout_is("-1.0Ki\n-1.1Mi\n-102.4Mi\n");
+        .stdout_is("-1.0Ki\n-1.2Mi\n-103Mi\n");
 }

 #[test]
@ -159,7 +168,7 @@ fn test_normalize() {
        .args(&["--from=si", "--to=si"])
        .pipe_in("10000000K\n0.001K")
        .run()
-        .stdout_is("10.0G\n1\n");
+        .stdout_is("10G\n1\n");
 }

 #[test]
@ -167,7 +176,7 @@ fn test_si_to_iec() {
    new_ucmd!()
        .args(&["--from=si", "--to=iec", "15334263563K"])
        .run()
-        .stdout_is("13.9T\n");
+        .stdout_is("14T\n");
 }

 #[test]
@ -279,3 +288,30 @@ fn test_should_calculate_implicit_padding_per_free_argument() {
        .run()
        .stdout_is("  1024\n      2000\n");
 }
+
+// Pipes the `gnutest_si_input.txt` fixture through `--to=si` and expects
+// stdout to match the `gnutest_si_result.txt` fixture.
+#[test]
+fn test_to_si_should_truncate_output() {
+    new_ucmd!()
+        .args(&["--to=si"])
+        .pipe_in_fixture("gnutest_si_input.txt")
+        .succeeds()
+        .stdout_is_fixture("gnutest_si_result.txt");
+}
+
+// Pipes the `gnutest_iec_input.txt` fixture through `--to=iec` and expects
+// stdout to match the `gnutest_iec_result.txt` fixture.
+#[test]
+fn test_to_iec_should_truncate_output() {
+    new_ucmd!()
+        .args(&["--to=iec"])
+        .pipe_in_fixture("gnutest_iec_input.txt")
+        .succeeds()
+        .stdout_is_fixture("gnutest_iec_result.txt");
+}
+
+// Pipes the `gnutest_iec_input.txt` fixture (the same input file the
+// `--to=iec` test uses) through `--to=iec-i` and expects stdout to match
+// the `gnutest_iec-i_result.txt` fixture.
+#[test]
+fn test_to_iec_i_should_truncate_output() {
+    new_ucmd!()
+        .args(&["--to=iec-i"])
+        .pipe_in_fixture("gnutest_iec_input.txt")
+        .succeeds()
+        .stdout_is_fixture("gnutest_iec-i_result.txt");
+}
--- a/tests/fixtures/numfmt/gnutest_iec-i_result.txt
+++ b/tests/fixtures/numfmt/gnutest_iec-i_result.txt
@ -0,0 +1,49 @@
+-1.1Ki
+-1.1Ki
+-1.0Ki
+-1.0Ki
+-1023
+0
+1
+1023
+1.0Ki
+1.1Ki
+1.1Ki
+1.2Ki
+1.5Ki
+1.6Ki
+1.9Ki
+2.0Ki
+2.0Ki
+2.0Ki
+2.0Ki
+2.1Ki
+10Ki
+10Ki
+10Ki
+100Ki
+100Ki
+100Ki
+949Ki
+950Ki
+950Ki
+951Ki
+951Ki
+952Ki
+990Ki
+991Ki
+995Ki
+995Ki
+996Ki
+996Ki
+997Ki
+999Ki
+1000Ki
+1023Ki
+1.0Mi
+1.0Mi
+1.0Mi
+1.1Mi
+1.0Gi
+1.0Gi
+1.1Gi
--- a/tests/fixtures/numfmt/gnutest_iec_input.txt
+++ b/tests/fixtures/numfmt/gnutest_iec_input.txt
@ -0,0 +1,49 @@
+-1025
+-1024.1
+-1024
+-1023.1
+-1023
+0
+1
+1023
+1024
+1025
+1126
+1127
+1536
+1537
+1945
+1946
+1996
+1997
+2048
+2049
+10188
+10189
+10240
+102348
+102349
+102400
+971776
+972288
+972800
+972801
+973824
+973825
+1013760
+1013761
+1018879
+1018880
+1018881
+1019904
+1019905
+1022976
+1022977
+1047552
+1047553
+1048575
+1048576
+1048577
+1073741823
+1073741824
+1073741825
--- a/tests/fixtures/numfmt/gnutest_iec_result.txt
+++ b/tests/fixtures/numfmt/gnutest_iec_result.txt
@ -0,0 +1,49 @@
+-1.1K
+-1.1K
+-1.0K
+-1.0K
+-1023
+0
+1
+1023
+1.0K
+1.1K
+1.1K
+1.2K
+1.5K
+1.6K
+1.9K
+2.0K
+2.0K
+2.0K
+2.0K
+2.1K
+10K
+10K
+10K
+100K
+100K
+100K
+949K
+950K
+950K
+951K
+951K
+952K
+990K
+991K
+995K
+995K
+996K
+996K
+997K
+999K
+1000K
+1023K
+1.0M
+1.0M
+1.0M
+1.1M
+1.0G
+1.0G
+1.1G
--- a/tests/fixtures/numfmt/gnutest_si_input.txt
+++ b/tests/fixtures/numfmt/gnutest_si_input.txt
@ -0,0 +1,39 @@
+-1001
+-999.1
+-999
+1
+500
+999
+999.1
+1000
+1000.1
+1001
+9900
+9901
+9949
+9950
+9951
+10000
+10001
+10500
+10999
+50000
+99000
+99001
+99900
+99949
+99950
+100000
+100001
+100999
+101000
+101001
+999000
+999001
+999949
+999950
+999999
+1000000
+1000001
+999000000.1
+999000001
--- a/tests/fixtures/numfmt/gnutest_si_result.txt
+++ b/tests/fixtures/numfmt/gnutest_si_result.txt
@ -0,0 +1,39 @@
+-1.1K
+-1.0K
+-999
+1
+500
+999
+1.0K
+1.0K
+1.1K
+1.1K
+9.9K
+10K
+10K
+10K
+10K
+10K
+11K
+11K
+11K
+50K
+99K
+100K
+100K
+100K
+100K
+100K
+101K
+101K
+101K
+102K
+999K
+1.0M
+1.0M
+1.0M
+1.0M
+1.0M
+1.1M
+1.0G
+1.0G