df: always produce the same order in output table

Change the `filter_mount_list()` function so that it always produces the same order of `MountInfo` objects. This change ultimately results in `df` printing its table of filesystems in the same order on each execution. Previously, the table was in an arbitrary order because the `MountInfo` objects were read from a `HashMap`. Fixes #3086.
2024-11-16 09:48:03 +00:00 · 2022-02-12 21:02:09 -05:00 · 2022-02-12 21:02:09 -05:00 · b6d1aa3e73
commit b6d1aa3e73
parent 042e537ca8
2 changed files with 43 additions and 28 deletions
--- a/src/uu/df/src/df.rs
+++ b/src/uu/df/src/df.rs
@ -14,8 +14,6 @@ use uucore::fsext::{read_fs_list, FsUsage, MountInfo};

 use clap::{crate_version, App, AppSettings, Arg, ArgMatches};

-use std::cell::Cell;
-use std::collections::HashMap;
 use std::collections::HashSet;
 #[cfg(unix)]
 use std::ffi::CString;
@ -218,6 +216,19 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
    true
 }

+/// Whether to prioritize given mount info over all others on the same device.
+///
+/// This function decides whether the mount info `mi` is better than
+/// all others in `previous` that mount the same device as `mi`.
+fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool {
+    for seen in previous {
+        if seen.dev_id == mi.dev_id && mount_info_lt(mi, seen) {
+            return false;
+        }
+    }
+    true
+}
+
 /// Keep only the specified subset of [`MountInfo`] instances.
 ///
 /// If `paths` is non-empty, this function excludes any [`MountInfo`]
@ -229,35 +240,18 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
 /// Finally, if there are duplicate entries, the one with the shorter
 /// path is kept.
 fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
-    let mut mount_info_by_id = HashMap::<String, Cell<MountInfo>>::new();
+    let mut result = vec![];
    for mi in vmi {
-        if !is_included(&mi, paths, opt) {
-            continue;
-        }
-
-        // If the device ID has not been encountered yet, just store it.
-        let id = mi.dev_id.clone();
-        #[allow(clippy::map_entry)]
-        if !mount_info_by_id.contains_key(&id) {
-            mount_info_by_id.insert(id, Cell::new(mi));
-            continue;
-        }
-
-        // Otherwise, if we have seen the current device ID before,
-        // then check if we need to update it or keep the previously
-        // seen one.
-        let seen = mount_info_by_id[&id].replace(mi.clone());
-        if mount_info_lt(&mi, &seen) {
-            mount_info_by_id[&id].replace(seen);
+        // TODO The running time of the `is_best()` function is linear
+        // in the length of `result`. That makes the running time of
+        // this loop quadratic in the length of `vmi`. This could be
+        // improved by a more efficient implementation of `is_best()`,
+        // but `vmi` is probably not very long in practice.
+        if is_included(&mi, paths, opt) && is_best(&result, &mi) {
+            result.push(mi);
        }
    }
-
-    // Take ownership of the `MountInfo` instances and collect them
-    // into a `Vec`.
-    mount_info_by_id
-        .into_values()
-        .map(|m| m.into_inner())
-        .collect()
+    result
 }

 #[uucore::main]
--- a/tests/by-util/test_df.rs
+++ b/tests/by-util/test_df.rs
@ -37,4 +37,25 @@ fn test_df_output() {
    }
 }

+/// Test that the order of rows in the table does not change across executions.
+#[test]
+fn test_order_same() {
+    // TODO When #3057 is resolved, we should just use
+    //
+    //     new_ucmd!().arg("--output=source").succeeds().stdout_move_str();
+    //
+    // instead of parsing the entire `df` table as a string.
+    let output1 = new_ucmd!().succeeds().stdout_move_str();
+    let output2 = new_ucmd!().succeeds().stdout_move_str();
+    let output1: Vec<String> = output1
+        .lines()
+        .map(|l| String::from(l.split_once(' ').unwrap().0))
+        .collect();
+    let output2: Vec<String> = output2
+        .lines()
+        .map(|l| String::from(l.split_once(' ').unwrap().0))
+        .collect();
+    assert_eq!(output1, output2);
+}
+
 // ToDO: more tests...