Merge pull request #2521 from miDeb/sort/rand

sort: improve compatibility of --random-sort
This commit is contained in:
Sylvestre Ledru 2021-07-31 21:00:18 +02:00 committed by GitHub
commit 103a9d52ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 40 deletions

View file

@ -29,7 +29,6 @@ use custom_str_cmp::custom_str_cmp;
use ext_sort::ext_sort;
use fnv::FnvHasher;
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use rayon::prelude::*;
use std::cmp::Ordering;
@ -183,7 +182,7 @@ pub struct GlobalSettings {
unique: bool,
check: bool,
check_silent: bool,
salt: String,
salt: Option<[u8; 16]>,
selectors: Vec<FieldSelector>,
separator: Option<char>,
threads: String,
@ -266,7 +265,7 @@ impl Default for GlobalSettings {
unique: false,
check: false,
check_silent: false,
salt: String::new(),
salt: None,
selectors: vec![],
separator: None,
threads: String::new(),
@ -1006,7 +1005,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} else if matches.is_present(options::modes::RANDOM)
|| matches.value_of(options::modes::SORT) == Some("random")
{
settings.salt = get_rand_string();
settings.salt = Some(get_rand_string());
SortMode::Random
} else {
SortMode::Default
@ -1086,9 +1085,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if let Some(values) = matches.values_of(options::KEY) {
for value in values {
settings
.selectors
.push(FieldSelector::parse(value, &settings));
let selector = FieldSelector::parse(value, &settings);
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
settings.salt = Some(get_rand_string());
}
settings.selectors.push(selector);
}
}
@ -1397,7 +1398,22 @@ fn compare_by<'a>(
let settings = &selector.settings;
let cmp: Ordering = match settings.mode {
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
SortMode::Random => {
// check if the two strings are equal
if custom_str_cmp(
a_str,
b_str,
settings.ignore_non_printing,
settings.dictionary_order,
settings.ignore_case,
) == Ordering::Equal
{
Ordering::Equal
} else {
// Only if they are not equal compare by the hash
random_shuffle(a_str, b_str, &global_settings.salt.unwrap())
}
}
SortMode::Numeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
@ -1546,12 +1562,8 @@ fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult)
a.partial_cmp(b).unwrap()
}
fn get_rand_string() -> String {
thread_rng()
.sample_iter(&Alphanumeric)
.take(16)
.map(char::from)
.collect::<String>()
/// Draws 16 random bytes from the thread-local RNG.
///
/// Despite the name, the result is a raw byte array rather than a `String`.
fn get_rand_string() -> [u8; 16] {
    let mut rng = thread_rng();
    rng.sample(rand::distributions::Standard)
}
fn get_hash<T: Hash>(t: &T) -> u64 {
@ -1560,10 +1572,9 @@ fn get_hash<T: Hash>(t: &T) -> u64 {
s.finish()
}
fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering {
let da = get_hash(&[a, salt].concat());
let db = get_hash(&[b, salt].concat());
fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering {
let da = get_hash(&(a, salt));
let db = get_hash(&(b, salt));
da.cmp(&db)
}

View file

@ -220,32 +220,29 @@ fn test_random_shuffle_contains_all_lines() {
#[test]
fn test_random_shuffle_two_runs_not_the_same() {
    // Check that a random shuffle differs both from the input and from a
    // second shuffle of the same input. This has the potential to fail in the
    // very unlikely event that the random order is the same as the starting
    // order, or if both random sorts end up having the same order.
    const FILE: &str = "default_unsorted_ints.expected";

    // Exercise both the global flag and the per-key `R` modifier.
    for arg in &["-R", "-k1,1R"] {
        let (at, _ucmd) = at_and_ucmd!();
        let result = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();
        let expected = at.read(FILE);
        let unexpected = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();

        assert_ne!(result, expected);
        assert_ne!(result, unexpected);
    }
}
#[test]
fn test_random_shuffle_contains_two_runs_not_the_same() {
// check to verify that two random shuffles are not equal; this has the
// potential to fail in the unlikely event that random order is the same
// as the starting order, or if both random sorts end up having the same order.
const FILE: &str = "default_unsorted_ints.expected";
let (at, _ucmd) = at_and_ucmd!();
let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
let expected = at.read(FILE);
let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
assert_ne!(result, expected);
assert_ne!(result, unexpected);
fn test_random_ignore_case() {
    // Every line is a case variant of "abc". Run with `-fR` (presumably
    // ignore-case plus random sort) and expect the output to match the input
    // byte for byte, i.e. the lines are not reordered.
    const CASE_VARIANTS: &str = "ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n";

    new_ucmd!()
        .args(&["-fR"])
        .pipe_in(CASE_VARIANTS)
        .succeeds()
        .stdout_is(CASE_VARIANTS);
}
#[test]