Fix bug in human_numeric_convert

This commit is contained in:
electricboogie 2021-04-25 12:27:11 -05:00
parent cb0c667da5
commit 094d9a9e47
4 changed files with 33 additions and 36 deletions

3
Cargo.lock generated
View file

@ -1432,6 +1432,9 @@ name = "smallvec"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
dependencies = [
"serde",
]
[[package]]
name = "strsim"

View file

@ -23,7 +23,7 @@ clap = "2.33"
fnv = "1.0.7"
itertools = "0.10.0"
semver = "0.9.0"
smallvec = "1.6.1"
smallvec = { version="1.6.1", features=["serde"] }
unicode-width = "0.1.8"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -27,7 +27,7 @@ use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use rayon::prelude::*;
use semver::Version;
use serde::{Deserializer, Deserialize, Serialize};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
@ -41,6 +41,7 @@ use std::ops::Range;
use std::path::Path;
use unicode_width::UnicodeWidthStr;
use uucore::fs::is_stdin_interactive; // for Iterator::dedup()
use std::path::PathBuf;
static NAME: &str = "sort";
static ABOUT: &str = "Display sorted concatenation of all FILE(s).";
@ -133,22 +134,20 @@ impl GlobalSettings {
// It's back to do conversions for command line opts!
// Probably want to do through numstrcmp somehow now?
/// Converts a string made of a number followed by an optional unit suffix
/// (for example `50K`) into a `usize`, by multiplying the number by the
/// multiplier selected from the suffix.
///
/// NOTE(review): this hunk is rendered without +/- markers, so two versions
/// of the body are interleaved below. The `f64`/`char`-based statements look
/// like the removed version and the `usize`/`&str`-based statements like the
/// added one — confirm against the raw diff.
///
/// # Panics
/// The `usize`-based version panics through `expect` (message
/// "Error parsing buffer size: ") when the numeric part does not parse as a
/// `usize`.
fn human_numeric_convert(a: &str) -> usize {
// Presumably the removed version: take the leading numeric text, hand it to
// `permissive_f64_parse`, and read the remainder as a single `char` suffix.
// A remainder that is not exactly one character falls back to '\0'.
let num_part = leading_num_common(a);
let (_, s) = a.split_at(num_part.len());
let num_part = permissive_f64_parse(num_part);
let suffix = match s.parse().unwrap_or('\0') {
// Presumably the added version: slice the number out of `a` using the range
// returned by `get_leading_gen`, parse it strictly as `usize`, and match the
// upper-cased remainder as a whole string.
// NOTE(review): `split_at` receives `num_str.len()` as an offset from the
// start of `a`; if the range from `get_leading_gen` does not start at 0 the
// suffix would be cut at the wrong position — confirm.
let num_str = &a[get_leading_gen(a)];
let (_, suf_str) = a.split_at(num_str.len());
let num_usize = num_str.parse::<usize>().expect("Error parsing buffer size: ");
let suf_usize: usize = match suf_str.to_uppercase().as_str() {
// SI Units
// Floating-point multipliers keyed by a single character; any other
// character (including the '\0' fallback) multiplies by 1.
'K' | 'k' => 1E3,
'M' => 1E6,
'G' => 1E9,
'T' => 1E12,
'P' => 1E15,
'E' => 1E18,
'Z' => 1E21,
'Y' => 1E24,
_ => 1f64,
// Decimal multipliers as `usize`; any suffix other than K/M/G/T/P/E
// (including an empty one) multiplies by 1.
// NOTE(review): the T/P/E literals exceed the range of a 32-bit `usize` —
// confirm which targets must be supported.
"K" => 1000usize,
"M" => 1000000usize,
"G" => 1000000000usize,
"T" => 1000000000000usize,
"P" => 1000000000000000usize,
"E" => 1000000000000000000usize,
_ => 1usize,
};
// f64-based result: both operands are cast to `usize` before multiplying.
num_part as usize * suffix as usize
// usize-based result.
// NOTE(review): this is a plain `*`, so a large number combined with a large
// suffix can overflow `usize`.
num_usize * suf_usize
}
}
@ -236,22 +235,13 @@ impl SelectionRange {
}
}
// NOTE(review): two `derive` lines appear because this hunk is rendered
// without +/- markers; they differ only in whether `Debug` is derived.
// Presumably only one of them exists in the file at a time.
/// Per-selection numeric cache, stored in the `num_cache` field of
/// `Selection`. Derives `Serialize`/`Deserialize` and `Clone`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone)]
enum NumCache {
// Routes deserialization of the `AsF64` variant through `bailout_parse_f64`.
#[serde(deserialize_with="bailout_parse_f64")]
/// Holds a `GeneralF64ParseResult`.
AsF64(GeneralF64ParseResult),
/// Holds a `NumInfo`.
WithInfo(NumInfo),
/// Carries no payload — presumably means nothing is cached; verify against
/// the code that constructs it.
None,
}
// Only used when serde can't parse a null value
/// Custom serde deserializer: reads an optional `f64` from `d` and returns
/// `0.0` when the deserialized value is `None`.
///
/// Named by the `deserialize_with` attribute on `NumCache::AsF64`.
///
/// # Errors
/// Returns whatever error the underlying `Deserialize::deserialize` call
/// produces.
fn bailout_parse_f64<'de, D>(d: D) -> Result<f64, D::Error> where D: Deserializer<'de> {
Deserialize::deserialize(d)
// The closure's `Option<_>` annotation makes serde deserialize an
// `Option<f64>`; a `None` is replaced by 0.0.
.map(|x: Option<_>| {
x.unwrap_or(0f64)
})
}
impl NumCache {
fn as_f64(&self) -> GeneralF64ParseResult {
match self {
@ -266,7 +256,7 @@ impl NumCache {
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone)]
struct Selection {
range: SelectionRange,
num_cache: NumCache,
@ -1218,7 +1208,7 @@ fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
if settings.merge {
if settings.unique {
print_sorted(
file_merger.dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal),
file_merger.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal),
&settings,
)
} else {
@ -1228,7 +1218,7 @@ fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
print_sorted(
lines
.into_iter()
.dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal),
.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal),
&settings,
)
} else {
@ -1303,11 +1293,15 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering
// serde JSON has issues with f64 null values, so caching them won't work for us with ext sort
SortMode::GeneralNumeric =>
if global_settings.buffer_size == DEFAULT_BUF_SIZE {
general_numeric_compare(a_selection.num_cache.as_f64(),
b_selection.num_cache.as_f64())
general_numeric_compare(
a_selection.num_cache.as_f64(),
b_selection.num_cache.as_f64()
)
} else {
general_numeric_compare(permissive_f64_parse(get_leading_gen(a_str)),
permissive_f64_parse(get_leading_gen(b_str)))
general_numeric_compare(
general_f64_parse(&a_str[get_leading_gen(a_str)]),
general_f64_parse(&b_str[get_leading_gen(b_str)])
)
},
SortMode::Month => month_compare(a_str, b_str),
SortMode::Version => version_compare(a_str, b_str),
@ -1385,7 +1379,7 @@ fn get_leading_gen(input: &str) -> Range<usize> {
leading_whitespace_len..input.len()
}
#[derive(Copy, Clone, PartialEq, PartialOrd)]
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)]
enum GeneralF64ParseResult {
Invalid,
NaN,

View file

@ -33,7 +33,7 @@ fn test_helper(file_name: &str, args: &str) {
fn test_larger_than_specified_segment() {
new_ucmd!()
.arg("-n")
.arg("-S 50K")
.arg("-S 50M")
.arg("ext_sort.txt")
.succeeds()
.stdout_is_fixture(format!("{}", "ext_sort.expected"));
@ -67,7 +67,7 @@ fn test_extsort_as64_bailout() {
.arg("-S 10K")
.arg("multiple_decimals_general.txt")
.succeeds()
.stdout_is("\n\n\n\n\n\n\n\nCARAvan\n-2028789030\n-896689\n-8.90880\n-1\n-.05\n000\n00000001\n1\n1.040000000\n1.444\n1.58590\n8.013\n45\n46.89\n576,446.88800000\n576,446.890\n 4567.\n4567.1\n4567.34\n\t\t\t\t\t\t\t\t\t\t4567..457\n\t\t\t\t37800\n\t\t\t\t\t\t45670.89079.098\n\t\t\t\t\t\t45670.89079.1\n4798908.340000000000\n4798908.45\n4798908.8909800\n");
.stdout_is_fixture("multiple_decimals_general.expected");
}
#[test]