mirror of
https://github.com/uutils/coreutils
synced 2025-01-07 10:49:09 +00:00
uucore: add options to parse_size::Parser to support parse_byte_count() in sort.rs
This commit is contained in:
parent
b8a755a396
commit
ef8f92f720
3 changed files with 106 additions and 27 deletions
|
@ -501,6 +501,7 @@ fn parse_bytes_only(s: &str) -> Result<u64, ParseError> {
|
||||||
fn parse_bytes_no_x(full: &str, s: &str) -> Result<u64, ParseError> {
|
fn parse_bytes_no_x(full: &str, s: &str) -> Result<u64, ParseError> {
|
||||||
let parser = SizeParser {
|
let parser = SizeParser {
|
||||||
capital_b_bytes: true,
|
capital_b_bytes: true,
|
||||||
|
..Default::default()
|
||||||
};
|
};
|
||||||
let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
|
let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
|
||||||
(None, None, None) => match parser.parse(s) {
|
(None, None, None) => match parser.parse(s) {
|
||||||
|
|
|
@ -46,7 +46,7 @@ use unicode_width::UnicodeWidthStr;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{set_exit_code, strip_errno, UError, UResult, USimpleError, UUsageError};
|
use uucore::error::{set_exit_code, strip_errno, UError, UResult, USimpleError, UUsageError};
|
||||||
use uucore::format_usage;
|
use uucore::format_usage;
|
||||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
use uucore::parse_size::{ParseSizeError, Parser};
|
||||||
use uucore::version_cmp::version_cmp;
|
use uucore::version_cmp::version_cmp;
|
||||||
|
|
||||||
use crate::tmp_dir::TmpDirWrapper;
|
use crate::tmp_dir::TmpDirWrapper;
|
||||||
|
@ -342,30 +342,20 @@ impl GlobalSettings {
|
||||||
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
|
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
|
||||||
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
|
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
|
||||||
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
|
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
|
||||||
const ALLOW_LIST: &[char] = &[
|
let size = Parser::default()
|
||||||
'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y',
|
.with_allow_list(&[
|
||||||
];
|
"b", "k", "K", "m", "M", "g", "G", "t", "T", "P", "E", "Z", "Y",
|
||||||
let mut size_string = input.trim().to_string();
|
])
|
||||||
|
.with_default_unit("K")
|
||||||
|
.with_b_byte_count(true)
|
||||||
|
.parse(input.trim())?;
|
||||||
|
|
||||||
if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c) || c.is_ascii_digit()) {
|
|
||||||
// b 1, K 1024 (default)
|
|
||||||
if size_string.ends_with(|c: char| c.is_ascii_digit()) {
|
|
||||||
size_string.push('K');
|
|
||||||
} else if size_string.ends_with('b') {
|
|
||||||
size_string.pop();
|
|
||||||
}
|
|
||||||
let size = parse_size(&size_string)?;
|
|
||||||
usize::try_from(size).map_err(|_| {
|
usize::try_from(size).map_err(|_| {
|
||||||
ParseSizeError::SizeTooBig(format!(
|
ParseSizeError::SizeTooBig(format!(
|
||||||
"Buffer size {} does not fit in address space",
|
"Buffer size {} does not fit in address space",
|
||||||
size
|
size
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
} else if size_string.starts_with(|c: char| c.is_ascii_digit()) {
|
|
||||||
Err(ParseSizeError::InvalidSuffix("invalid suffix".to_string()))
|
|
||||||
} else {
|
|
||||||
Err(ParseSizeError::ParseFailure("parse failure".to_string()))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Precompute some data needed for sorting.
|
/// Precompute some data needed for sorting.
|
||||||
|
|
|
@ -14,12 +14,33 @@ use crate::display::Quotable;
|
||||||
///
|
///
|
||||||
/// The [`Parser::parse`] function performs the parse.
|
/// The [`Parser::parse`] function performs the parse.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct Parser {
|
pub struct Parser<'parser> {
|
||||||
/// Whether to treat the suffix "B" as meaning "bytes".
|
/// Whether to treat the suffix "B" as meaning "bytes".
|
||||||
pub capital_b_bytes: bool,
|
pub capital_b_bytes: bool,
|
||||||
|
/// Whether to treat "b" as a "byte count" instead of "block"
|
||||||
|
pub b_byte_count: bool,
|
||||||
|
/// Whitelist for the suffix
|
||||||
|
pub allow_list: Option<&'parser [&'parser str]>,
|
||||||
|
/// Default unit when no suffix is provided
|
||||||
|
pub default_unit: Option<&'parser str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser {
|
impl<'parser> Parser<'parser> {
|
||||||
|
pub fn with_allow_list(&mut self, allow_list: &'parser [&str]) -> &mut Self {
|
||||||
|
self.allow_list = Some(allow_list);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_default_unit(&mut self, default_unit: &'parser str) -> &mut Self {
|
||||||
|
self.default_unit = Some(default_unit);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_b_byte_count(&mut self, value: bool) -> &mut Self {
|
||||||
|
self.b_byte_count = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a size string into a number of bytes.
|
/// Parse a size string into a number of bytes.
|
||||||
///
|
///
|
||||||
/// A size string comprises an integer and an optional unit. The unit
|
/// A size string comprises an integer and an optional unit. The unit
|
||||||
|
@ -66,7 +87,34 @@ impl Parser {
|
||||||
// The lowercase "b" (used by `od`, `head`, `tail`, etc.) means
|
// The lowercase "b" (used by `od`, `head`, `tail`, etc.) means
|
||||||
// "block" and the Posix block size is 512. The uppercase "B"
|
// "block" and the Posix block size is 512. The uppercase "B"
|
||||||
// means "byte".
|
// means "byte".
|
||||||
let unit = &size[numeric_string.len()..];
|
let mut unit: &str = &size[numeric_string.len()..];
|
||||||
|
|
||||||
|
if let Some(default_unit) = self.default_unit {
|
||||||
|
// Check if `unit` is empty then assigns `default_unit` to `unit`
|
||||||
|
if unit.is_empty() {
|
||||||
|
unit = default_unit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if `b` is a byte count and remove `b`
|
||||||
|
if self.b_byte_count && unit.ends_with('b') {
|
||||||
|
// If `unit` = 'b' then return error
|
||||||
|
if numeric_string.is_empty() {
|
||||||
|
return Err(ParseSizeError::parse_failure(size));
|
||||||
|
}
|
||||||
|
unit = &unit[0..unit.len() - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(allow_list) = self.allow_list {
|
||||||
|
// Check if `unit` appears in `allow_list`, if not return error
|
||||||
|
if !allow_list.contains(&unit) && !unit.is_empty() {
|
||||||
|
if numeric_string.is_empty() {
|
||||||
|
return Err(ParseSizeError::parse_failure(size));
|
||||||
|
}
|
||||||
|
return Err(ParseSizeError::invalid_suffix(size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let (base, exponent): (u128, u32) = match unit {
|
let (base, exponent): (u128, u32) = match unit {
|
||||||
"" => (1, 0),
|
"" => (1, 0),
|
||||||
"B" if self.capital_b_bytes => (1, 0),
|
"B" if self.capital_b_bytes => (1, 0),
|
||||||
|
@ -362,4 +410,44 @@ mod tests {
|
||||||
assert_eq!(Ok(2_000_000_000_000_000), parse_size("2PB"));
|
assert_eq!(Ok(2_000_000_000_000_000), parse_size("2PB"));
|
||||||
assert_eq!(Ok(2_000_000_000_000_000_000), parse_size("2EB"));
|
assert_eq!(Ok(2_000_000_000_000_000_000), parse_size("2EB"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_size_options() {
|
||||||
|
let mut parser = Parser::default();
|
||||||
|
|
||||||
|
parser
|
||||||
|
.with_allow_list(&["k", "K", "G", "MB", "M"])
|
||||||
|
.with_default_unit("K");
|
||||||
|
|
||||||
|
assert_eq!(Ok(1024), parser.parse("1"));
|
||||||
|
assert_eq!(Ok(2 * 1024), parser.parse("2"));
|
||||||
|
assert_eq!(Ok(1 * 1000 * 1000), parser.parse("1MB"));
|
||||||
|
assert_eq!(Ok(1 * 1024 * 1024), parser.parse("1M"));
|
||||||
|
assert_eq!(Ok(1 * 1024 * 1024 * 1024), parser.parse("1G"));
|
||||||
|
|
||||||
|
assert!(parser.parse("1T").is_err());
|
||||||
|
assert!(parser.parse("1P").is_err());
|
||||||
|
assert!(parser.parse("1E").is_err());
|
||||||
|
|
||||||
|
parser
|
||||||
|
.with_allow_list(&[
|
||||||
|
"b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y",
|
||||||
|
])
|
||||||
|
.with_default_unit("K")
|
||||||
|
.with_b_byte_count(true);
|
||||||
|
|
||||||
|
assert_eq!(Ok(1024), parser.parse("1"));
|
||||||
|
assert_eq!(Ok(2 * 1024), parser.parse("2"));
|
||||||
|
assert_eq!(Ok(1 * 1000 * 1000), parser.parse("1MB"));
|
||||||
|
assert_eq!(Ok(1 * 1024 * 1024), parser.parse("1M"));
|
||||||
|
assert_eq!(Ok(1 * 1024 * 1024 * 1024), parser.parse("1G"));
|
||||||
|
|
||||||
|
assert_eq!(Ok(1), parser.parse("1b"));
|
||||||
|
assert_eq!(Ok(1024), parser.parse("1024b"));
|
||||||
|
assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1024Mb"));
|
||||||
|
|
||||||
|
assert!(parser.parse("b").is_err());
|
||||||
|
assert!(parser.parse("1B").is_err());
|
||||||
|
assert!(parser.parse("B").is_err());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue