diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 657d29e46..391287062 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -501,6 +501,7 @@ fn parse_bytes_only(s: &str) -> Result { fn parse_bytes_no_x(full: &str, s: &str) -> Result { let parser = SizeParser { capital_b_bytes: true, + ..Default::default() }; let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) { (None, None, None) => match parser.parse(s) { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index dc5cfd91e..ae03fcfc7 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -46,7 +46,7 @@ use unicode_width::UnicodeWidthStr; use uucore::display::Quotable; use uucore::error::{set_exit_code, strip_errno, UError, UResult, USimpleError, UUsageError}; use uucore::format_usage; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{ParseSizeError, Parser}; use uucore::version_cmp::version_cmp; use crate::tmp_dir::TmpDirWrapper; @@ -342,30 +342,20 @@ impl GlobalSettings { fn parse_byte_count(input: &str) -> Result { // GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y // GNU sort (8.32) invalid: b, B, 1B, p, e, z, y - const ALLOW_LIST: &[char] = &[ - 'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y', - ]; - let mut size_string = input.trim().to_string(); + let size = Parser::default() + .with_allow_list(&[ + "b", "k", "K", "m", "M", "g", "G", "t", "T", "P", "E", "Z", "Y", + ]) + .with_default_unit("K") + .with_b_byte_count(true) + .parse(input.trim())?; - if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c) || c.is_ascii_digit()) { - // b 1, K 1024 (default) - if size_string.ends_with(|c: char| c.is_ascii_digit()) { - size_string.push('K'); - } else if size_string.ends_with('b') { - size_string.pop(); - } - let size = parse_size(&size_string)?; - usize::try_from(size).map_err(|_| { - ParseSizeError::SizeTooBig(format!( - "Buffer size {} does not fit in address space", - size - )) - }) - } else if size_string.starts_with(|c: char| c.is_ascii_digit()) { - Err(ParseSizeError::InvalidSuffix("invalid suffix".to_string())) - } else { - Err(ParseSizeError::ParseFailure("parse failure".to_string())) - } + usize::try_from(size).map_err(|_| { + ParseSizeError::SizeTooBig(format!( + "Buffer size {} does not fit in address space", + size + )) + }) } /// Precompute some data needed for sorting. diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 4ec8268de..9b59053f0 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -14,12 +14,33 @@ use crate::display::Quotable; /// /// The [`Parser::parse`] function performs the parse. #[derive(Default)] -pub struct Parser { +pub struct Parser<'parser> { /// Whether to treat the suffix "B" as meaning "bytes". pub capital_b_bytes: bool, + /// Whether to treat "b" as a "byte count" instead of "block" + pub b_byte_count: bool, + /// Whitelist for the suffix + pub allow_list: Option<&'parser [&'parser str]>, + /// Default unit when no suffix is provided + pub default_unit: Option<&'parser str>, } -impl Parser { +impl<'parser> Parser<'parser> { + pub fn with_allow_list(&mut self, allow_list: &'parser [&str]) -> &mut Self { + self.allow_list = Some(allow_list); + self + } + + pub fn with_default_unit(&mut self, default_unit: &'parser str) -> &mut Self { + self.default_unit = Some(default_unit); + self + } + + pub fn with_b_byte_count(&mut self, value: bool) -> &mut Self { + self.b_byte_count = value; + self + } + /// Parse a size string into a number of bytes. /// /// A size string comprises an integer and an optional unit. The unit @@ -66,7 +87,34 @@ impl Parser { // The lowercase "b" (used by `od`, `head`, `tail`, etc.) means // "block" and the Posix block size is 512. The uppercase "B" // means "byte". - let unit = &size[numeric_string.len()..]; + let mut unit: &str = &size[numeric_string.len()..]; + + if let Some(default_unit) = self.default_unit { + // Check if `unit` is empty then assigns `default_unit` to `unit` + if unit.is_empty() { + unit = default_unit; + } + } + + // Check if `b` is a byte count and remove `b` + if self.b_byte_count && unit.ends_with('b') { + // If `unit` = 'b' then return error + if numeric_string.is_empty() { + return Err(ParseSizeError::parse_failure(size)); + } + unit = &unit[0..unit.len() - 1]; + } + + if let Some(allow_list) = self.allow_list { + // Check if `unit` appears in `allow_list`, if not return error + if !allow_list.contains(&unit) && !unit.is_empty() { + if numeric_string.is_empty() { + return Err(ParseSizeError::parse_failure(size)); + } + return Err(ParseSizeError::invalid_suffix(size)); + } + } + let (base, exponent): (u128, u32) = match unit { "" => (1, 0), "B" if self.capital_b_bytes => (1, 0), @@ -362,4 +410,44 @@ mod tests { assert_eq!(Ok(2_000_000_000_000_000), parse_size("2PB")); assert_eq!(Ok(2_000_000_000_000_000_000), parse_size("2EB")); } + + #[test] + fn parse_size_options() { + let mut parser = Parser::default(); + + parser + .with_allow_list(&["k", "K", "G", "MB", "M"]) + .with_default_unit("K"); + + assert_eq!(Ok(1024), parser.parse("1")); + assert_eq!(Ok(2 * 1024), parser.parse("2")); + assert_eq!(Ok(1 * 1000 * 1000), parser.parse("1MB")); + assert_eq!(Ok(1 * 1024 * 1024), parser.parse("1M")); + assert_eq!(Ok(1 * 1024 * 1024 * 1024), parser.parse("1G")); + + assert!(parser.parse("1T").is_err()); + assert!(parser.parse("1P").is_err()); + assert!(parser.parse("1E").is_err()); + + parser + .with_allow_list(&[ + "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", + ]) + .with_default_unit("K") + .with_b_byte_count(true); + + assert_eq!(Ok(1024), parser.parse("1")); + assert_eq!(Ok(2 * 1024), parser.parse("2")); + assert_eq!(Ok(1 * 1000 * 1000), parser.parse("1MB")); + assert_eq!(Ok(1 * 1024 * 1024), parser.parse("1M")); + assert_eq!(Ok(1 * 1024 * 1024 * 1024), parser.parse("1G")); + + assert_eq!(Ok(1), parser.parse("1b")); + assert_eq!(Ok(1024), parser.parse("1024b")); + assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1024Mb")); + + assert!(parser.parse("b").is_err()); + assert!(parser.parse("1B").is_err()); + assert!(parser.parse("B").is_err()); + } }