From 5c04283d6ee1a3fc7d4f7f88476809e760933f3c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 13:54:58 +0100 Subject: [PATCH] printf: address fmt, clippy, spelling and failing test --- src/uu/printf/src/printf.rs | 17 +- src/uu/seq/src/extendedbigdecimal.rs | 2 +- src/uu/seq/src/number.rs | 12 +- src/uu/seq/src/seq.rs | 4 +- .../src/lib/features/format/argument.rs | 65 ++- src/uucore/src/lib/features/format/escape.rs | 29 +- src/uucore/src/lib/features/format/mod.rs | 65 +-- .../src/lib/features/format/num_format.rs | 116 +++--- src/uucore/src/lib/features/format/spec.rs | 384 ++++++++++-------- 9 files changed, 381 insertions(+), 313 deletions(-) diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 663411b89..cfb0315cf 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -10,9 +10,9 @@ use std::io::stdout; use std::ops::ControlFlow; use clap::{crate_version, Arg, ArgAction, Command}; -use uucore::error::{UResult, UUsageError}; +use uucore::error::{UError, UResult, UUsageError}; use uucore::format::{parse_spec_and_escape, FormatArgument}; -use uucore::{format_usage, help_about, help_section, help_usage}; +use uucore::{format_usage, help_about, help_section, help_usage, show}; const VERSION: &str = "version"; const HELP: &str = "help"; @@ -49,10 +49,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { while args.peek().is_some() { for item in parse_spec_and_escape(format_string.as_ref()) { - match item?.write(stdout(), &mut args)? { - ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => return Ok(()), - }; + match item { + Ok(item) => { + match item.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => return Ok(()), + }; + } + Err(e) => show!(e), + } } } Ok(()) diff --git a/src/uu/seq/src/extendedbigdecimal.rs b/src/uu/seq/src/extendedbigdecimal.rs index ecd460ceb..4f9a04152 100644 --- a/src/uu/seq/src/extendedbigdecimal.rs +++ b/src/uu/seq/src/extendedbigdecimal.rs @@ -70,7 +70,7 @@ pub enum ExtendedBigDecimal { impl ExtendedBigDecimal { #[cfg(test)] pub fn zero() -> Self { - Self::BigDecimal(1.into()) + Self::BigDecimal(0.into()) } pub fn one() -> Self { diff --git a/src/uu/seq/src/number.rs b/src/uu/seq/src/number.rs index 4da1146ef..182431a92 100644 --- a/src/uu/seq/src/number.rs +++ b/src/uu/seq/src/number.rs @@ -3,12 +3,6 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore extendedbigdecimal extendedbigint -//! A type to represent the possible start, increment, and end values for seq. -//! -//! The [`Number`] enumeration represents the possible values for the -//! start, increment, and end values for `seq`. These may be integers, -//! floating point numbers, negative zero, etc. A [`Number`] can be -//! parsed from a string by calling [`str::parse`]. use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; @@ -29,7 +23,11 @@ pub struct PreciseNumber { } impl PreciseNumber { - pub fn new(number: ExtendedBigDecimal, num_integral_digits: usize, num_fractional_digits: usize) -> Self { + pub fn new( + number: ExtendedBigDecimal, + num_integral_digits: usize, + num_fractional_digits: usize, + ) -> Self { Self { number, num_integral_digits, diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index a987405ce..053388645 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -122,7 +122,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { &options.terminator, options.equal_width, padding, - format, + &format, ); match result { Ok(_) => Ok(()), @@ -203,7 +203,7 @@ fn print_seq( terminator: &str, pad: bool, padding: usize, - format: Option>, + format: &Option>, ) -> std::io::Result<()> { let stdout = stdout(); let mut stdout = stdout.lock(); diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 96cfeddf3..6370c4177 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -1,3 +1,7 @@ +use os_display::Quotable; + +use crate::{error::set_exit_code, show_warning}; + #[derive(Clone, Debug)] pub enum FormatArgument { Char(char), @@ -44,16 +48,25 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => if let Some(s) = s.strip_prefix("0x") { - u64::from_str_radix(s, 16).ok() - } else if let Some(s) = s.strip_prefix("0") { - u64::from_str_radix(s, 8).ok() - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|c| c as u64) - } else { - s.parse().ok() + FormatArgument::Unparsed(s) => { + let opt = if let Some(s) = s.strip_prefix("0x") { + u64::from_str_radix(s, 16).ok() + } else if let Some(s) = s.strip_prefix('0') { + u64::from_str_radix(s, 8).ok() + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|c| c as u64) + } else { + s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0 + } + } } - .unwrap_or(0), _ => 0, } } @@ -67,7 +80,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { FormatArgument::Unparsed(s) => { // For hex, we parse `u64` because we do not allow another // minus sign. We might need to do more precise parsing here. - if let Some(s) = s.strip_prefix("-0x") { + let opt = if let Some(s) = s.strip_prefix("-0x") { u64::from_str_radix(s, 16).ok().map(|x| -(x as i64)) } else if let Some(s) = s.strip_prefix("0x") { u64::from_str_radix(s, 16).ok().map(|x| x as i64) @@ -77,8 +90,15 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { s.chars().next().map(|x| x as i64) } else { s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0 + } } - .unwrap_or(0) } _ => 0, } @@ -90,14 +110,23 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => if s.starts_with("0x") || s.starts_with("-0x") { - unimplemented!("Hexadecimal floats are unimplemented!") - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|x| x as u64 as f64) - } else { - s.parse().ok() + FormatArgument::Unparsed(s) => { + let opt = if s.starts_with("0x") || s.starts_with("-0x") { + unimplemented!("Hexadecimal floats are unimplemented!") + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|x| x as u64 as f64) + } else { + s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0.0 + } + } } - .unwrap_or(0.0), _ => 0.0, } } diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs index 1e06a8176..188dd1892 100644 --- a/src/uucore/src/lib/features/format/escape.rs +++ b/src/uucore/src/lib/features/format/escape.rs @@ -21,16 +21,16 @@ impl Base { } } - fn to_digit(&self, c: u8) -> Option { + fn convert_digit(&self, c: u8) -> Option { match self { - Base::Oct => { + Self::Oct => { if matches!(c, b'0'..=b'7') { Some(c - b'0') } else { None } } - Base::Hex => match c { + Self::Hex => match c { b'0'..=b'9' => Some(c - b'0'), b'A'..=b'F' => Some(c - b'A' + 10), b'a'..=b'f' => Some(c - b'a' + 10), @@ -49,32 +49,35 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option { // yield incorrect results because it will interpret values larger than // `u8::MAX` as unicode. let [c, rest @ ..] = input else { return None }; - let mut ret = base.to_digit(*c)?; - *input = &rest[..]; + let mut ret = base.convert_digit(*c)?; + *input = rest; for _ in 1..base.max_digits() { let [c, rest @ ..] = input else { break }; - let Some(n) = base.to_digit(*c) else { break }; + let Some(n) = base.convert_digit(*c) else { + break; + }; ret = ret.wrapping_mul(base as u8).wrapping_add(n); - *input = &rest[..]; + *input = rest; } Some(ret) } +// spell-checker:disable-next /// Parse `\uHHHH` and `\UHHHHHHHH` // TODO: This should print warnings and possibly halt execution when it fails to parse // TODO: If the character cannot be converted to u32, the input should be printed. fn parse_unicode(input: &mut &[u8], digits: u8) -> Option { let (c, rest) = input.split_first()?; - let mut ret = Base::Hex.to_digit(*c)? as u32; - *input = &rest[..]; + let mut ret = Base::Hex.convert_digit(*c)? as u32; + *input = rest; for _ in 1..digits { let (c, rest) = input.split_first()?; - let n = Base::Hex.to_digit(*c)?; + let n = Base::Hex.convert_digit(*c)?; ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32); - *input = &rest[..]; + *input = rest; } char::from_u32(ret) @@ -91,12 +94,12 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { } } - *rest = &new_rest[..]; + *rest = new_rest; match c { b'\\' => EscapedChar::Byte(b'\\'), b'a' => EscapedChar::Byte(b'\x07'), b'b' => EscapedChar::Byte(b'\x08'), - b'c' => return EscapedChar::End, + b'c' => EscapedChar::End, b'e' => EscapedChar::Byte(b'\x1b'), b'f' => EscapedChar::Byte(b'\x0c'), b'n' => EscapedChar::Byte(b'\n'), diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index cfa9a034f..9045b8b90 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -42,10 +42,13 @@ use self::{ #[derive(Debug)] pub enum FormatError { - SpecError, + SpecError(Vec), IoError(std::io::Error), NoMoreArguments, InvalidArgument(FormatArgument), + TooManySpecs, + NeedAtLeastOneSpec, + WrongSpecType, } impl Error for FormatError {} @@ -53,18 +56,26 @@ impl UError for FormatError {} impl From for FormatError { fn from(value: std::io::Error) -> Self { - FormatError::IoError(value) + Self::IoError(value) } } impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO: Be more precise about these match self { - FormatError::SpecError => write!(f, "invalid spec"), - FormatError::IoError(_) => write!(f, "io error"), - FormatError::NoMoreArguments => write!(f, "no more arguments"), - FormatError::InvalidArgument(_) => write!(f, "invalid argument"), + Self::SpecError(s) => write!( + f, + "%{}: invalid conversion specification", + String::from_utf8_lossy(s) + ), + // TODO: The next two should print the spec as well + Self::TooManySpecs => write!(f, "format has too many % directives"), + Self::NeedAtLeastOneSpec => write!(f, "format has no % directive"), + // TODO: Error message below needs some work + Self::WrongSpecType => write!(f, "wrong % directive type was given"), + Self::IoError(_) => write!(f, "io error"), + Self::NoMoreArguments => write!(f, "no more arguments"), + Self::InvalidArgument(_) => write!(f, "invalid argument"), } } } @@ -83,7 +94,7 @@ pub trait FormatChar { impl FormatChar for u8 { fn write(&self, mut writer: impl Write) -> std::io::Result> { - writer.write(&[*self])?; + writer.write_all(&[*self])?; Ok(ControlFlow::Continue(())) } } @@ -91,16 +102,16 @@ impl FormatChar for u8 { impl FormatChar for EscapedChar { fn write(&self, mut writer: impl Write) -> std::io::Result> { match self { - EscapedChar::Byte(c) => { - writer.write(&[*c])?; + Self::Byte(c) => { + writer.write_all(&[*c])?; } - EscapedChar::Char(c) => { + Self::Char(c) => { write!(writer, "{c}")?; } - EscapedChar::Backslash(c) => { - writer.write(&[b'\\', *c])?; + Self::Backslash(c) => { + writer.write_all(&[b'\\', *c])?; } - EscapedChar::End => return Ok(ControlFlow::Break(())), + Self::End => return Ok(ControlFlow::Break(())), } Ok(ControlFlow::Continue(())) } @@ -113,8 +124,8 @@ impl FormatItem { args: &mut impl Iterator, ) -> Result, FormatError> { match self { - FormatItem::Spec(spec) => spec.write(writer, args)?, - FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError), + Self::Spec(spec) => spec.write(writer, args)?, + Self::Char(c) => return c.write(writer).map_err(FormatError::IoError), }; Ok(ControlFlow::Continue(())) } @@ -125,7 +136,7 @@ pub fn parse_spec_and_escape( ) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%')))) @@ -133,8 +144,8 @@ pub fn parse_spec_and_escape( [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { - Some(spec) => spec, - None => return Some(Err(FormatError::SpecError)), + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } @@ -152,7 +163,7 @@ pub fn parse_spec_and_escape( fn parse_spec_only(fmt: &[u8]) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(b'%'))) @@ -160,8 +171,8 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { - Some(spec) => spec, - None => return Some(Err(FormatError::SpecError)), + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } @@ -175,7 +186,7 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'\\', rest @ ..] => { current = rest; Some(parse_escape_code(&mut current)) @@ -248,8 +259,8 @@ pub fn sprintf<'a>( /// A parsed format for a single float value /// -/// This is used by `seq`. It can be constructed with [`FloatFormat::parse`] -/// and can write a value with [`FloatFormat::fmt`]. +/// This is used by `seq`. It can be constructed with [`Format::parse`] +/// and can write a value with [`Format::fmt`]. /// /// It can only accept a single specification without any asterisk parameters. /// If it does get more specifications, it will return an error. @@ -276,7 +287,7 @@ impl Format { } let Some(spec) = spec else { - return Err(FormatError::SpecError); + return Err(FormatError::NeedAtLeastOneSpec); }; let formatter = F::try_from_spec(spec)?; @@ -285,7 +296,7 @@ impl Format { for item in &mut iter { match item? { FormatItem::Spec(_) => { - return Err(FormatError::SpecError); + return Err(FormatError::TooManySpecs); } FormatItem::Char(c) => suffix.push(c), } diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 49edecce0..c9a2b8c16 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -97,19 +97,19 @@ impl Formatter for SignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -151,7 +151,7 @@ impl Formatter for UnsignedInt { }; if self.precision > s.len() { - s = format!("{:0width$}", s, width = self.precision) + s = format!("{:0width$}", s, width = self.precision); } match self.alignment { @@ -169,19 +169,19 @@ impl Formatter for UnsignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -212,7 +212,7 @@ impl Default for Float { width: 0, positive_sign: PositiveSign::None, alignment: NumberAlignment::Left, - precision: 2, + precision: 6, } } } @@ -229,19 +229,23 @@ impl Formatter for Float { }?; } - let s = match self.variant { - FloatVariant::Decimal => { - format_float_decimal(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Scientific => { - format_float_scientific(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Shortest => { - format_float_shortest(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Hexadecimal => { - format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) + let s = if x.is_finite() { + match self.variant { + FloatVariant::Decimal => { + format_float_decimal(x, self.precision, self.force_decimal) + } + FloatVariant::Scientific => { + format_float_scientific(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Shortest => { + format_float_shortest(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Hexadecimal => { + format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) + } } + } else { + format_float_non_finite(x, self.case) }; match self.alignment { @@ -265,19 +269,19 @@ impl Formatter for Float { precision, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -292,25 +296,16 @@ impl Formatter for Float { } } -fn format_float_nonfinite(f: f64, case: Case) -> String { +fn format_float_non_finite(f: f64, case: Case) -> String { debug_assert!(!f.is_finite()); let mut s = format!("{f}"); if case == Case::Uppercase { s.make_ascii_uppercase(); } - return s; + s } -fn format_float_decimal( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - +fn format_float_decimal(f: f64, precision: usize, force_decimal: ForceDecimal) -> String { if precision == 0 && force_decimal == ForceDecimal::Yes { format!("{f:.0}.") } else { @@ -324,11 +319,6 @@ fn format_float_scientific( case: Case, force_decimal: ForceDecimal, ) -> String { - // If the float is NaN, -Nan, Inf or -Inf, format like any other float - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - if f == 0.0 { return if force_decimal == ForceDecimal::Yes && precision == 0 { "0.e+00".into() @@ -337,13 +327,13 @@ fn format_float_scientific( }; } - let mut exponent: i32 = f.log10().floor() as i32; let mut normalized = f / 10.0_f64.powi(exponent); // If the normalized value will be rounded to a value greater than 10 // we need to correct. - if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 + { normalized /= 10.0; exponent += 1; } @@ -371,11 +361,6 @@ fn format_float_shortest( case: Case, force_decimal: ForceDecimal, ) -> String { - // If the float is NaN, -Nan, Inf or -Inf, format like any other float - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - // Precision here is about how many digits should be displayed // instead of how many digits for the fractional part, this means that if // we pass this to rust's format string, it's always gonna be one less. @@ -398,7 +383,9 @@ fn format_float_shortest( // If the normalized value will be rounded to a value greater than 10 // we need to correct. - if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) + >= 10.0 + { normalized /= 10.0; exponent += 1; } @@ -412,12 +399,7 @@ fn format_float_shortest( let mut normalized = format!("{normalized:.*}", precision); if force_decimal == ForceDecimal::No { - while normalized.ends_with('0') { - normalized.pop(); - } - if normalized.ends_with('.') { - normalized.pop(); - } + strip_zeros_and_dot(&mut normalized); } let exp_char = match case { @@ -439,12 +421,7 @@ fn format_float_shortest( }; if force_decimal == ForceDecimal::No { - while formatted.ends_with('0') { - formatted.pop(); - } - if formatted.ends_with('.') { - formatted.pop(); - } + strip_zeros_and_dot(&mut formatted); } formatted @@ -457,10 +434,6 @@ fn format_float_hexadecimal( case: Case, force_decimal: ForceDecimal, ) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - let (first_digit, mantissa, exponent) = if f == 0.0 { (0, 0, 0) } else { @@ -481,7 +454,16 @@ fn format_float_hexadecimal( s.make_ascii_uppercase(); } - return s; + s +} + +fn strip_zeros_and_dot(s: &mut String) { + while s.ends_with('0') { + s.pop(); + } + if s.ends_with('.') { + s.pop(); + } } #[cfg(test)] @@ -491,7 +473,7 @@ mod test { #[test] fn decimal_float() { use super::format_float_decimal; - let f = |x| format_float_decimal(x, 6, Case::Lowercase, ForceDecimal::No); + let f = |x| format_float_decimal(x, 6, ForceDecimal::No); assert_eq!(f(0.0), "0.000000"); assert_eq!(f(1.0), "1.000000"); assert_eq!(f(100.0), "100.000000"); @@ -576,7 +558,7 @@ mod test { assert_eq!(f(12.3456789), "1e+01"); assert_eq!(f(1000000.0), "1e+06"); assert_eq!(f(99999999.0), "1e+08"); - + let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.0), "0."); assert_eq!(f(1.0), "1."); diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 23c68c066..e74b6f866 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -1,4 +1,6 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety intmax ptrdiff + +use crate::quoting_style::{escape_name, QuotingStyle}; use super::{ num_format::{ @@ -16,11 +18,12 @@ pub enum Spec { align_left: bool, }, String { - width: Option>, precision: Option>, - parse_escape: bool, + width: Option>, align_left: bool, }, + EscapedString, + QuotedString, SignedInt { width: Option>, precision: Option>, @@ -76,12 +79,14 @@ enum Length { } impl Spec { - pub fn parse(rest: &mut &[u8]) -> Option { + pub fn parse<'a>(rest: &mut &'a [u8]) -> Result { // Based on the C++ reference, the spec format looks like: // // %[flags][width][.precision][length]specifier // // However, we have already parsed the '%'. + let mut index = 0; + let start = *rest; let mut minus = false; let mut plus = false; @@ -89,111 +94,101 @@ impl Spec { let mut hash = false; let mut zero = false; - while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) { + while let Some(x) = rest.get(index) { match x { b'-' => minus = true, b'+' => plus = true, b' ' => space = true, b'#' => hash = true, b'0' => zero = true, - _ => unreachable!(), + _ => break, } - *rest = &rest[1..] + index += 1; } - let width = eat_asterisk_or_number(rest); + let alignment = match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }; - let precision = if let Some(b'.') = rest.get(0) { - *rest = &rest[1..]; - Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0))) + let positive_sign = match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }; + + let width = eat_asterisk_or_number(rest, &mut index); + + let precision = if let Some(b'.') = rest.get(index) { + index += 1; + Some(eat_asterisk_or_number(rest, &mut index).unwrap_or(CanAsterisk::Fixed(0))) } else { None }; - // Parse 0..N length options, keep the last one - // Even though it is just ignored. We might want to use it later and we - // should parse those characters. - // - // TODO: This needs to be configurable: `seq` accepts only one length - // param - let mut _length = None; - loop { - let new_length = rest.get(0).and_then(|c| { - Some(match c { - b'h' => { - if let Some(b'h') = rest.get(1) { - *rest = &rest[1..]; - Length::Char - } else { - Length::Short - } - } - b'l' => { - if let Some(b'l') = rest.get(1) { - *rest = &rest[1..]; - Length::Long - } else { - Length::LongLong - } - } - b'j' => Length::IntMaxT, - b'z' => Length::SizeT, - b't' => Length::PtfDiffT, - b'L' => Length::LongDouble, - _ => return None, - }) - }); - if new_length.is_some() { - *rest = &rest[1..]; - _length = new_length; - } else { - break; - } - } + // We ignore the length. It's not really relevant to printf + let _ = Self::parse_length(rest, &mut index); - let type_spec = rest.get(0)?; - *rest = &rest[1..]; - Some(match type_spec { - b'c' => Spec::Char { - width, - align_left: minus, - }, - b's' => Spec::String { - width, - precision, - parse_escape: false, - align_left: minus, - }, - b'b' => Spec::String { - width, - precision, - parse_escape: true, - align_left: minus, - }, - b'd' | b'i' => Spec::SignedInt { - width, - precision, - alignment: match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }, - positive_sign: match (plus, space) { - (true, _) => PositiveSign::Plus, - (false, true) => PositiveSign::Space, - (false, false) => PositiveSign::None, - }, - }, + let Some(type_spec) = rest.get(index) else { + return Err(&start[..index]); + }; + index += 1; + *rest = &start[index..]; + + Ok(match type_spec { + // GNU accepts minus, plus and space even though they are not used + b'c' => { + if hash || precision.is_some() { + return Err(&start[..index]); + } + Self::Char { + width, + align_left: minus, + } + } + b's' => { + if hash { + return Err(&start[..index]); + } + Self::String { + precision, + width, + align_left: minus, + } + } + b'b' => { + if hash || minus || plus || space || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::EscapedString + } + b'q' => { + if hash || minus || plus || space || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::QuotedString + } + b'd' | b'i' => { + if hash { + return Err(&start[..index]); + } + Self::SignedInt { + width, + precision, + alignment, + positive_sign, + } + } c @ (b'u' | b'o' | b'x' | b'X') => { + // Normal unsigned integer cannot have a prefix + if *c == b'u' && hash { + return Err(&start[..index]); + } let prefix = match hash { false => Prefix::No, true => Prefix::Yes, }; - let alignment = match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }; let variant = match c { b'u' => UnsignedIntVariant::Decimal, b'o' => UnsignedIntVariant::Octal(prefix), @@ -201,14 +196,14 @@ impl Spec { b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), _ => unreachable!(), }; - Spec::UnsignedInt { + Self::UnsignedInt { variant, precision, width, alignment, } } - c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float { + c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float { width, precision, variant: match c { @@ -226,115 +221,157 @@ impl Spec { false => Case::Lowercase, true => Case::Uppercase, }, - alignment: match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }, - positive_sign: match (plus, space) { - (true, _) => PositiveSign::Plus, - (false, true) => PositiveSign::Space, - (false, false) => PositiveSign::None, - }, + alignment, + positive_sign, }, - _ => return None, + _ => return Err(&start[..index]), }) } + fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option { + // Parse 0..N length options, keep the last one + // Even though it is just ignored. We might want to use it later and we + // should parse those characters. + // + // TODO: This needs to be configurable: `seq` accepts only one length + // param + let mut length = None; + loop { + let new_length = rest.get(*index).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(*index + 1) { + *index += 1; + Length::Char + } else { + Length::Short + } + } + b'l' => { + if let Some(b'l') = rest.get(*index + 1) { + *index += 1; + Length::Long + } else { + Length::LongLong + } + } + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + if new_length.is_some() { + *index += 1; + length = new_length; + } else { + break; + } + } + length + } + pub fn write<'a>( &self, - writer: impl Write, + mut writer: impl Write, mut args: impl ArgumentIter<'a>, ) -> Result<(), FormatError> { match self { - &Spec::Char { width, align_left } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - write_padded(writer, args.get_char(), width, false, align_left) + Self::Char { width, align_left } => { + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + write_padded(writer, args.get_char(), width, false, *align_left) } - &Spec::String { + Self::String { width, - precision, - parse_escape, align_left, + precision, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?; + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + + // GNU does do this truncation on a byte level, see for instance: + // printf "%.1s" 🙃 + // > � + // For now, we let printf panic when we truncate within a code point. + // TODO: We need to not use Rust's formatting for aligning the output, + // so that we can just write bytes to stdout without panicking. + let precision = resolve_asterisk(*precision, &mut args)?; let s = args.get_str(); - if parse_escape { - let mut parsed = Vec::new(); - for c in parse_escape_only(s.as_bytes()) { - match c.write(&mut parsed)? { - ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => { - // TODO: This should break the _entire execution_ of printf - break; - } - }; - } - // GNU does do this truncation on a byte level, see for instance: - // printf "%.1s" 🙃 - // > � - // For now, we let printf panic when we truncate within a code point. - // TODO: We need to not use Rust's formatting for aligning the output, - // so that we can just write bytes to stdout without panicking. - let truncated = match precision { - Some(p) if p < parsed.len() => &parsed[..p], - _ => &parsed, - }; - write_padded( - writer, - std::str::from_utf8(&truncated).expect("TODO: Accept invalid utf8"), - width, - false, - align_left, - ) - } else { - let truncated = match precision { - Some(p) if p < s.len() => &s[..p], - _ => s, - }; - write_padded(writer, truncated, width, false, align_left) - } + let truncated = match precision { + Some(p) if p < s.len() => &s[..p], + _ => s, + }; + write_padded(writer, truncated, width, false, *align_left) } - &Spec::SignedInt { + Self::EscapedString => { + let s = args.get_str(); + let mut parsed = Vec::new(); + for c in parse_escape_only(s.as_bytes()) { + match c.write(&mut parsed)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => { + // TODO: This should break the _entire execution_ of printf + break; + } + }; + } + writer.write_all(&parsed).map_err(FormatError::IoError) + } + Self::QuotedString => { + let s = args.get_str(); + writer + .write_all( + escape_name( + s.as_ref(), + &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }, + ) + .as_bytes(), + ) + .map_err(FormatError::IoError) + } + Self::SignedInt { width, precision, positive_sign, alignment, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); let i = args.get_i64(); num_format::SignedInt { width, precision, - positive_sign, - alignment, + positive_sign: *positive_sign, + alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } - &Spec::UnsignedInt { + Self::UnsignedInt { variant, width, precision, alignment, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); let i = args.get_u64(); num_format::UnsignedInt { - variant, + variant: *variant, precision, width, - alignment, + alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } - &Spec::Float { + Self::Float { variant, case, force_decimal, @@ -343,18 +380,18 @@ impl Spec { alignment, precision, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6); let f = args.get_f64(); num_format::Float { - variant, - case, - force_decimal, width, - positive_sign, - alignment, precision, + variant: *variant, + case: *case, + force_decimal: *force_decimal, + positive_sign: *positive_sign, + alignment: *alignment, } .fmt(writer, f) .map_err(FormatError::IoError) @@ -390,23 +427,26 @@ fn write_padded( .map_err(FormatError::IoError) } -fn eat_asterisk_or_number(rest: &mut &[u8]) -> Option> { - if let Some(b'*') = rest.get(0) { - *rest = &rest[1..]; +fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option> { + if let Some(b'*') = rest.get(*index) { + *index += 1; Some(CanAsterisk::Asterisk) } else { - eat_number(rest).map(CanAsterisk::Fixed) + eat_number(rest, index).map(CanAsterisk::Fixed) } } -fn eat_number(rest: &mut &[u8]) -> Option { - match rest.iter().position(|b| !b.is_ascii_digit()) { +fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option { + match rest[*index..].iter().position(|b| !b.is_ascii_digit()) { None | Some(0) => None, Some(i) => { // TODO: This might need to handle errors better // For example in case of overflow. - let parsed = std::str::from_utf8(&rest[..i]).unwrap().parse().unwrap(); - *rest = &rest[i..]; + let parsed = std::str::from_utf8(&rest[*index..(*index + i)]) + .unwrap() + .parse() + .unwrap(); + *index += i; Some(parsed) } }