printf: address fmt, clippy, spelling and failing test

This commit is contained in:
Terts Diepraam 2023-11-20 13:54:58 +01:00
parent 6d2698b802
commit 5c04283d6e
9 changed files with 381 additions and 313 deletions

View file

@ -10,9 +10,9 @@ use std::io::stdout;
use std::ops::ControlFlow;
use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError};
use uucore::error::{UError, UResult, UUsageError};
use uucore::format::{parse_spec_and_escape, FormatArgument};
use uucore::{format_usage, help_about, help_section, help_usage};
use uucore::{format_usage, help_about, help_section, help_usage, show};
const VERSION: &str = "version";
const HELP: &str = "help";
@ -49,10 +49,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
while args.peek().is_some() {
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()),
};
match item {
Ok(item) => {
match item.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()),
};
}
Err(e) => show!(e),
}
}
}
Ok(())

View file

@ -70,7 +70,7 @@ pub enum ExtendedBigDecimal {
impl ExtendedBigDecimal {
#[cfg(test)]
pub fn zero() -> Self {
Self::BigDecimal(1.into())
Self::BigDecimal(0.into())
}
pub fn one() -> Self {

View file

@ -3,12 +3,6 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore extendedbigdecimal extendedbigint
//! A type to represent the possible start, increment, and end values for seq.
//!
//! The [`Number`] enumeration represents the possible values for the
//! start, increment, and end values for `seq`. These may be integers,
//! floating point numbers, negative zero, etc. A [`Number`] can be
//! parsed from a string by calling [`str::parse`].
use num_traits::Zero;
use crate::extendedbigdecimal::ExtendedBigDecimal;
@ -29,7 +23,11 @@ pub struct PreciseNumber {
}
impl PreciseNumber {
pub fn new(number: ExtendedBigDecimal, num_integral_digits: usize, num_fractional_digits: usize) -> Self {
pub fn new(
number: ExtendedBigDecimal,
num_integral_digits: usize,
num_fractional_digits: usize,
) -> Self {
Self {
number,
num_integral_digits,

View file

@ -122,7 +122,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
&options.terminator,
options.equal_width,
padding,
format,
&format,
);
match result {
Ok(_) => Ok(()),
@ -203,7 +203,7 @@ fn print_seq(
terminator: &str,
pad: bool,
padding: usize,
format: Option<Format<num_format::Float>>,
format: &Option<Format<num_format::Float>>,
) -> std::io::Result<()> {
let stdout = stdout();
let mut stdout = stdout.lock();

View file

@ -1,3 +1,7 @@
use os_display::Quotable;
use crate::{error::set_exit_code, show_warning};
#[derive(Clone, Debug)]
pub enum FormatArgument {
Char(char),
@ -44,16 +48,25 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::UnsignedInt(n) => *n,
FormatArgument::Unparsed(s) => if let Some(s) = s.strip_prefix("0x") {
u64::from_str_radix(s, 16).ok()
} else if let Some(s) = s.strip_prefix("0") {
u64::from_str_radix(s, 8).ok()
} else if let Some(s) = s.strip_prefix('\'') {
s.chars().next().map(|c| c as u64)
} else {
s.parse().ok()
FormatArgument::Unparsed(s) => {
let opt = if let Some(s) = s.strip_prefix("0x") {
u64::from_str_radix(s, 16).ok()
} else if let Some(s) = s.strip_prefix('0') {
u64::from_str_radix(s, 8).ok()
} else if let Some(s) = s.strip_prefix('\'') {
s.chars().next().map(|c| c as u64)
} else {
s.parse().ok()
};
match opt {
Some(n) => n,
None => {
show_warning!("{}: expected a numeric value", s.quote());
set_exit_code(1);
0
}
}
}
.unwrap_or(0),
_ => 0,
}
}
@ -67,7 +80,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
FormatArgument::Unparsed(s) => {
// For hex, we parse `u64` because we do not allow another
// minus sign. We might need to do more precise parsing here.
if let Some(s) = s.strip_prefix("-0x") {
let opt = if let Some(s) = s.strip_prefix("-0x") {
u64::from_str_radix(s, 16).ok().map(|x| -(x as i64))
} else if let Some(s) = s.strip_prefix("0x") {
u64::from_str_radix(s, 16).ok().map(|x| x as i64)
@ -77,8 +90,15 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
s.chars().next().map(|x| x as i64)
} else {
s.parse().ok()
};
match opt {
Some(n) => n,
None => {
show_warning!("{}: expected a numeric value", s.quote());
set_exit_code(1);
0
}
}
.unwrap_or(0)
}
_ => 0,
}
@ -90,14 +110,23 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::Float(n) => *n,
FormatArgument::Unparsed(s) => if s.starts_with("0x") || s.starts_with("-0x") {
unimplemented!("Hexadecimal floats are unimplemented!")
} else if let Some(s) = s.strip_prefix('\'') {
s.chars().next().map(|x| x as u64 as f64)
} else {
s.parse().ok()
FormatArgument::Unparsed(s) => {
let opt = if s.starts_with("0x") || s.starts_with("-0x") {
unimplemented!("Hexadecimal floats are unimplemented!")
} else if let Some(s) = s.strip_prefix('\'') {
s.chars().next().map(|x| x as u64 as f64)
} else {
s.parse().ok()
};
match opt {
Some(n) => n,
None => {
show_warning!("{}: expected a numeric value", s.quote());
set_exit_code(1);
0.0
}
}
}
.unwrap_or(0.0),
_ => 0.0,
}
}

View file

@ -21,16 +21,16 @@ impl Base {
}
}
fn to_digit(&self, c: u8) -> Option<u8> {
fn convert_digit(&self, c: u8) -> Option<u8> {
match self {
Base::Oct => {
Self::Oct => {
if matches!(c, b'0'..=b'7') {
Some(c - b'0')
} else {
None
}
}
Base::Hex => match c {
Self::Hex => match c {
b'0'..=b'9' => Some(c - b'0'),
b'A'..=b'F' => Some(c - b'A' + 10),
b'a'..=b'f' => Some(c - b'a' + 10),
@ -49,32 +49,35 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
// yield incorrect results because it will interpret values larger than
// `u8::MAX` as unicode.
let [c, rest @ ..] = input else { return None };
let mut ret = base.to_digit(*c)?;
*input = &rest[..];
let mut ret = base.convert_digit(*c)?;
*input = rest;
for _ in 1..base.max_digits() {
let [c, rest @ ..] = input else { break };
let Some(n) = base.to_digit(*c) else { break };
let Some(n) = base.convert_digit(*c) else {
break;
};
ret = ret.wrapping_mul(base as u8).wrapping_add(n);
*input = &rest[..];
*input = rest;
}
Some(ret)
}
// spell-checker:disable-next
/// Parse `\uHHHH` and `\UHHHHHHHH`
// TODO: This should print warnings and possibly halt execution when it fails to parse
// TODO: If the character cannot be converted to u32, the input should be printed.
fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
let (c, rest) = input.split_first()?;
let mut ret = Base::Hex.to_digit(*c)? as u32;
*input = &rest[..];
let mut ret = Base::Hex.convert_digit(*c)? as u32;
*input = rest;
for _ in 1..digits {
let (c, rest) = input.split_first()?;
let n = Base::Hex.to_digit(*c)?;
let n = Base::Hex.convert_digit(*c)?;
ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32);
*input = &rest[..];
*input = rest;
}
char::from_u32(ret)
@ -91,12 +94,12 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
}
}
*rest = &new_rest[..];
*rest = new_rest;
match c {
b'\\' => EscapedChar::Byte(b'\\'),
b'a' => EscapedChar::Byte(b'\x07'),
b'b' => EscapedChar::Byte(b'\x08'),
b'c' => return EscapedChar::End,
b'c' => EscapedChar::End,
b'e' => EscapedChar::Byte(b'\x1b'),
b'f' => EscapedChar::Byte(b'\x0c'),
b'n' => EscapedChar::Byte(b'\n'),

View file

@ -42,10 +42,13 @@ use self::{
#[derive(Debug)]
pub enum FormatError {
SpecError,
SpecError(Vec<u8>),
IoError(std::io::Error),
NoMoreArguments,
InvalidArgument(FormatArgument),
TooManySpecs,
NeedAtLeastOneSpec,
WrongSpecType,
}
impl Error for FormatError {}
@ -53,18 +56,26 @@ impl UError for FormatError {}
impl From<std::io::Error> for FormatError {
fn from(value: std::io::Error) -> Self {
FormatError::IoError(value)
Self::IoError(value)
}
}
impl Display for FormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// TODO: Be more precise about these
match self {
FormatError::SpecError => write!(f, "invalid spec"),
FormatError::IoError(_) => write!(f, "io error"),
FormatError::NoMoreArguments => write!(f, "no more arguments"),
FormatError::InvalidArgument(_) => write!(f, "invalid argument"),
Self::SpecError(s) => write!(
f,
"%{}: invalid conversion specification",
String::from_utf8_lossy(s)
),
// TODO: The next two should print the spec as well
Self::TooManySpecs => write!(f, "format has too many % directives"),
Self::NeedAtLeastOneSpec => write!(f, "format has no % directive"),
// TODO: Error message below needs some work
Self::WrongSpecType => write!(f, "wrong % directive type was given"),
Self::IoError(_) => write!(f, "io error"),
Self::NoMoreArguments => write!(f, "no more arguments"),
Self::InvalidArgument(_) => write!(f, "invalid argument"),
}
}
}
@ -83,7 +94,7 @@ pub trait FormatChar {
impl FormatChar for u8 {
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
writer.write(&[*self])?;
writer.write_all(&[*self])?;
Ok(ControlFlow::Continue(()))
}
}
@ -91,16 +102,16 @@ impl FormatChar for u8 {
impl FormatChar for EscapedChar {
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
match self {
EscapedChar::Byte(c) => {
writer.write(&[*c])?;
Self::Byte(c) => {
writer.write_all(&[*c])?;
}
EscapedChar::Char(c) => {
Self::Char(c) => {
write!(writer, "{c}")?;
}
EscapedChar::Backslash(c) => {
writer.write(&[b'\\', *c])?;
Self::Backslash(c) => {
writer.write_all(&[b'\\', *c])?;
}
EscapedChar::End => return Ok(ControlFlow::Break(())),
Self::End => return Ok(ControlFlow::Break(())),
}
Ok(ControlFlow::Continue(()))
}
@ -113,8 +124,8 @@ impl<C: FormatChar> FormatItem<C> {
args: &mut impl Iterator<Item = &'a FormatArgument>,
) -> Result<ControlFlow<()>, FormatError> {
match self {
FormatItem::Spec(spec) => spec.write(writer, args)?,
FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError),
Self::Spec(spec) => spec.write(writer, args)?,
Self::Char(c) => return c.write(writer).map_err(FormatError::IoError),
};
Ok(ControlFlow::Continue(()))
}
@ -125,7 +136,7 @@ pub fn parse_spec_and_escape(
) -> impl Iterator<Item = Result<FormatItem<EscapedChar>, FormatError>> + '_ {
let mut current = fmt;
std::iter::from_fn(move || match current {
[] => return None,
[] => None,
[b'%', b'%', rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%'))))
@ -133,8 +144,8 @@ pub fn parse_spec_and_escape(
[b'%', rest @ ..] => {
current = rest;
let spec = match Spec::parse(&mut current) {
Some(spec) => spec,
None => return Some(Err(FormatError::SpecError)),
Ok(spec) => spec,
Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))),
};
Some(Ok(FormatItem::Spec(spec)))
}
@ -152,7 +163,7 @@ pub fn parse_spec_and_escape(
fn parse_spec_only(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem<u8>, FormatError>> + '_ {
let mut current = fmt;
std::iter::from_fn(move || match current {
[] => return None,
[] => None,
[b'%', b'%', rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(b'%')))
@ -160,8 +171,8 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem<u8>, Fo
[b'%', rest @ ..] => {
current = rest;
let spec = match Spec::parse(&mut current) {
Some(spec) => spec,
None => return Some(Err(FormatError::SpecError)),
Ok(spec) => spec,
Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))),
};
Some(Ok(FormatItem::Spec(spec)))
}
@ -175,7 +186,7 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem<u8>, Fo
fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
let mut current = fmt;
std::iter::from_fn(move || match current {
[] => return None,
[] => None,
[b'\\', rest @ ..] => {
current = rest;
Some(parse_escape_code(&mut current))
@ -248,8 +259,8 @@ pub fn sprintf<'a>(
/// A parsed format for a single float value
///
/// This is used by `seq`. It can be constructed with [`FloatFormat::parse`]
/// and can write a value with [`FloatFormat::fmt`].
/// This is used by `seq`. It can be constructed with [`Format::parse`]
/// and can write a value with [`Format::fmt`].
///
/// It can only accept a single specification without any asterisk parameters.
/// If it does get more specifications, it will return an error.
@ -276,7 +287,7 @@ impl<F: Formatter> Format<F> {
}
let Some(spec) = spec else {
return Err(FormatError::SpecError);
return Err(FormatError::NeedAtLeastOneSpec);
};
let formatter = F::try_from_spec(spec)?;
@ -285,7 +296,7 @@ impl<F: Formatter> Format<F> {
for item in &mut iter {
match item? {
FormatItem::Spec(_) => {
return Err(FormatError::SpecError);
return Err(FormatError::TooManySpecs);
}
FormatItem::Char(c) => suffix.push(c),
}

View file

@ -97,19 +97,19 @@ impl Formatter for SignedInt {
alignment,
} = s
else {
return Err(FormatError::SpecError);
return Err(FormatError::WrongSpecType);
};
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
@ -151,7 +151,7 @@ impl Formatter for UnsignedInt {
};
if self.precision > s.len() {
s = format!("{:0width$}", s, width = self.precision)
s = format!("{:0width$}", s, width = self.precision);
}
match self.alignment {
@ -169,19 +169,19 @@ impl Formatter for UnsignedInt {
alignment,
} = s
else {
return Err(FormatError::SpecError);
return Err(FormatError::WrongSpecType);
};
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
@ -212,7 +212,7 @@ impl Default for Float {
width: 0,
positive_sign: PositiveSign::None,
alignment: NumberAlignment::Left,
precision: 2,
precision: 6,
}
}
}
@ -229,19 +229,23 @@ impl Formatter for Float {
}?;
}
let s = match self.variant {
FloatVariant::Decimal => {
format_float_decimal(x, self.precision, self.case, self.force_decimal)
}
FloatVariant::Scientific => {
format_float_scientific(x, self.precision, self.case, self.force_decimal)
}
FloatVariant::Shortest => {
format_float_shortest(x, self.precision, self.case, self.force_decimal)
}
FloatVariant::Hexadecimal => {
format_float_hexadecimal(x, self.precision, self.case, self.force_decimal)
let s = if x.is_finite() {
match self.variant {
FloatVariant::Decimal => {
format_float_decimal(x, self.precision, self.force_decimal)
}
FloatVariant::Scientific => {
format_float_scientific(x, self.precision, self.case, self.force_decimal)
}
FloatVariant::Shortest => {
format_float_shortest(x, self.precision, self.case, self.force_decimal)
}
FloatVariant::Hexadecimal => {
format_float_hexadecimal(x, self.precision, self.case, self.force_decimal)
}
}
} else {
format_float_non_finite(x, self.case)
};
match self.alignment {
@ -265,19 +269,19 @@ impl Formatter for Float {
precision,
} = s
else {
return Err(FormatError::SpecError);
return Err(FormatError::WrongSpecType);
};
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
@ -292,25 +296,16 @@ impl Formatter for Float {
}
}
fn format_float_nonfinite(f: f64, case: Case) -> String {
fn format_float_non_finite(f: f64, case: Case) -> String {
debug_assert!(!f.is_finite());
let mut s = format!("{f}");
if case == Case::Uppercase {
s.make_ascii_uppercase();
}
return s;
s
}
fn format_float_decimal(
f: f64,
precision: usize,
case: Case,
force_decimal: ForceDecimal,
) -> String {
if !f.is_finite() {
return format_float_nonfinite(f, case);
}
fn format_float_decimal(f: f64, precision: usize, force_decimal: ForceDecimal) -> String {
if precision == 0 && force_decimal == ForceDecimal::Yes {
format!("{f:.0}.")
} else {
@ -324,11 +319,6 @@ fn format_float_scientific(
case: Case,
force_decimal: ForceDecimal,
) -> String {
// If the float is NaN, -Nan, Inf or -Inf, format like any other float
if !f.is_finite() {
return format_float_nonfinite(f, case);
}
if f == 0.0 {
return if force_decimal == ForceDecimal::Yes && precision == 0 {
"0.e+00".into()
@ -337,13 +327,13 @@ fn format_float_scientific(
};
}
let mut exponent: i32 = f.log10().floor() as i32;
let mut normalized = f / 10.0_f64.powi(exponent);
// If the normalized value will be rounded to a value greater than 10
// we need to correct.
if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 {
if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0
{
normalized /= 10.0;
exponent += 1;
}
@ -371,11 +361,6 @@ fn format_float_shortest(
case: Case,
force_decimal: ForceDecimal,
) -> String {
// If the float is NaN, -Nan, Inf or -Inf, format like any other float
if !f.is_finite() {
return format_float_nonfinite(f, case);
}
// Precision here is about how many digits should be displayed
// instead of how many digits for the fractional part, this means that if
// we pass this to rust's format string, it's always gonna be one less.
@ -398,7 +383,9 @@ fn format_float_shortest(
// If the normalized value will be rounded to a value greater than 10
// we need to correct.
if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 {
if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32)
>= 10.0
{
normalized /= 10.0;
exponent += 1;
}
@ -412,12 +399,7 @@ fn format_float_shortest(
let mut normalized = format!("{normalized:.*}", precision);
if force_decimal == ForceDecimal::No {
while normalized.ends_with('0') {
normalized.pop();
}
if normalized.ends_with('.') {
normalized.pop();
}
strip_zeros_and_dot(&mut normalized);
}
let exp_char = match case {
@ -439,12 +421,7 @@ fn format_float_shortest(
};
if force_decimal == ForceDecimal::No {
while formatted.ends_with('0') {
formatted.pop();
}
if formatted.ends_with('.') {
formatted.pop();
}
strip_zeros_and_dot(&mut formatted);
}
formatted
@ -457,10 +434,6 @@ fn format_float_hexadecimal(
case: Case,
force_decimal: ForceDecimal,
) -> String {
if !f.is_finite() {
return format_float_nonfinite(f, case);
}
let (first_digit, mantissa, exponent) = if f == 0.0 {
(0, 0, 0)
} else {
@ -481,7 +454,16 @@ fn format_float_hexadecimal(
s.make_ascii_uppercase();
}
return s;
s
}
fn strip_zeros_and_dot(s: &mut String) {
while s.ends_with('0') {
s.pop();
}
if s.ends_with('.') {
s.pop();
}
}
#[cfg(test)]
@ -491,7 +473,7 @@ mod test {
#[test]
fn decimal_float() {
use super::format_float_decimal;
let f = |x| format_float_decimal(x, 6, Case::Lowercase, ForceDecimal::No);
let f = |x| format_float_decimal(x, 6, ForceDecimal::No);
assert_eq!(f(0.0), "0.000000");
assert_eq!(f(1.0), "1.000000");
assert_eq!(f(100.0), "100.000000");
@ -576,7 +558,7 @@ mod test {
assert_eq!(f(12.3456789), "1e+01");
assert_eq!(f(1000000.0), "1e+06");
assert_eq!(f(99999999.0), "1e+08");
let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes);
assert_eq!(f(0.0), "0.");
assert_eq!(f(1.0), "1.");

View file

@ -1,4 +1,6 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety intmax ptrdiff
use crate::quoting_style::{escape_name, QuotingStyle};
use super::{
num_format::{
@ -16,11 +18,12 @@ pub enum Spec {
align_left: bool,
},
String {
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
parse_escape: bool,
width: Option<CanAsterisk<usize>>,
align_left: bool,
},
EscapedString,
QuotedString,
SignedInt {
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
@ -76,12 +79,14 @@ enum Length {
}
impl Spec {
pub fn parse(rest: &mut &[u8]) -> Option<Self> {
pub fn parse<'a>(rest: &mut &'a [u8]) -> Result<Self, &'a [u8]> {
// Based on the C++ reference, the spec format looks like:
//
// %[flags][width][.precision][length]specifier
//
// However, we have already parsed the '%'.
let mut index = 0;
let start = *rest;
let mut minus = false;
let mut plus = false;
@ -89,111 +94,101 @@ impl Spec {
let mut hash = false;
let mut zero = false;
while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) {
while let Some(x) = rest.get(index) {
match x {
b'-' => minus = true,
b'+' => plus = true,
b' ' => space = true,
b'#' => hash = true,
b'0' => zero = true,
_ => unreachable!(),
_ => break,
}
*rest = &rest[1..]
index += 1;
}
let width = eat_asterisk_or_number(rest);
let alignment = match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
};
let precision = if let Some(b'.') = rest.get(0) {
*rest = &rest[1..];
Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0)))
let positive_sign = match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
};
let width = eat_asterisk_or_number(rest, &mut index);
let precision = if let Some(b'.') = rest.get(index) {
index += 1;
Some(eat_asterisk_or_number(rest, &mut index).unwrap_or(CanAsterisk::Fixed(0)))
} else {
None
};
// Parse 0..N length options, keep the last one
// Even though it is just ignored. We might want to use it later and we
// should parse those characters.
//
// TODO: This needs to be configurable: `seq` accepts only one length
// param
let mut _length = None;
loop {
let new_length = rest.get(0).and_then(|c| {
Some(match c {
b'h' => {
if let Some(b'h') = rest.get(1) {
*rest = &rest[1..];
Length::Char
} else {
Length::Short
}
}
b'l' => {
if let Some(b'l') = rest.get(1) {
*rest = &rest[1..];
Length::Long
} else {
Length::LongLong
}
}
b'j' => Length::IntMaxT,
b'z' => Length::SizeT,
b't' => Length::PtfDiffT,
b'L' => Length::LongDouble,
_ => return None,
})
});
if new_length.is_some() {
*rest = &rest[1..];
_length = new_length;
} else {
break;
}
}
// We ignore the length. It's not really relevant to printf
let _ = Self::parse_length(rest, &mut index);
let type_spec = rest.get(0)?;
*rest = &rest[1..];
Some(match type_spec {
b'c' => Spec::Char {
width,
align_left: minus,
},
b's' => Spec::String {
width,
precision,
parse_escape: false,
align_left: minus,
},
b'b' => Spec::String {
width,
precision,
parse_escape: true,
align_left: minus,
},
b'd' | b'i' => Spec::SignedInt {
width,
precision,
alignment: match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
},
positive_sign: match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
},
},
let Some(type_spec) = rest.get(index) else {
return Err(&start[..index]);
};
index += 1;
*rest = &start[index..];
Ok(match type_spec {
// GNU accepts minus, plus and space even though they are not used
b'c' => {
if hash || precision.is_some() {
return Err(&start[..index]);
}
Self::Char {
width,
align_left: minus,
}
}
b's' => {
if hash {
return Err(&start[..index]);
}
Self::String {
precision,
width,
align_left: minus,
}
}
b'b' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::EscapedString
}
b'q' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::QuotedString
}
b'd' | b'i' => {
if hash {
return Err(&start[..index]);
}
Self::SignedInt {
width,
precision,
alignment,
positive_sign,
}
}
c @ (b'u' | b'o' | b'x' | b'X') => {
// Normal unsigned integer cannot have a prefix
if *c == b'u' && hash {
return Err(&start[..index]);
}
let prefix = match hash {
false => Prefix::No,
true => Prefix::Yes,
};
let alignment = match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
};
let variant = match c {
b'u' => UnsignedIntVariant::Decimal,
b'o' => UnsignedIntVariant::Octal(prefix),
@ -201,14 +196,14 @@ impl Spec {
b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix),
_ => unreachable!(),
};
Spec::UnsignedInt {
Self::UnsignedInt {
variant,
precision,
width,
alignment,
}
}
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float {
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float {
width,
precision,
variant: match c {
@ -226,115 +221,157 @@ impl Spec {
false => Case::Lowercase,
true => Case::Uppercase,
},
alignment: match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
},
positive_sign: match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
},
alignment,
positive_sign,
},
_ => return None,
_ => return Err(&start[..index]),
})
}
fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option<Length> {
// Parse 0..N length options, keep the last one
// Even though it is just ignored. We might want to use it later and we
// should parse those characters.
//
// TODO: This needs to be configurable: `seq` accepts only one length
// param
let mut length = None;
loop {
let new_length = rest.get(*index).and_then(|c| {
Some(match c {
b'h' => {
if let Some(b'h') = rest.get(*index + 1) {
*index += 1;
Length::Char
} else {
Length::Short
}
}
b'l' => {
if let Some(b'l') = rest.get(*index + 1) {
*index += 1;
Length::Long
} else {
Length::LongLong
}
}
b'j' => Length::IntMaxT,
b'z' => Length::SizeT,
b't' => Length::PtfDiffT,
b'L' => Length::LongDouble,
_ => return None,
})
});
if new_length.is_some() {
*index += 1;
length = new_length;
} else {
break;
}
}
length
}
pub fn write<'a>(
&self,
writer: impl Write,
mut writer: impl Write,
mut args: impl ArgumentIter<'a>,
) -> Result<(), FormatError> {
match self {
&Spec::Char { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
write_padded(writer, args.get_char(), width, false, align_left)
Self::Char { width, align_left } => {
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
write_padded(writer, args.get_char(), width, false, *align_left)
}
&Spec::String {
Self::String {
width,
precision,
parse_escape,
align_left,
precision,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(precision, &mut args)?;
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
// GNU does do this truncation on a byte level, see for instance:
// printf "%.1s" 🙃
// > <20>
// For now, we let printf panic when we truncate within a code point.
// TODO: We need to not use Rust's formatting for aligning the output,
// so that we can just write bytes to stdout without panicking.
let precision = resolve_asterisk(*precision, &mut args)?;
let s = args.get_str();
if parse_escape {
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes()) {
match c.write(&mut parsed)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => {
// TODO: This should break the _entire execution_ of printf
break;
}
};
}
// GNU does do this truncation on a byte level, see for instance:
// printf "%.1s" 🙃
// > <20>
// For now, we let printf panic when we truncate within a code point.
// TODO: We need to not use Rust's formatting for aligning the output,
// so that we can just write bytes to stdout without panicking.
let truncated = match precision {
Some(p) if p < parsed.len() => &parsed[..p],
_ => &parsed,
};
write_padded(
writer,
std::str::from_utf8(&truncated).expect("TODO: Accept invalid utf8"),
width,
false,
align_left,
)
} else {
let truncated = match precision {
Some(p) if p < s.len() => &s[..p],
_ => s,
};
write_padded(writer, truncated, width, false, align_left)
}
let truncated = match precision {
Some(p) if p < s.len() => &s[..p],
_ => s,
};
write_padded(writer, truncated, width, false, *align_left)
}
&Spec::SignedInt {
Self::EscapedString => {
let s = args.get_str();
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes()) {
match c.write(&mut parsed)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => {
// TODO: This should break the _entire execution_ of printf
break;
}
};
}
writer.write_all(&parsed).map_err(FormatError::IoError)
}
Self::QuotedString => {
let s = args.get_str();
writer
.write_all(
escape_name(
s.as_ref(),
&QuotingStyle::Shell {
escape: true,
always_quote: false,
show_control: false,
},
)
.as_bytes(),
)
.map_err(FormatError::IoError)
}
Self::SignedInt {
width,
precision,
positive_sign,
alignment,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0);
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
let i = args.get_i64();
num_format::SignedInt {
width,
precision,
positive_sign,
alignment,
positive_sign: *positive_sign,
alignment: *alignment,
}
.fmt(writer, i)
.map_err(FormatError::IoError)
}
&Spec::UnsignedInt {
Self::UnsignedInt {
variant,
width,
precision,
alignment,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0);
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
let i = args.get_u64();
num_format::UnsignedInt {
variant,
variant: *variant,
precision,
width,
alignment,
alignment: *alignment,
}
.fmt(writer, i)
.map_err(FormatError::IoError)
}
&Spec::Float {
Self::Float {
variant,
case,
force_decimal,
@ -343,18 +380,18 @@ impl Spec {
alignment,
precision,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6);
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6);
let f = args.get_f64();
num_format::Float {
variant,
case,
force_decimal,
width,
positive_sign,
alignment,
precision,
variant: *variant,
case: *case,
force_decimal: *force_decimal,
positive_sign: *positive_sign,
alignment: *alignment,
}
.fmt(writer, f)
.map_err(FormatError::IoError)
@ -390,23 +427,26 @@ fn write_padded(
.map_err(FormatError::IoError)
}
fn eat_asterisk_or_number(rest: &mut &[u8]) -> Option<CanAsterisk<usize>> {
if let Some(b'*') = rest.get(0) {
*rest = &rest[1..];
fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option<CanAsterisk<usize>> {
if let Some(b'*') = rest.get(*index) {
*index += 1;
Some(CanAsterisk::Asterisk)
} else {
eat_number(rest).map(CanAsterisk::Fixed)
eat_number(rest, index).map(CanAsterisk::Fixed)
}
}
fn eat_number(rest: &mut &[u8]) -> Option<usize> {
match rest.iter().position(|b| !b.is_ascii_digit()) {
fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option<usize> {
match rest[*index..].iter().position(|b| !b.is_ascii_digit()) {
None | Some(0) => None,
Some(i) => {
// TODO: This might need to handle errors better
// For example in case of overflow.
let parsed = std::str::from_utf8(&rest[..i]).unwrap().parse().unwrap();
*rest = &rest[i..];
let parsed = std::str::from_utf8(&rest[*index..(*index + i)])
.unwrap()
.parse()
.unwrap();
*index += i;
Some(parsed)
}
}