From 7002571cf8448f89e8967643b14490a2d161301b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 May 2024 15:27:10 -0700 Subject: [PATCH] Add printf crate to workspace This adds a crate containing a new implementation of printf, ported from musl. This has some advantages: - locale support is direct instead of being "applied after". - No dependencies on libc printf. No unsafe code at all. - No more WideWrite - just uses std::fmt::Write. - Rounding is handled directly in all cases, instead of relying on Rust and/or libc. - No essential dependency on WString. - Supports %n. - Implementation is more likely to be correct since it's based on a widely used printf, instead of a low-traffic Rust crate. - Significantly faster. --- Cargo.lock | 14 +- Cargo.toml | 1 + printf/Cargo.toml | 8 + printf/src/arg.rs | 240 ++++++++++ printf/src/fmt_fp/decimal.rs | 309 +++++++++++++ printf/src/fmt_fp/mod.rs | 573 ++++++++++++++++++++++++ printf/src/fmt_fp/tests.rs | 289 ++++++++++++ printf/src/lib.rs | 84 ++++ printf/src/locale.rs | 207 +++++++++ printf/src/printf_impl.rs | 530 ++++++++++++++++++++++ printf/src/tests.rs | 845 +++++++++++++++++++++++++++++++++++ 11 files changed, 3097 insertions(+), 3 deletions(-) create mode 100644 printf/Cargo.toml create mode 100644 printf/src/arg.rs create mode 100644 printf/src/fmt_fp/decimal.rs create mode 100644 printf/src/fmt_fp/mod.rs create mode 100644 printf/src/fmt_fp/tests.rs create mode 100644 printf/src/lib.rs create mode 100644 printf/src/locale.rs create mode 100644 printf/src/printf_impl.rs create mode 100644 printf/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 637ba6906..9bcb00d7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -348,6 +348,14 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "printf" +version = "0.1.0" +dependencies = [ + "libc", + "widestring", +] + [[package]] name = 
"printf-compat" version = "0.1.1" @@ -488,9 +496,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.61" +version = "2.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9" +checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704" dependencies = [ "proc-macro2", "quote", @@ -646,5 +654,5 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] diff --git a/Cargo.toml b/Cargo.toml index 6f8263024..61f70668a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "printf" ] [workspace.package] diff --git a/printf/Cargo.toml b/printf/Cargo.toml new file mode 100644 index 000000000..76519815f --- /dev/null +++ b/printf/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "printf" +edition = "2021" +version = "0.1.0" + +[dependencies] +libc = "= 0.2.151" +widestring = "1.0.2" diff --git a/printf/src/arg.rs b/printf/src/arg.rs new file mode 100644 index 000000000..dcce21521 --- /dev/null +++ b/printf/src/arg.rs @@ -0,0 +1,240 @@ +use super::printf_impl::Error; +use std::result::Result; +use widestring::{Utf32Str as wstr, Utf32String as WString}; + +/// Printf argument types. +/// Note no implementation of ToArg constructs the owned variants (String and WString); +/// callers can do so explicitly. 
+#[derive(Debug, PartialEq)] +pub enum Arg<'a> { + Str(&'a str), + WStr(&'a wstr), + String(String), + WString(WString), + UInt(u64), + SInt(i64, u8), // signed integers track their width as the number of bits + Float(f64), + USizeRef(&'a mut usize), // for use with %n +} + +impl<'a> Arg<'a> { + pub fn set_count(&mut self, count: usize) -> Result<(), Error> { + match self { + Arg::USizeRef(p) => **p = count, + _ => return Err(Error::BadArgType), + } + Ok(()) + } + + // Convert this to a narrow string, using the provided storage if necessary. + pub fn as_str<'s>(&'s self, storage: &'s mut String) -> Result<&'s str, Error> + where + 'a: 's, + { + match self { + Arg::Str(s) => Ok(s), + Arg::String(s) => Ok(s), + Arg::WStr(s) => { + storage.clear(); + storage.extend(s.chars()); + Ok(storage) + } + Arg::WString(s) => { + storage.clear(); + storage.extend(s.chars()); + Ok(storage) + } + _ => Err(Error::BadArgType), + } + } + + // Return this value as an unsigned integer. Negative signed values will report overflow. + pub fn as_uint(&self) -> Result { + match *self { + Arg::UInt(u) => Ok(u), + Arg::SInt(i, _w) => i.try_into().map_err(|_| Error::Overflow), + _ => Err(Error::BadArgType), + } + } + + // Return this value as a signed integer. Unsigned values > i64::MAX will report overflow. + pub fn as_sint(&self) -> Result { + match *self { + Arg::UInt(u) => u.try_into().map_err(|_| Error::Overflow), + Arg::SInt(i, _w) => Ok(i), + _ => Err(Error::BadArgType), + } + } + + // If this is a signed value, then return the sign (true if negative) and the magnitude, + // masked to the value's width. This allows for e.g. -1 to be returned as 0xFF, 0xFFFF, etc. + // depending on the original width. + // If this is an unsigned value, simply return (false, u64). + pub fn as_wrapping_sint(&self) -> Result<(bool, u64), Error> { + match *self { + Arg::UInt(u) => Ok((false, u)), + Arg::SInt(i, w) => { + // Need to shift twice in case w is 64. 
+ debug_assert!(w > 0); + let mask = ((1u64 << (w - 1)) << 1).wrapping_sub(1); + let ui = (i as u64) & mask; + Ok((i < 0, ui)) + } + _ => Err(Error::BadArgType), + } + } + + // Note we allow passing ints as floats, even allowing precision loss. + pub fn as_float(&self) -> Result { + #[allow(clippy::cast_precision_loss)] + match *self { + Arg::Float(f) => Ok(f), + Arg::UInt(u) => Ok(u as f64), + Arg::SInt(i, _w) => Ok(i as f64), + _ => Err(Error::BadArgType), + } + } + + pub fn as_char(&self) -> Result { + let v: u32 = self.as_uint()?.try_into().map_err(|_| Error::Overflow)?; + v.try_into().map_err(|_| Error::Overflow) + } +} + +/// Conversion from a raw value to a printf argument. +pub trait ToArg<'a> { + fn to_arg(self) -> Arg<'a>; +} + +impl<'a> ToArg<'a> for &'a str { + fn to_arg(self) -> Arg<'a> { + Arg::Str(self) + } +} + +impl<'a> ToArg<'a> for &'a String { + fn to_arg(self) -> Arg<'a> { + Arg::Str(self) + } +} + +impl<'a> ToArg<'a> for &'a wstr { + fn to_arg(self) -> Arg<'a> { + Arg::WStr(self) + } +} + +impl<'a> ToArg<'a> for &'a WString { + fn to_arg(self) -> Arg<'a> { + Arg::WStr(self) + } +} + +impl<'a> ToArg<'a> for f32 { + fn to_arg(self) -> Arg<'a> { + Arg::Float(self.into()) + } +} + +impl<'a> ToArg<'a> for f64 { + fn to_arg(self) -> Arg<'a> { + Arg::Float(self) + } +} + +impl<'a> ToArg<'a> for char { + fn to_arg(self) -> Arg<'a> { + Arg::UInt((self as u32).into()) + } +} + +impl<'a> ToArg<'a> for &'a mut usize { + fn to_arg(self) -> Arg<'a> { + Arg::USizeRef(self) + } +} + +impl<'a, T> ToArg<'a> for &'a *const T { + fn to_arg(self) -> Arg<'a> { + Arg::UInt((*self) as usize as u64) + } +} + +/// All signed types. +macro_rules! impl_to_arg { + ($($t:ty),*) => { + $( + impl<'a> ToArg<'a> for $t { + fn to_arg(self) -> Arg<'a> { + Arg::SInt(self as i64, <$t>::BITS as u8) + } + } + )* + }; +} +impl_to_arg!(i8, i16, i32, i64, isize); + +/// All unsigned types. +macro_rules! 
impl_to_arg_u { + ($($t:ty),*) => { + $( + impl<'a> ToArg<'a> for $t { + fn to_arg(self) -> Arg<'a> { + Arg::UInt(self as u64) + } + } + )* + }; +} +impl_to_arg_u!(u8, u16, u32, u64, usize); + +#[cfg(test)] +mod tests { + use super::*; + use widestring::utf32str; + + #[test] + fn test_to_arg() { + const SIZE_WIDTH: u8 = isize::BITS as u8; + + assert!(matches!("test".to_arg(), Arg::Str("test"))); + assert!(matches!(String::from("test").to_arg(), Arg::Str(_))); + assert!(matches!(utf32str!("test").to_arg(), Arg::WStr(_))); + assert!(matches!(WString::from("test").to_arg(), Arg::WStr(_))); + assert!(matches!(42f32.to_arg(), Arg::Float(_))); + assert!(matches!(42f64.to_arg(), Arg::Float(_))); + assert!(matches!('x'.to_arg(), Arg::UInt(120))); + let mut usize_val: usize = 0; + assert!(matches!((&mut usize_val).to_arg(), Arg::USizeRef(_))); + assert!(matches!(42i8.to_arg(), Arg::SInt(42, 8))); + assert!(matches!(42i16.to_arg(), Arg::SInt(42, 16))); + assert!(matches!(42i32.to_arg(), Arg::SInt(42, 32))); + assert!(matches!(42i64.to_arg(), Arg::SInt(42, 64))); + assert!(matches!(42isize.to_arg(), Arg::SInt(42, SIZE_WIDTH))); + + assert_eq!((-42i8).to_arg(), Arg::SInt(-42, 8)); + assert_eq!((-42i16).to_arg(), Arg::SInt(-42, 16)); + assert_eq!((-42i32).to_arg(), Arg::SInt(-42, 32)); + assert_eq!((-42i64).to_arg(), Arg::SInt(-42, 64)); + assert_eq!((-42isize).to_arg(), Arg::SInt(-42, SIZE_WIDTH)); + + assert!(matches!(42u8.to_arg(), Arg::UInt(42))); + assert!(matches!(42u16.to_arg(), Arg::UInt(42))); + assert!(matches!(42u32.to_arg(), Arg::UInt(42))); + assert!(matches!(42u64.to_arg(), Arg::UInt(42))); + assert!(matches!(42usize.to_arg(), Arg::UInt(42))); + + let ptr = &42f32 as *const f32; + assert!(matches!(ptr.to_arg(), Arg::UInt(_))); + } + + #[test] + fn test_negative_to_arg() { + assert_eq!((-1_i8).to_arg().as_sint(), Ok(-1)); + assert_eq!((-1_i16).to_arg().as_sint(), Ok(-1)); + assert_eq!((-1_i32).to_arg().as_sint(), Ok(-1)); + assert_eq!((-1_i64).to_arg().as_sint(), 
Ok(-1)); + + assert_eq!((u64::MAX).to_arg().as_sint(), Err(Error::Overflow)); + } +} diff --git a/printf/src/fmt_fp/decimal.rs b/printf/src/fmt_fp/decimal.rs new file mode 100644 index 000000000..23123454f --- /dev/null +++ b/printf/src/fmt_fp/decimal.rs @@ -0,0 +1,309 @@ +use super::{frexp, log10u}; +use std::collections::VecDeque; + +// A module which represents a floating point value using base 1e9 digits, +// and tracks the radix point. + +// We represent floating point values in base 1e9. +pub const DIGIT_WIDTH: usize = 9; +pub const DIGIT_BASE: u32 = 1_000_000_000; + +// log2(1e9) = 29.9, so store 29 binary digits per base 1e9 decimal digit. +pub const BITS_PER_DIGIT: usize = 29; + +// Returns n/d and n%d, rounding towards negative infinity. +#[inline] +pub fn divmod_floor(n: i32, d: i32) -> (i32, i32) { + (n.div_euclid(d), n.rem_euclid(d)) +} + +// Helper to limit excess precision in our decimal representation. +// Do not compute more digits (in our base) than needed. +#[derive(Debug, Clone, Copy)] +pub enum DigitLimit { + Total(usize), + Fractional(usize), +} + +// A struct representing an array of digits in base 1e9, along with the offset from the +// first digit to the least significant digit before the decimal, and the sign. +#[derive(Debug)] +pub struct Decimal { + // The list of digits, in our base. + pub digits: VecDeque, + + // The offset from first digit to least significant digit before the decimal. + // Possibly negative! + pub radix: i32, + + // Whether our initial value was negative. + pub negative: bool, +} + +impl Decimal { + // Construct a Decimal from a floating point number. + // The number must be finite. + pub fn new(y: f64, limit: DigitLimit) -> Self { + debug_assert!(y.is_finite()); + let negative = y.is_sign_negative(); + + // Break the number into exponent and mantissa. + // Normalize mantissa to a single leading digit, if nonzero. 
+ let (mut y, mut e2) = frexp(y.abs()); + if y != 0.0 { + y *= (1 << BITS_PER_DIGIT) as f64; + e2 -= BITS_PER_DIGIT as i32; + } + + // Express the mantissa as a decimal string in our base. + let mut digits = Vec::new(); + while y != 0.0 { + debug_assert!(y >= 0.0 && y < DIGIT_BASE as f64); + let digit = y as u32; + digits.push(digit); + y = (DIGIT_BASE as f64) * (y - digit as f64); + } + + // Construct ourselves and apply our exponent. + let mut decimal = Decimal { + digits: digits.into(), + radix: 0, + negative, + }; + if e2 >= 0 { + decimal.shift_left(e2 as usize); + } else { + decimal.shift_right(-e2 as usize, limit); + } + decimal + } + + // Push a digit to the beginning, preserving the radix point. + pub fn push_front(&mut self, digit: u32) { + self.digits.push_front(digit); + self.radix += 1; + } + + // Push a digit to the end, preserving the radix point. + pub fn push_back(&mut self, digit: u32) { + self.digits.push_back(digit); + } + + // Return the least significant digit. + pub fn last(&self) -> Option { + self.digits.back().copied() + } + + // Return the most significant digit. + pub fn first(&self) -> Option { + self.digits.front().copied() + } + + // Shift left by a power of 2. + pub fn shift_left(&mut self, mut amt: usize) { + while amt > 0 { + let sh = amt.min(BITS_PER_DIGIT); + let mut carry: u32 = 0; + for digit in self.digits.iter_mut().rev() { + let nd = ((*digit as u64) << sh) + carry as u64; + *digit = (nd % DIGIT_BASE as u64) as u32; + carry = (nd / DIGIT_BASE as u64) as u32; + } + if carry != 0 { + self.push_front(carry); + } + self.trim_trailing_zeros(); + amt -= sh; + } + } + + // Shift right by a power of 2, limiting the precision. + pub fn shift_right(&mut self, mut amt: usize, limit: DigitLimit) { + // Divide by 2^sh, moving left to right. + // Do no more than DIGIT_WIDTH at a time because that is the largest + // power of 2 that divides DIGIT_BASE; therefore DIGIT_BASE >> sh + // is always exact. 
+ while amt > 0 { + let sh = amt.min(DIGIT_WIDTH); + let mut carry: u32 = 0; + // It is significantly faster to iterate over the two slices of the deque + // than the deque itself. + let (s1, s2) = self.digits.as_mut_slices(); + for digit in s1.iter_mut() { + let remainder = *digit & ((1 << sh) - 1); // digit % 2^sh + *digit = (*digit >> sh) + carry; + carry = (DIGIT_BASE >> sh) * remainder; + } + for digit in s2.iter_mut() { + let remainder = *digit & ((1 << sh) - 1); // digit % 2^sh + *digit = (*digit >> sh) + carry; + carry = (DIGIT_BASE >> sh) * remainder; + } + self.trim_leading_zeros(); + if carry != 0 { + self.push_back(carry); + } + amt -= sh; + // Truncate if we have computed more than we need. + match limit { + DigitLimit::Total(n) => { + self.digits.truncate(n); + } + DigitLimit::Fractional(n) => { + let current = (self.digits.len() as i32 - self.radix - 1).max(0) as usize; + let to_trunc = current.saturating_sub(n); + self.digits + .truncate(self.digits.len().saturating_sub(to_trunc)); + } + } + } + } + + // Return the length as an i32. + pub fn len_i32(&self) -> i32 { + self.digits.len() as i32 + } + + // Compute the exponent, base 10. + pub fn exponent(&self) -> i32 { + let Some(first_digit) = self.first() else { + return 0; + }; + self.radix * (DIGIT_WIDTH as i32) + log10u(first_digit) + } + + // Compute the number of fractional digits - possibly negative. + pub fn fractional_digit_count(&self) -> i32 { + (DIGIT_WIDTH as i32) * (self.digits.len() as i32 - self.radix - 1) + } + + // Trim leading zeros. + fn trim_leading_zeros(&mut self) { + while self.digits.front() == Some(&0) { + self.digits.pop_front(); + self.radix -= 1; + } + } + + // Trim trailing zeros. + fn trim_trailing_zeros(&mut self) { + while self.digits.iter().last() == Some(&0) { + self.digits.pop_back(); + } + } + + // Round to a given number of fractional digits (possibly negative). 
+ pub fn round_to_fractional_digits(&mut self, desired_frac_digits: i32) { + let frac_digit_count = self.fractional_digit_count(); + if desired_frac_digits >= frac_digit_count { + return; + } + let (quot, rem) = divmod_floor(desired_frac_digits, DIGIT_WIDTH as i32); + // Find the index of the last digit to keep. + let mut last_digit_idx = self.radix + 1 + quot; + + // If desired_frac_digits is small, and we are very small, then last_digit_idx may be negative. + while last_digit_idx < 0 { + self.push_front(0); + last_digit_idx += 1; + } + + // Now we have the index of the digit - figure out how much of the digit to keep. + // If 'rem' is 0 then we round off all of it (mod by 10**9); if 'rem' is 8 then we round off + // only its least significant decimal place (mod by 10). + debug_assert!(DIGIT_WIDTH as i32 > rem); + let mod_base = 10u32.pow((DIGIT_WIDTH as i32 - rem) as u32); + debug_assert!(mod_base <= DIGIT_BASE); + + let remainder_to_round = self[last_digit_idx] % mod_base; + self[last_digit_idx] -= remainder_to_round; + + // Round up if necessary. + if self.should_round_up(last_digit_idx, remainder_to_round, mod_base) { + self[last_digit_idx] += mod_base; + // Propagate carry. + while self[last_digit_idx] >= DIGIT_BASE { + self[last_digit_idx] = 0; + last_digit_idx -= 1; + if last_digit_idx < 0 { + self.push_front(0); + last_digit_idx = 0; + } + self[last_digit_idx] += 1; + } + } + self.digits.truncate(last_digit_idx as usize + 1); + self.trim_trailing_zeros(); + } + + // We are about to round ourself such that digit_idx is the last digit, + // with mod_base being a power of 10 such that we round off self[digit_idx] % mod_base, + // which is given by remainder (that is, remainder = self[digit_idx] % mod_base). + // Return true if we should round up (in magnitude), as determined by the floating point + // rounding mode. 
+ #[inline] + fn should_round_up(&self, digit_idx: i32, remainder: u32, mod_base: u32) -> bool { + if remainder == 0 && digit_idx + 1 == self.len_i32() { + // No remaining digits. + return false; + } + + // 'round' is the first float such that 'round + 1.0' is not representable. + // We will add a value to it and see whether it rounds up or down, thus + // matching the fp rounding mode. + let mut round = 2.0_f64.powi(f64::MANTISSA_DIGITS as i32); + + // In the likely event that the fp rounding mode is FE_TONEAREST, then ties are rounded to + // the nearest value with a least significant digit of 0. Ensure 'round's least significant + // bit agrees with whether our rounding digit is odd. + let rounding_digit = if mod_base < DIGIT_BASE { + self[digit_idx] / mod_base + } else if digit_idx > 0 { + self[digit_idx - 1] + } else { + 0 + }; + if rounding_digit & 1 != 0 { + round += 2.0; + // round now has an odd lsb (though round itself is even). + debug_assert!(round.to_bits() & 1 != 0); + } + + // Set 'small' to a value which is less than halfway, exactly halfway, or more than halfway + // between round and the next representable float (which is round + 2.0). + let mut small = if remainder < mod_base / 2 { + 0.5 + } else if remainder == mod_base / 2 && digit_idx + 1 == self.len_i32() { + 1.0 + } else { + 1.5 + }; + + // If the initial value was negative, then negate round and small, thus respecting FE_UPWARD / FE_DOWNWARD. + if self.negative { + round = -round; + small = -small; + } + + // Round up if round + small increases (in magnitude). + round + small != round + } +} + +// Index, with i32. +impl std::ops::Index for Decimal { + type Output = u32; + + fn index(&self, index: i32) -> &Self::Output { + assert!(index >= 0); + &self.digits[index as usize] + } +} + +// IndexMut, with i32. 
+impl std::ops::IndexMut for Decimal { + fn index_mut(&mut self, index: i32) -> &mut Self::Output { + assert!(index >= 0); + &mut self.digits[index as usize] + } +} diff --git a/printf/src/fmt_fp/mod.rs b/printf/src/fmt_fp/mod.rs new file mode 100644 index 000000000..c6c1ddb21 --- /dev/null +++ b/printf/src/fmt_fp/mod.rs @@ -0,0 +1,573 @@ +mod decimal; +#[cfg(test)] +mod tests; + +use super::locale::Locale; +use super::printf_impl::{pad, ConversionSpec, Error, ModifierFlags}; +use decimal::{Decimal, DigitLimit, DIGIT_WIDTH}; +use std::cmp::min; +use std::fmt::Write; + +// Number of binary digits in the mantissa, including any implicit 1. +const MANTISSA_BITS: usize = f64::MANTISSA_DIGITS as usize; + +// Break a floating point number into a normalized fraction and a power of 2. +// The fraction's magnitude will either be 0, or in the range [1/2, 1). +// We have value = frac * 2^exp. +fn frexp(x: f64) -> (f64, i32) { + const EXPLICIT_MANTISSA_BITS: i32 = MANTISSA_BITS as i32 - 1; + const EXPONENT_BIAS: i32 = 1023; + let mut i = x.to_bits(); + let ee = ((i >> EXPLICIT_MANTISSA_BITS) & 0x7ff) as i32; // exponent + if ee == 0 { + if x == 0.0 { + (x, 0) + } else { + // Subnormal. Scale up. + let (x, e) = frexp(x * 2.0f64.powi(64)); + (x, e - 64) + } + } else if ee == 0x7ff { + // Inf or NaN. + (x, 0) + } else { + // Normal. + // The mantissa is conceptually in the range [1, 2), but we want to + // return it in the range [1/2, 1); remove the exponent bias but increase the + // exponent by 1. + let e = ee - (EXPONENT_BIAS - 1); + // Set the exponent to -1, so we are in the range [1/2, 1). + i &= 0x800fffffffffffff; + i |= (EXPONENT_BIAS as u64 - 1) << EXPLICIT_MANTISSA_BITS; + (f64::from_bits(i), e) + } +} + +// Return floor of log base 10 of an unsigned value. +// The log base 10 of 0 is treated as 0, for convenience. 
+fn log10u(x: u32) -> i32 { + if x >= 1_000_000_000 { + return 9; + } + let mut result = 0; + let mut prod = 10; + while prod <= x { + result += 1; + prod *= 10; + } + result +} + +// Returns the number of trailing decimal zeros in the given value. +// If the value is 0, return 9. +fn trailing_decimal_zeros(mut d: u32) -> i32 { + if d == 0 { + return 9; + } + let mut zeros = 0; + while d % 10 == 0 { + zeros += 1; + d /= 10; + } + zeros +} + +/// A helper type to store common formatting parameters. +struct FormatParams<'a, W: Write> { + // The receiver of formatted output. + f: &'a mut W, + + // Width of the output. + width: usize, + + // Precision of the output. This defaults to 6. + prec: usize, + + // Whether the precision was explicitly set. + had_prec: bool, + + // Flags to control formatting options. + flags: ModifierFlags, + + // The locale to apply. + locale: &'a Locale, + + // The initial prefix such as sign or space. Not used for hex. + prefix: &'static str, + + // Whether our conversion specifier was lowercase. + lower: bool, + + // A buffer to use for temporary storage. + buf: &'a mut String, +} + +/// Formats a floating-point number `y` into a provided writer `f` with specified formatting options. +/// +/// # Parameters +/// - `f`: The receiver of formatted output. +/// - `y`: The value to format. +/// - `width`: The minimum width of the formatted string. If the result is shorter, it will be padded. +/// - `prec`: The precision, i.e., the number of digits after the decimal point, or None if not given. +/// - `flags`: ModifierFlags to control formatting options. +/// - `locale`: The locale. +/// - `conv_spec`: The type of formatting : 'e', 'f', 'g', 'a', 'E', 'F', 'G', 'A'. +/// - `buf`: A buffer to use for temporary storage. +/// +/// # Returns +/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`. 
+#[allow(clippy::too_many_arguments)] +pub(crate) fn format_float( + f: &mut impl Write, + y: f64, + width: usize, + prec: Option, + flags: ModifierFlags, + locale: &Locale, + conv_spec: ConversionSpec, + buf: &mut String, +) -> Result { + // Only float conversions are expected. + type CS = ConversionSpec; + debug_assert!(matches!( + conv_spec, + CS::e | CS::E | CS::f | CS::F | CS::g | CS::G | CS::a | CS::A + )); + let prefix = match (y.is_sign_negative(), flags.mark_pos, flags.pad_pos) { + (true, _, _) => "-", + (false, true, _) => "+", + (false, false, true) => " ", + (false, false, false) => "", + }; + + // "If the precision is missing, it is taken as 6" (except for %a and %A, which care about a missing precision). + let had_prec = prec.is_some(); + let prec = prec.unwrap_or(6); + + let params = FormatParams { + f, + width, + prec, + had_prec, + flags, + locale, + prefix, + lower: conv_spec.is_lower(), + buf, + }; + + // Handle infinities and NaNs. + if !y.is_finite() { + return format_nonfinite(y, params); + } + + // Handle hex formatting. + if matches!(conv_spec, CS::a | CS::A) { + return format_a(y, params); + } + + // As an optimization, allow the precision to limit the number of digits we compute. + // Count this as number of desired decimal digits, converted to our base, rounded up, +1 for + // rounding off. + // For 'f'/'F', precision is after the decimal; for others it is total number of digits. + let prec_limit = match conv_spec { + CS::f | CS::F => DigitLimit::Fractional(prec / DIGIT_WIDTH + 2), + _ => DigitLimit::Total(prec / DIGIT_WIDTH + 2), + }; + + // Construct our digits. + let mut decimal = Decimal::new(y, prec_limit); + + // Compute the number of desired fractional digits - possibly negative. + let mut desired_frac_digits: i32 = prec.try_into().map_err(|_| Error::Overflow)?; + if matches!(conv_spec, CS::e | CS::E | CS::g | CS::G) { + // For 'e' and 'E', the precision is the number of digits after the decimal point. 
+ // We are going to divide by 10^e, so adjust desired_frac_digits accordingly. + // Note that e10 may be negative, so guard against overflow in the positive direction. + let e10 = decimal.exponent(); + desired_frac_digits = desired_frac_digits.saturating_sub(e10); + } + if matches!(conv_spec, CS::g | CS::G) && prec != 0 { + desired_frac_digits -= 1; + } + decimal.round_to_fractional_digits(desired_frac_digits); + + match conv_spec { + CS::e | CS::E => format_e_f(&mut decimal, params, true), + CS::f | CS::F => format_e_f(&mut decimal, params, false), + CS::g | CS::G => format_g(&mut decimal, params), + _ => unreachable!(), + } +} + +// Format a non-finite float. +fn format_nonfinite(y: f64, params: FormatParams<'_, impl Write>) -> Result { + let FormatParams { + f, + width, + flags, + prefix, + lower, + .. + } = params; + let s = match (y.is_nan(), lower) { + (true, true) => "nan", + (true, false) => "NAN", + (false, true) => "inf", + (false, false) => "INF", + }; + let unpadded_width = s.len() + prefix.len(); + if !flags.left_adj { + pad(f, ' ', width, unpadded_width)?; + } + f.write_str(prefix)?; + f.write_str(s)?; + if flags.left_adj { + pad(f, ' ', width, unpadded_width)?; + } + Ok(width.max(unpadded_width)) +} + +/// Formats a floating-point number `y` as hex (%a/%A). +/// +/// # Parameters +/// - `y`: The value to format. This is always finite. +/// - `params`: Params controlling formatting. +/// +/// # Returns +/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`. +fn format_a(mut y: f64, params: FormatParams<'_, impl Write>) -> Result { + debug_assert!(y.is_finite()); + let negative = y.is_sign_negative(); + y = y.abs(); + + let FormatParams { + f, + width, + had_prec, + prec, + flags, + locale, + lower, + buf, + .. + } = params; + + let (mut y, mut e2) = frexp(y); + + // normalize to range [1, 2), or 0.0. 
+ if y != 0.0 { + y *= 2.0; + e2 -= 1; + } + + let prefix = if lower { + match (negative, flags.mark_pos, flags.pad_pos) { + (true, _, _) => "-0x", + (false, true, _) => "+0x", + (false, false, true) => " 0x", + (false, false, false) => "0x", + } + } else { + match (negative, flags.mark_pos, flags.pad_pos) { + (true, _, _) => "-0X", + (false, true, _) => "+0X", + (false, false, true) => " 0X", + (false, false, false) => "0X", + } + }; + + // Compute the number of hex digits in the mantissa after the decimal. + // -1 for leading 1 bit (we are to the range [1, 2)), then divide by 4, rounding up. + const MANTISSA_HEX_DIGITS: usize = (MANTISSA_BITS - 1 + 3) / 4; + if had_prec && prec < MANTISSA_HEX_DIGITS { + // Decide how many least-significant bits to round off the mantissa. + let desired_bits = prec * 4; + let bits_to_round = MANTISSA_BITS - 1 - desired_bits; + debug_assert!(bits_to_round > 0 && bits_to_round < MANTISSA_BITS); + let round = 2.0f64.powi(bits_to_round as i32); + if negative { + y = -y; + y -= round; + y += round; + y = -y; + } else { + y += round; + y -= round; + } + } + let estr = format!( + "{}{}{}", + if lower { 'p' } else { 'P' }, + if e2 < 0 { '-' } else { '+' }, + e2.unsigned_abs() + ); + + let xdigits: &[u8; 16] = if lower { + b"0123456789abcdef" + } else { + b"0123456789ABCDEF" + }; + let body = buf; + loop { + let x = y as i32; + body.push(xdigits[x as usize] as char); + y = 16.0 * (y - (x as f64)); + if body.len() == 1 && (y != 0.0 || (had_prec && prec > 0) || flags.alt_form) { + body.push(locale.decimal_point); + } + if y == 0.0 { + break; + } + } + + let mut body_exp_len = body.len() + estr.len(); + if had_prec && prec > 0 { + // +2 for leading digit and decimal. 
+ let len_with_prec = prec.checked_add(2 + estr.len()).ok_or(Error::Overflow)?; + body_exp_len = body_exp_len.max(len_with_prec); + } + + let prefix_len = prefix.len(); + let unpadded_width = prefix_len + .checked_add(body_exp_len) + .ok_or(Error::Overflow)?; + + // Pad on the left with spaces to the desired width? + if !flags.left_adj && !flags.zero_pad { + pad(f, ' ', width, unpadded_width)?; + } + + // Output any prefix. + f.write_str(prefix)?; + + // Pad after the prefix with zeros to the desired width? + if !flags.left_adj && flags.zero_pad { + pad(f, '0', width, unpadded_width)?; + } + + // Output the actual value. + f.write_str(body)?; + + // Pad the body with zeros on the right (reflecting precision)? + pad(f, '0', body_exp_len - estr.len() - body.len(), 0)?; + + // Output the exponent. + f.write_str(&estr)?; + + // Pad on the right with spaces to the desired width? + if flags.left_adj { + pad(f, ' ', width, prefix_len + body_exp_len)?; + } + Ok(width.max(unpadded_width)) +} + +/// Formats a floating-point number in formats %e/%E/%f/%F. +/// +/// # Parameters +/// - `decimal`: The extracted digits of the value. +/// - `params`: Params controlling formatting. +/// - `is_e`: If true, the conversion specifier is 'e' or 'E', otherwise 'f' or 'F'. +fn format_e_f( + decimal: &mut Decimal, + params: FormatParams<'_, impl Write>, + is_e: bool, +) -> Result { + let FormatParams { + f, + width, + prec, + flags, + locale, + prefix, + lower, + buf, + .. + } = params; + + // Exponent base 10. + let e10 = decimal.exponent(); + + // Compute an exponent string for 'e' / 'E'. + let estr = if is_e { + // "The exponent always contains at least two digits." + let sign = if e10 < 0 { '-' } else { '+' }; + let e = if lower { 'e' } else { 'E' }; + format!("{}{}{:02}", e, sign, e10.unsigned_abs()) + } else { + // No exponent for 'f' / 'F'. + String::new() + }; + + // Compute the body length. 
+ // For 'f' / 'F' formats, the precision is after the decimal point, so a positive exponent + // will increase the body length. We also must consider insertion of separators. + // Note the body length must be correct, as it is used to compute the width. + let integer_len = if is_e { + 1 + } else { + let mut len = 1 + e10.max(0) as usize; + if flags.grouped { + len += locale.separator_count(len); + } + len + }; + let decimal_len = if prec > 0 || flags.alt_form { 1 } else { 0 }; + let body_len = integer_len + decimal_len + prec + estr.len(); + + let prefix_len = prefix.len(); + // Emit the prefix and any padding. + if !flags.left_adj && !flags.zero_pad { + pad(f, ' ', width, prefix_len + body_len)?; + } + f.write_str(prefix)?; + if !flags.left_adj && flags.zero_pad { + pad(f, '0', width, prefix_len + body_len)?; + } + + if is_e { + format_mantissa_e(decimal, prec, flags, locale, f, buf)?; + // Emit the exponent. + f.write_str(&estr)?; + } else { + format_mantissa_f(decimal, prec, flags, locale, f, buf)?; + } + if flags.left_adj && !flags.zero_pad { + pad(f, ' ', width, prefix_len + body_len)?; + } + Ok(width.max(prefix_len + body_len)) +} + +/// Formats a floating point number in "g" / "G" form. +/// +/// # Parameters +/// - `decimal`: The extracted digits of the value. +/// - `params`: Params controlling formatting. +fn format_g( + decimal: &mut Decimal, + mut params: FormatParams<'_, impl Write>, +) -> Result { + // "If the precision is zero, it is treated as 1." + params.prec = params.prec.max(1); + + // "Style e is used if the exponent from its conversion is less than -4 or greater than or equal to the precision." + let use_style_e; + let e10 = decimal.exponent(); + let e10mag = e10.unsigned_abs() as usize; + if e10 < -4 || (e10 >= 0 && e10mag >= params.prec) { + use_style_e = true; + params.prec -= 1; + } else { + use_style_e = false; + params.prec -= 1; + // prec -= e10. Overflow is impossible since prec <= i32::MAX. 
+ params.prec = if e10 < 0 { + params.prec.checked_add(e10mag).unwrap() + } else { + params.prec.checked_sub(e10mag).unwrap() + }; + } + if !params.flags.alt_form { + // Count trailing zeros in last place. + let trailing_zeros = trailing_decimal_zeros(decimal.last().unwrap_or(0)); + let mut computed_prec = decimal.fractional_digit_count() - trailing_zeros; + if use_style_e { + computed_prec += e10; + } + params.prec = params.prec.min(computed_prec.max(0) as usize); + } + format_e_f(decimal, params, use_style_e) +} + +// Helper to format the mantissa of a floating point number in "e" / "E" form. +fn format_mantissa_e( + decimal: &Decimal, + prec: usize, + flags: ModifierFlags, + locale: &Locale, + f: &mut impl Write, + buf: &mut String, +) -> Result<(), Error> { + let mut prec_left = prec; + // The decimal may be empty, so ensure we loop at least once. + for d in 0..decimal.len_i32().max(1) { + let digit = if d < decimal.len_i32() { decimal[d] } else { 0 }; + let min_width = if d > 0 { DIGIT_WIDTH } else { 1 }; + buf.clear(); + write!(buf, "{:0width$}", digit, width = min_width)?; + let mut s = buf.as_str(); + if d == 0 { + // First digit. Emit it, and likely also a decimal point. + f.write_str(&s[..1])?; + s = &s[1..]; + if prec_left > 0 || flags.alt_form { + f.write_char(locale.decimal_point)?; + } + } + let outlen = s.len().min(prec_left); + f.write_str(&s[..outlen])?; + prec_left -= outlen; + if prec_left == 0 { + break; + } + } + // Emit trailing zeros for excess precision. + pad(f, '0', prec_left, 0)?; + Ok(()) +} + +// Helper to format the mantissa of a floating point number in "f" / "F" form. +fn format_mantissa_f( + decimal: &mut Decimal, + prec: usize, + flags: ModifierFlags, + locale: &Locale, + f: &mut impl Write, + buf: &mut String, +) -> Result<(), Error> { + // %f conversions (almost) always have at least one digit before the decimal, + // so ensure that the radix is not-negative and the decimal covers the radix. 
+ while decimal.radix < 0 { + decimal.push_front(0); + } + while decimal.len_i32() <= decimal.radix { + decimal.push_back(0); + } + + // Emit digits before the decimal. + // We may need thousands grouping here (but for no other floating point types). + let do_grouping = flags.grouped && locale.thousands_sep.is_some(); + for d in 0..=decimal.radix { + let min_width = if d > 0 { DIGIT_WIDTH } else { 1 }; + if do_grouping { + // Emit into our buffer so we can later apply thousands grouping. + write!(buf, "{:0width$}", decimal[d], width = min_width)?; + } else { + // Write digits directly. + write!(f, "{:0width$}", decimal[d], width = min_width)?; + } + } + if do_grouping { + f.write_str(&locale.apply_grouping(buf))?; + } + + // Emit decimal point. + if prec != 0 || flags.alt_form { + f.write_char(locale.decimal_point)?; + } + // Emit prec digits after the decimal, stopping if we run out. + let mut prec_left: usize = prec; + for d in (decimal.radix + 1)..decimal.len_i32() { + if prec_left == 0 { + break; + } + let max_digits = min(DIGIT_WIDTH, prec_left); + buf.clear(); + write!(buf, "{:0width$}", decimal[d], width = DIGIT_WIDTH)?; + f.write_str(&buf[..max_digits])?; + prec_left -= max_digits; + } + // Emit trailing zeros for excess precision. + pad(f, '0', prec_left, 0)?; + Ok(()) +} diff --git a/printf/src/fmt_fp/tests.rs b/printf/src/fmt_fp/tests.rs new file mode 100644 index 000000000..b02b8ea24 --- /dev/null +++ b/printf/src/fmt_fp/tests.rs @@ -0,0 +1,289 @@ +use super::*; +use decimal::*; +use std::collections::VecDeque; + +#[test] +fn test_frexp() { + // Note f64::MIN_POSITIVE is normalized - we want denormal. 
+ let min_pos_denormal = f64::from_bits(1); + let min_neg_denormal = -min_pos_denormal; + let cases = vec![ + (0.0, (0.0, 0)), + (-0.0, (-0.0, 0)), + (1.0, (0.5, 1)), + (-1.0, (-0.5, 1)), + (2.5, (0.625, 2)), + (-2.5, (-0.625, 2)), + (1024.0, (0.5, 11)), + (f64::MAX, (0.9999999999999999, 1024)), + (-f64::MAX, (-0.9999999999999999, 1024)), + (f64::INFINITY, (f64::INFINITY, 0)), + (f64::NEG_INFINITY, (f64::NEG_INFINITY, 0)), + (f64::NAN, (f64::NAN, 0)), + (min_pos_denormal, (0.5, -1073)), + (min_neg_denormal, (-0.5, -1073)), + ]; + + for (x, (want_frac, want_exp)) in cases { + let (frac, exp) = frexp(x); + if x.is_nan() { + assert!(frac.is_nan()); + continue; + } + assert_eq!(frac, want_frac); + assert_eq!(frac.is_sign_negative(), want_frac.is_sign_negative()); + assert_eq!(exp, want_exp); + } +} + +#[test] +fn test_log10u() { + assert_eq!(log10u(0), 0); + assert_eq!(log10u(1), 0); + assert_eq!(log10u(5), 0); + assert_eq!(log10u(9), 0); + assert_eq!(log10u(10), 1); + assert_eq!(log10u(500), 2); + assert_eq!(log10u(6000), 3); + assert_eq!(log10u(9999), 3); + assert_eq!(log10u(70000), 4); + assert_eq!(log10u(70001), 4); + assert_eq!(log10u(900000), 5); + assert_eq!(log10u(3000000), 6); + assert_eq!(log10u(50000000), 7); + assert_eq!(log10u(100000000), 8); + assert_eq!(log10u(1840683745), 9); + assert_eq!(log10u(4000000000), 9); + assert_eq!(log10u(u32::MAX), 9); +} + +#[test] +fn test_div_floor() { + for numer in -100..100 { + for denom in 1..100 { + let (q, r) = divmod_floor(numer, denom); + assert!(r >= 0, "Remainder should be non-negative"); + assert!(r < denom.abs(), "Remainder should be less than divisor"); + assert_eq!(numer, q * denom + r, "Quotient should be exact"); + } + } + assert_eq!(divmod_floor(i32::MIN, 1), (i32::MIN, 0)); + assert_eq!(divmod_floor(i32::MIN, i32::MAX), (-2, i32::MAX - 1)); + assert_eq!(divmod_floor(i32::MAX, i32::MAX), (1, 0)); +} + +#[test] +fn test_digits_new() { + let unlimit = DigitLimit::Total(usize::MAX); + let mut decimal = 
Decimal::new(0.0, unlimit); + assert_eq!(decimal.digits, &[]); + assert_eq!(decimal.radix, 0); + + decimal = Decimal::new(1.0, unlimit); + assert_eq!(decimal.digits, &[1]); + assert_eq!(decimal.radix, 0); + + decimal = Decimal::new(0.5, unlimit); + assert_eq!(decimal.digits, &[500_000_000]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(0.25, unlimit); + assert_eq!(decimal.digits, &[250_000_000]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0, unlimit); + assert_eq!(decimal.digits, &[2]); + assert_eq!(decimal.radix, 0); + + decimal = Decimal::new(1_234_567_890.5, unlimit); + assert_eq!(decimal.digits, &[1, 234_567_890, 500_000_000]); + assert_eq!(decimal.radix, 1); + + decimal = Decimal::new(12_345_678_901.0, unlimit); + assert_eq!(decimal.digits, &[12, 345_678_901]); + assert_eq!(decimal.radix, 1); + + decimal = Decimal::new(2.0_f64.powi(-1), unlimit); + assert_eq!(decimal.digits, &[500_000_000]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0_f64.powi(-2), unlimit); + assert_eq!(decimal.digits, &[250_000_000]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0_f64.powi(-4), unlimit); + assert_eq!(decimal.digits, &[62_500_000]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0_f64.powi(-8), unlimit); + assert_eq!(decimal.digits, &[3_906_250]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0_f64.powi(-16), unlimit); + assert_eq!(decimal.digits, &[15_258, 789_062_500]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal::new(2.0_f64.powi(-64), unlimit); + assert_eq!( + decimal.digits, + &[ + 54_210_108, + 624_275_221, + 700_372_640, + 43_497_085, + 571_289_062, + 500_000_000 + ] + ); + assert_eq!(decimal.radix, -3); + + assert!(!Decimal::new(1.0, unlimit).negative); + assert!(Decimal::new(-1.0, unlimit).negative); + assert!(!Decimal::new(0.0, unlimit).negative); + assert!(Decimal::new(-0.0, unlimit).negative); +} + +#[test] +fn test_shift_left() { + // No carry. 
+ let mut decimal = Decimal { + digits: VecDeque::from(vec![1, 2]), + radix: 0, + negative: false, + }; + decimal.shift_left(1); + assert_eq!(decimal.digits, &[2, 4]); + assert_eq!(decimal.radix, 0); + + // Simple carry. Trailing zeros are trimmed. + let mut decimal = Decimal { + digits: VecDeque::from(vec![500_000_000, 500_000_000]), + radix: 0, + negative: false, + }; + decimal.shift_left(1); + assert_eq!(decimal.digits, &[1, 1]); + assert_eq!(decimal.radix, 1); + + // Big carry. + // 1 << 100 == 1267650600228229401496703205376 + let mut decimal = Decimal { + digits: VecDeque::from(vec![1]), + radix: 0, + negative: false, + }; + decimal.shift_left(100); + assert_eq!( + decimal.digits, + &[1267, 650_600_228, 229_401_496, 703_205_376] + ); + assert_eq!(decimal.radix, 3); +} + +#[test] +fn test_shift_right() { + let unlimit = DigitLimit::Total(usize::MAX); + // No carry. + let mut decimal = Decimal { + digits: VecDeque::from(vec![2, 4]), + radix: 0, + negative: false, + }; + decimal.shift_right(1, unlimit); + assert_eq!(decimal.digits, &[1, 2]); + assert_eq!(decimal.radix, 0); + + // Carry. Leading zeros are trimmed. 
+ let mut decimal = Decimal { + digits: VecDeque::from(vec![1, 0, 0]), + radix: 1, + negative: false, + }; + decimal.shift_right(1, unlimit); + assert_eq!(decimal.digits, &[500_000_000, 0]); + assert_eq!(decimal.radix, 0); + + // Big shift right + // 1267650600228229401496703205376 >> 100 should logically result in 1 + let mut decimal = Decimal { + digits: VecDeque::from(vec![1_267, 650_600_228, 229_401_496, 703_205_376]), + radix: 3, + negative: false, + }; + decimal.shift_right(100, unlimit); + assert_eq!(decimal.digits, VecDeque::from(vec![1])); + assert_eq!(decimal.radix, 0); +} + +#[test] +fn test_shift_right_with_precision() { + let mut decimal = Decimal { + digits: VecDeque::from(vec![1]), + radix: 1, + negative: false, + }; + decimal.shift_right(10, DigitLimit::Total(1)); + assert_eq!(decimal.digits, &[976562]); + assert_eq!(decimal.radix, 0); + + decimal = Decimal { + digits: VecDeque::from(vec![10000000, 10000000, 0]), + radix: 3, + negative: false, + }; + decimal.shift_right(10, DigitLimit::Total(3)); + assert_eq!(decimal.digits, &[9765, 625009765, 625000000]); + assert_eq!(decimal.radix, 3); + + let mut decimal = Decimal { + digits: VecDeque::from(vec![1]), + radix: 1, + negative: false, + }; + decimal.shift_right(10, DigitLimit::Fractional(1)); + assert_eq!(decimal.digits, &[976562, 500000000]); + assert_eq!(decimal.radix, 0); + decimal.shift_right(20, DigitLimit::Fractional(1)); + assert_eq!(decimal.digits, &[931322574]); + assert_eq!(decimal.radix, -1); + + decimal = Decimal { + digits: VecDeque::from(vec![10000000, 10000000, 0]), + radix: 3, + negative: false, + }; + decimal.shift_right(10, DigitLimit::Total(3)); + assert_eq!(decimal.digits, &[9765, 625009765, 625000000]); + assert_eq!(decimal.radix, 3); +} + +#[test] +fn test_exponent() { + let decimal = Decimal { + digits: VecDeque::from(vec![123456789]), + radix: 2, + negative: false, + }; + assert_eq!(decimal.exponent(), 2 * (DIGIT_WIDTH as i32) + 8); + + let decimal = Decimal { + digits: 
VecDeque::from(vec![12345]), + radix: -1, + negative: false, + }; + assert_eq!(decimal.exponent(), -(DIGIT_WIDTH as i32) + 4); + + let decimal = Decimal { + digits: VecDeque::from(vec![123456789]), + radix: 0, + negative: false, + }; + assert_eq!(decimal.exponent(), 8); + + let decimal = Decimal { + digits: VecDeque::new(), + radix: 0, + negative: false, + }; + assert_eq!(decimal.exponent(), 0); +} diff --git a/printf/src/lib.rs b/printf/src/lib.rs new file mode 100644 index 000000000..19c26ec8f --- /dev/null +++ b/printf/src/lib.rs @@ -0,0 +1,84 @@ +/** Rust printf implementation, based on musl. */ +mod arg; +pub use arg::{Arg, ToArg}; + +mod fmt_fp; +mod printf_impl; +pub use printf_impl::{sprintf_locale, Error}; +pub mod locale; +pub use locale::{Locale, C_LOCALE, EN_US_LOCALE}; + +#[cfg(test)] +mod tests; + +#[macro_export] +macro_rules! sprintf { + // Variant which allows a string literal and returns a `Utf32String`. + ($fmt:literal, $($arg:expr),* $(,)?) => { + { + let mut target = widestring::Utf32String::new(); + $crate::sprintf!(=> &mut target, widestring::utf32str!($fmt), $($arg),*); + target + } + }; + + // Variant which allows a string literal and writes to a target. + // The target should implement std::fmt::Write. + ( + => $target:expr, // target string + $fmt:literal, // format string + $($arg:expr),* // arguments + $(,)? // optional trailing comma + ) => { + { + $crate::sprintf!(=> $target, widestring::utf32str!($fmt), $($arg),*); + } + }; + + // Variant which allows a `Utf32String` as a format, and writes to a target. + ( + => $target:expr, // target string + $fmt:expr, // format string as UTF32String + $($arg:expr),* // arguments + $(,)? // optional trailing comma + ) => { + { + // May be no args! + #[allow(unused_imports)] + use $crate::ToArg; + $crate::sprintf_c_locale( + $target, + $fmt.as_char_slice(), + &mut [$($arg.to_arg()),*], + ).unwrap() + } + }; + + // Variant which allows a `Utf32String` as a format, and returns a `Utf32String`. 
+ ($fmt:expr, $($arg:expr),* $(,)?) => { + { + let mut target = widestring::Utf32String::new(); + $crate::sprintf!(=> &mut target, $fmt, $($arg),*); + target + } + }; +} + +/// Formats a string using the provided format specifiers and arguments, using the C locale, +/// and writes the output to the given `Write` implementation. +/// +/// # Parameters +/// - `f`: The receiver of formatted output. +/// - `fmt`: The format string being parsed. +/// - `args`: The arguments to format, consumed in order; +/// a `%n` argument receives the output length so far. +/// +/// # Returns +/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`. +pub fn sprintf_c_locale( + f: &mut impl std::fmt::Write, + fmt: &[char], + args: &mut [Arg], +) -> Result { + sprintf_locale(f, fmt, &locale::C_LOCALE, args) +} diff --git a/printf/src/locale.rs b/printf/src/locale.rs new file mode 100644 index 000000000..cf8005332 --- /dev/null +++ b/printf/src/locale.rs @@ -0,0 +1,207 @@ +/// The numeric locale. Note this is a pure value type. +#[derive(Debug, Clone, Copy)] +pub struct Locale { + /// The decimal point. Only single-char decimal points are supported. + pub decimal_point: char, + + /// The thousands separator, or None if none. + /// Note some obscure locales like it_IT.ISO8859-15 seem to have a multi-char thousands separator! + /// We do not support that. + pub thousands_sep: Option, + + /// The grouping of digits. + /// This is to be read from left to right. + /// For example, the number 88888888888888 with a grouping of [2, 3, 4, 4] + /// would produce the string "8,8888,8888,888,88". + /// If 0, no grouping at all. + pub grouping: [u8; 4], + + /// If true, the group is repeated. + /// If false, there are no groups after the last. + pub group_repeat: bool, +} + +impl Locale { + /// Given a string containing only ASCII digits, return a new string with thousands separators applied.
+ /// This panics if the locale has no thousands separator; callers should only call this if there is a + /// thousands separator. + pub fn apply_grouping(&self, mut input: &str) -> String { + debug_assert!(input.bytes().all(|b| b.is_ascii_digit())); + let sep = self.thousands_sep.expect("no thousands separator"); + let mut result = String::with_capacity(input.len() + self.separator_count(input.len())); + while !input.is_empty() { + let group_size = self.next_group_size(input.len()); + let (group, rest) = input.split_at(group_size); + result.push_str(group); + if !rest.is_empty() { + result.push(sep); + } + input = rest; + } + result + } + + // Given a count of remaining digits, return the number of characters in the next group, from the left (most significant). + fn next_group_size(&self, digits_left: usize) -> usize { + let mut accum: usize = 0; + for group in self.grouping { + if digits_left <= accum + group as usize { + return digits_left - accum; + } + accum += group as usize; + } + // accum now contains the sum of all groups. + // Maybe repeat. + debug_assert!(digits_left >= accum); + let repeat_group = if self.group_repeat { + *self.grouping.last().unwrap() + } else { + 0 + }; + + if repeat_group == 0 { + // No further grouping. + digits_left - accum + } else { + // Divide remaining digits by repeat_group. + // Apply any remainder to the first group. + let res = (digits_left - accum) % (repeat_group as usize); + if res > 0 { + res + } else { + repeat_group as usize + } + } + } + + // Given a count of remaining digits, return the total number of separators. 
+ pub fn separator_count(&self, digits_count: usize) -> usize { + if self.thousands_sep.is_none() { + return 0; + } + let mut sep_count = 0; + let mut accum = 0; + for group in self.grouping { + if digits_count <= accum + group as usize { + return sep_count; + } + if group > 0 { + sep_count += 1; + } + accum += group as usize; + } + debug_assert!(digits_count >= accum); + let repeat_group = if self.group_repeat { + *self.grouping.last().unwrap() + } else { + 0 + }; + // Divide remaining digits by repeat_group. + // -1 because it's "100,000" and not ",100,000". + if repeat_group > 0 && digits_count > accum { + sep_count += (digits_count - accum - 1) / repeat_group as usize; + } + sep_count + } +} + +/// The "C" numeric locale. +pub const C_LOCALE: Locale = Locale { + decimal_point: '.', + thousands_sep: None, + grouping: [0; 4], + group_repeat: false, +}; + +// en_us numeric locale, for testing. +#[allow(dead_code)] +pub const EN_US_LOCALE: Locale = Locale { + decimal_point: '.', + thousands_sep: Some(','), + grouping: [3, 3, 3, 3], + group_repeat: true, +}; + +#[test] +fn test_apply_grouping() { + let input = "123456789"; + let mut result: String; + + // en_US has commas. + assert_eq!(EN_US_LOCALE.thousands_sep, Some(',')); + result = EN_US_LOCALE.apply_grouping(input); + assert_eq!(result, "123,456,789"); + + // Test weird locales.
+ let input: &str = "1234567890123456"; + let mut locale: Locale = C_LOCALE; + locale.thousands_sep = Some('!'); + + locale.grouping = [5, 3, 1, 0]; + locale.group_repeat = false; + result = locale.apply_grouping(input); + assert_eq!(result, "1234567!8!901!23456"); + + // group_repeat doesn't matter because trailing group is 0 + locale.grouping = [5, 3, 1, 0]; + locale.group_repeat = true; + result = locale.apply_grouping(input); + assert_eq!(result, "1234567!8!901!23456"); + + locale.grouping = [5, 3, 1, 2]; + locale.group_repeat = false; + result = locale.apply_grouping(input); + assert_eq!(result, "12345!67!8!901!23456"); + + locale.grouping = [5, 3, 1, 2]; + locale.group_repeat = true; + result = locale.apply_grouping(input); + assert_eq!(result, "1!23!45!67!8!901!23456"); +} + +#[test] +#[should_panic] +fn test_thousands_grouping_length_panics_if_no_sep() { + // We should panic if we try to group with no thousands separator. + assert_eq!(C_LOCALE.thousands_sep, None); + C_LOCALE.apply_grouping("123"); +} + +#[test] +fn test_thousands_grouping_length() { + fn validate_grouping_length_hint(locale: Locale, mut input: &str) { + loop { + let expected = locale.separator_count(input.len()) + input.len(); + let actual = locale.apply_grouping(input).len(); + assert_eq!(expected, actual); + if input.is_empty() { + break; + } + input = &input[1..]; + } + } + + validate_grouping_length_hint(EN_US_LOCALE, "123456789"); + + // Test weird locales. 
+ let input = "1234567890123456"; + let mut locale: Locale = C_LOCALE; + locale.thousands_sep = Some('!'); + + locale.grouping = [5, 3, 1, 0]; + locale.group_repeat = false; + validate_grouping_length_hint(locale, input); + + // group_repeat doesn't matter because trailing group is 0 + locale.grouping = [5, 3, 1, 0]; + locale.group_repeat = true; + validate_grouping_length_hint(locale, input); + + locale.grouping = [5, 3, 1, 2]; + locale.group_repeat = false; + validate_grouping_length_hint(locale, input); + + locale.grouping = [5, 3, 1, 2]; + locale.group_repeat = true; + validate_grouping_length_hint(locale, input); +} diff --git a/printf/src/printf_impl.rs b/printf/src/printf_impl.rs new file mode 100644 index 000000000..b658072d7 --- /dev/null +++ b/printf/src/printf_impl.rs @@ -0,0 +1,530 @@ +/** Rust printf implementation, based on musl. */ +use super::arg::Arg; +use super::fmt_fp::format_float; +use super::locale::Locale; +use std::fmt::{self, Write}; +use std::mem; +use std::ops::{AddAssign, Index}; +use std::result::Result; + +/// Possible errors from printf. +#[derive(Debug, PartialEq, Eq)] +pub enum Error { + /// Invalid format string. + BadFormatString, + /// Too few arguments. + MissingArg, + /// Too many arguments. + ExtraArg, + /// Argument type doesn't match format specifier. + BadArgType, + /// Precision is too large to represent. + Overflow, + /// Error emitted by the output stream. + Fmt(fmt::Error), +} + +// Convenience conversion from fmt::Error. 
+impl From for Error { + fn from(err: fmt::Error) -> Error { + Error::Fmt(err) + } +} + +#[derive(Debug, Copy, Clone, Default)] +pub(super) struct ModifierFlags { + pub alt_form: bool, // # + pub zero_pad: bool, // 0 + pub left_adj: bool, // negative field width + pub pad_pos: bool, // space: blank before positive numbers + pub mark_pos: bool, // +: sign before positive numbers + pub grouped: bool, // ': group indicator +} + +impl ModifierFlags { + // If c is a modifier character, set the flag and return true. + // Otherwise return false. Note we allow repeated modifier flags. + fn try_set(&mut self, c: char) -> bool { + match c { + '#' => self.alt_form = true, + '0' => self.zero_pad = true, + '-' => self.left_adj = true, + ' ' => self.pad_pos = true, + '+' => self.mark_pos = true, + '\'' => self.grouped = true, + _ => return false, + }; + true + } +} + +// The set of prefixes of conversion specifiers. +// Note that we mostly ignore prefixes - we take sizes of values from the arguments themselves. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[allow(non_camel_case_types)] +enum ConversionPrefix { + Empty, + hh, + h, + l, + ll, + j, + t, + z, + L, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[allow(non_camel_case_types)] +#[rustfmt::skip] +pub(super) enum ConversionSpec { + // Integers, with prefixes "hh", "h", "l", "ll", "j", "t", "z" + // Note that we treat '%i' as '%d'. + d, o, u, x, X, + + // USizeRef receiver, with same prefixes as ints + n, + + // Float, with prefixes "l" and "L" + a, A, e, E, f, F, g, G, + + // Pointer, no prefixes + p, + + // Character or String, with supported prefixes "l" + // Note that we treat '%C' as '%c' and '%S' as '%s'. + c, s, +} + +impl ConversionSpec { + // Returns true if the given prefix is supported by this conversion specifier. + fn supports_prefix(self, prefix: ConversionPrefix) -> bool { + use ConversionPrefix::*; + use ConversionSpec::*; + if matches!(prefix, Empty) { + // No prefix is always supported. 
+ return true; + } + match self { + d | o | u | x | X | n => matches!(prefix, hh | h | l | ll | j | t | z), + a | A | e | E | f | F | g | G => matches!(prefix, l | L), + p => false, + c | s => matches!(prefix, l), + } + } + + // Returns true if the conversion specifier is lowercase, + // which affects certain rendering. + #[inline] + pub(super) fn is_lower(self) -> bool { + use ConversionSpec::*; + match self { + d | o | u | x | n | a | e | f | g | p | c | s => true, + X | A | E | F | G => false, + } + } + + // Returns a ConversionSpec from a character, or None if none. + fn from_char(cc: char) -> Option { + use ConversionSpec::*; + let res = match cc { + 'd' | 'i' => d, + 'o' => o, + 'u' => u, + 'x' => x, + 'X' => X, + 'n' => n, + 'a' => a, + 'A' => A, + 'e' => e, + 'E' => E, + 'f' => f, + 'F' => F, + 'g' => g, + 'G' => G, + 'p' => p, + 'c' | 'C' => c, + 's' | 'S' => s, + _ => return None, + }; + Some(res) + } +} + +// A helper type that holds a format string slice and points into it. +// As a convenience, this returns '\0' for one-past-the-end. +#[derive(Debug)] +struct FormatString<'a>(&'a [char]); + +impl<'a> FormatString<'a> { + // Return the underlying slice. + fn as_slice(&self) -> &'a [char] { + self.0 + } + + // Return true if we are empty. + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + // Read an int from our cursor, stopping at the first non-digit. + // Negative values are not supported. + // If there are no digits, return 0. + // Adjust the cursor to point to the char after the int. + fn get_int(&mut self) -> Result { + use Error::Overflow; + let mut i: usize = 0; + while let Some(digit) = self[0].to_digit(10) { + i = i.checked_mul(10).ok_or(Overflow)?; + i = i.checked_add(digit as usize).ok_or(Overflow)?; + *self += 1; + } + Ok(i) + } + + // Read a conversion prefix from our cursor, advancing it. 
+ fn get_prefix(&mut self) -> ConversionPrefix { + use ConversionPrefix as CP; + let prefix = match self[0] { + 'h' if self[1] == 'h' => CP::hh, + 'h' => CP::h, + 'l' if self[1] == 'l' => CP::ll, + 'l' => CP::l, + 'j' => CP::j, + 't' => CP::t, + 'z' => CP::z, + 'L' => CP::L, + _ => CP::Empty, + }; + *self += match prefix { + CP::Empty => 0, + CP::hh | CP::ll => 2, + _ => 1, + }; + prefix + } + + // Read an (optionally prefixed) format specifier, such as d, Lf, etc. + // Adjust the cursor to point to the char after the specifier. + fn get_specifier(&mut self) -> Result { + let prefix = self.get_prefix(); + // Awkwardly placed hack to disallow %lC and %lS, since we otherwise treat + // them as the same. + if prefix != ConversionPrefix::Empty && matches!(self[0], 'C' | 'S') { + return Err(Error::BadFormatString); + } + let spec = ConversionSpec::from_char(self[0]).ok_or(Error::BadFormatString)?; + if !spec.supports_prefix(prefix) { + return Err(Error::BadFormatString); + } + *self += 1; + Ok(spec) + } + + // Read a sequence of characters to be output literally, advancing the cursor. + // This handles a tail of %%. + fn get_lit(&mut self) -> &'a [char] { + let s = self.0; + let non_percents = s.iter().take_while(|&&c| c != '%').count(); + // Take only an even number of percents. + let percent_pairs: usize = s[non_percents..].iter().take_while(|&&c| c == '%').count() / 2; + *self += non_percents + percent_pairs * 2; + &s[..non_percents + percent_pairs] + } +} + +// Advance this format string by a number of chars. +impl AddAssign for FormatString<'_> { + fn add_assign(&mut self, rhs: usize) { + self.0 = &self.0[rhs..]; + } +} + +// Index into FormatString, returning \0 for one-past-the-end. +impl Index for FormatString<'_> { + type Output = char; + + fn index(&self, idx: usize) -> &char { + let s = self.as_slice(); + if idx == s.len() { + &'\0' + } else { + &s[idx] + } + } +} + +// Pad output by emitting `c` until `min_width` is reached. 
+pub(super) fn pad( + f: &mut impl Write, + c: char, + min_width: usize, + current_width: usize, +) -> fmt::Result { + assert!(c == '0' || c == ' '); + if current_width >= min_width { + return Ok(()); + } + const ZEROS: &str = "0000000000000000"; + const SPACES: &str = " "; + let buff = if c == '0' { ZEROS } else { SPACES }; + let mut remaining = min_width - current_width; + while remaining > 0 { + let n = remaining.min(buff.len()); + f.write_str(&buff[..n])?; + remaining -= n; + } + Ok(()) +} + +/// Formats a string using the provided format specifiers, arguments, and locale, +/// and writes the output to the given `Write` implementation. +/// +/// # Parameters +/// - `f`: The receiver of formatted output. +/// - `fmt`: The format string being parsed. +/// - `locale`: The locale to use for number formatting. +/// - `args`: Iterator over the arguments to format. +/// +/// # Returns +/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`. +pub fn sprintf_locale( + f: &mut impl Write, + fmt: &[char], + locale: &Locale, + args: &mut [Arg], +) -> Result { + use ConversionSpec as CS; + let mut s = FormatString(fmt); + let mut args = args.iter_mut(); + let mut out_len: usize = 0; + + // Shared storage for the output of the conversion specifier. + let buf = &mut String::new(); + 'main: while !s.is_empty() { + buf.clear(); + + // Handle literal text and %% format specifiers. + let lit = s.get_lit(); + if !lit.is_empty() { + buf.extend(lit.iter()); + f.write_str(buf)?; + out_len = out_len.checked_add(lit.len()).ok_or(Error::Overflow)?; + continue 'main; + } + + // Consume the % at the start of the format specifier. + debug_assert!(s[0] == '%'); + s += 1; + + // Read modifier flags. '-' and '0' flags are mutually exclusive. + let mut flags = ModifierFlags::default(); + while flags.try_set(s[0]) { + s += 1; + } + if flags.left_adj { + flags.zero_pad = false; + } + + // Read field width. We do not support $. 
+ let width = if s[0] == '*' { + let arg_width = args.next().ok_or(Error::MissingArg)?.as_sint()?; + s += 1; + if arg_width < 0 { + flags.left_adj = true; + } + arg_width + .unsigned_abs() + .try_into() + .map_err(|_| Error::Overflow)? + } else { + s.get_int()? + }; + + // Optionally read precision. We do not support $. + let mut prec: Option = if s[0] == '.' && s[1] == '*' { + // "A negative precision is treated as though it were missing." + // Here we assume the precision is always signed. + s += 2; + let p = args.next().ok_or(Error::MissingArg)?.as_sint()?; + p.try_into().ok() + } else if s[0] == '.' { + s += 1; + Some(s.get_int()?) + } else { + None + }; + // Disallow precisions larger than i32::MAX, in keeping with C. + if prec.unwrap_or(0) > i32::MAX as usize { + return Err(Error::Overflow); + } + + // Read out the format specifier and arg. + let conv_spec = s.get_specifier()?; + let arg = args.next().ok_or(Error::MissingArg)?; + let mut prefix = ""; + + // Thousands grouping only works for d,u,i,f,F. + // 'i' is mapped to 'd'. + if flags.grouped && !matches!(conv_spec, CS::d | CS::u | CS::f | CS::F) { + return Err(Error::BadFormatString); + } + + // Disable zero-pad if we have an explicit precision. + // "If a precision is given with a numeric conversion (d, i, o, u, x, and X), + // the 0 flag is ignored." p is included here. + let spec_is_numeric = matches!(conv_spec, CS::d | CS::u | CS::o | CS::p | CS::x | CS::X); + if spec_is_numeric && prec.is_some() { + flags.zero_pad = false; + } + + // Apply the formatting. Some cases continue the main loop. + // Note that numeric conversions must leave 'body' empty if the value is 0. + let body: &str = match conv_spec { + CS::n => { + arg.set_count(out_len)?; + continue 'main; + } + CS::e | CS::f | CS::g | CS::a | CS::E | CS::F | CS::G | CS::A => { + // Floating point types handle output on their own.
+ let float = arg.as_float()?; + let len = format_float(f, float, width, prec, flags, locale, conv_spec, buf)?; + out_len = out_len.checked_add(len).ok_or(Error::Overflow)?; + continue 'main; + } + CS::p => { + const PTR_HEX_DIGITS: usize = 2 * mem::size_of::<*const u8>(); + prec = prec.map(|p| p.max(PTR_HEX_DIGITS)); + let uint = arg.as_uint()?; + if uint != 0 { + prefix = "0x"; + write!(buf, "{:x}", uint)?; + } + buf + } + CS::x | CS::X => { + // If someone passes us a negative value, format it with the width + // we were given. + let lower = conv_spec.is_lower(); + let (_, uint) = arg.as_wrapping_sint()?; + if uint != 0 { + if flags.alt_form { + prefix = if lower { "0x" } else { "0X" }; + } + if lower { + write!(buf, "{:x}", uint)?; + } else { + write!(buf, "{:X}", uint)?; + } + } + buf + } + CS::o => { + let uint = arg.as_uint()?; + if uint != 0 { + write!(buf, "{:o}", uint)?; + } + if flags.alt_form && prec.unwrap_or(0) <= buf.len() + 1 { + prec = Some(buf.len() + 1); + } + buf + } + CS::u => { + let uint = arg.as_uint()?; + if uint != 0 { + write!(buf, "{}", uint)?; + } + buf + } + CS::d => { + let arg_i = arg.as_sint()?; + if arg_i < 0 { + prefix = "-"; + } else if flags.mark_pos { + prefix = "+"; + } else if flags.pad_pos { + prefix = " "; + } + if arg_i != 0 { + write!(buf, "{}", arg_i.unsigned_abs())?; + } + buf + } + CS::c => { + // also 'C' + flags.zero_pad = false; + buf.push(arg.as_char()?); + buf + } + CS::s => { + // also 'S' + let s = arg.as_str(buf)?; + let p = prec.unwrap_or(s.len()).min(s.len()); + prec = Some(p); + flags.zero_pad = false; + &s[..p] + } + }; + // Numeric output should be empty iff the value is 0. + if spec_is_numeric && body.is_empty() { + debug_assert!(arg.as_uint().unwrap() == 0); + } + + // Decide if we want to apply thousands grouping to the body, and compute its size. + // Note we have already errored out if grouped is set and this is non-numeric. 
+ let wants_grouping = flags.grouped && locale.thousands_sep.is_some(); + let body_len = match wants_grouping { + true => body.len() + locale.separator_count(body.len()), + false => body.len(), + }; + + // Resolve the precision. + // In the case of a non-numeric conversion, update the precision to at least the + // length of the string. + let prec = if !spec_is_numeric { + prec.unwrap_or(body_len) + } else { + prec.unwrap_or(1).max(body_len) + }; + + let prefix_len = prefix.len(); + let unpadded_width = prefix_len.checked_add(prec).ok_or(Error::Overflow)?; + let width = width.max(unpadded_width); + + // Pad on the left with spaces to the desired width? + if !flags.left_adj && !flags.zero_pad { + pad(f, ' ', width, unpadded_width)?; + } + + // Output any prefix. + f.write_str(prefix)?; + + // Pad after the prefix with zeros to the desired width? + if !flags.left_adj && flags.zero_pad { + pad(f, '0', width, unpadded_width)?; + } + + // Pad on the left to the given precision? + pad(f, '0', prec, body_len)?; + + // Output the actual value, perhaps with grouping. + if wants_grouping { + f.write_str(&locale.apply_grouping(body))?; + } else { + f.write_str(body)?; + } + + // Pad on the right with spaces if we are left adjusted? + if flags.left_adj { + pad(f, ' ', width, unpadded_width)?; + } + + out_len = out_len.checked_add(width).ok_or(Error::Overflow)?; + } + + // Too many args? + if args.next().is_some() { + return Err(Error::ExtraArg); + } + Ok(out_len) +} diff --git a/printf/src/tests.rs b/printf/src/tests.rs new file mode 100644 index 000000000..9f1dc8f01 --- /dev/null +++ b/printf/src/tests.rs @@ -0,0 +1,845 @@ +use crate::arg::ToArg; +use crate::locale::{Locale, C_LOCALE, EN_US_LOCALE}; +use crate::{sprintf_locale, Error}; +use std::f64::consts::{E, PI, TAU}; +use std::fmt; +use widestring::{utf32str, Utf32Str}; + +// sprintf, checking length +macro_rules! sprintf_check { + ( + $fmt:expr, // format string + $($arg:expr),* // arguments + $(,)? 
// optional trailing comma + ) => { + { + let mut target = String::new(); + let chars: Vec = $fmt.chars().collect(); + let len = $crate::sprintf_c_locale( + &mut target, + &chars, + &mut [$($arg.to_arg()),*] + ).expect("printf failed"); + assert!(len == target.len(), "Wrong length returned: {} vs {}", len, target.len()); + target + } + }; +} + +macro_rules! assert_fmt { + ($fmt:expr $(, $arg:expr)* => $expected:expr) => { + assert_eq!(sprintf_check!($fmt, $($arg),*), $expected) + }; +} + +macro_rules! assert_fmt1 { + ($fmt:expr, $arg:expr, $expected:expr) => { + assert_fmt!($fmt, $arg => $expected) + }; +} + +// sprintf, except we expect to return an error. +macro_rules! sprintf_err { + ($fmt:expr, $($arg:expr),* => $expected:expr) => { + { + let chars: Vec = $fmt.chars().collect(); + let err = $crate::sprintf_c_locale( + &mut NullOutput, + &chars, + &mut [$($arg.to_arg()),*], + ).unwrap_err(); + assert_eq!(err, $expected, "Wrong error returned: {:?}", err); + } + }; +} + +// sprintf, except we throw away the output and return only the count. +macro_rules! sprintf_count { + ($fmt:expr $(, $arg:expr)*) => { + { + let chars: Vec = $fmt.chars().collect(); + $crate::sprintf_c_locale( + &mut NullOutput, + &chars, + &mut [$($arg.to_arg()),*], + ).expect("printf failed") + } + }; +} + +// Null writer which ignores all input. +struct NullOutput; +impl fmt::Write for NullOutput { + fn write_str(&mut self, _s: &str) -> fmt::Result { + Ok(()) + } +} + +#[test] +fn smoke() { + assert_fmt!("Hello, %s!", "world" => "Hello, world!"); + assert_fmt!("Hello, %ls!", "world" => "Hello, world!"); + assert_fmt!("Hello, world! %d %%%%", 3 => "Hello, world! 3 %%"); + assert_fmt!("" => ""); +} + +#[test] +fn test1() { + // A convenient place to isolate a single test, e.g. cargo test -- test1 + assert_fmt!("%.0e", 0 => "0e+00"); +} + +#[test] +fn test_n() { + // Test that the %n specifier correctly stores the number of characters written. 
+ let mut count: usize = 0; + assert_fmt!("%d%n", 123, &mut count => "123"); + assert_eq!(count, 3); + + assert_fmt!("%256d%%%n", 123, &mut count => format!("{:>256}%", 123)); + assert_eq!(count, 257); + + assert_fmt!("%d %s%n", 123, "hello", &mut count => "123 hello"); + assert_eq!(count, 3 + 1 + 5); + + assert_fmt!("%%%n", &mut count => "%"); + assert_eq!(count, 1); +} + +#[test] +fn test_plain() { + assert_fmt!("abc" => "abc"); + assert_fmt!("" => ""); + assert_fmt!("%%" => "%"); + assert_fmt!("%% def" => "% def"); + assert_fmt!("abc %%" => "abc %"); + assert_fmt!("abc %% def" => "abc % def"); + assert_fmt!("abc %%%% def" => "abc %% def"); + assert_fmt!("%%%%%%" => "%%%"); +} + +#[test] +fn test_str() { + assert_fmt!("hello %s", "world" => "hello world"); + assert_fmt!("hello %%%s", "world" => "hello %world"); + assert_fmt!("%10s", "world" => " world"); + assert_fmt!("%.4s", "world" => "worl"); + assert_fmt!("%10.4s", "world" => " worl"); + assert_fmt!("%-10.4s", "world" => "worl "); + assert_fmt!("%-10s", "world" => "world "); + + assert_fmt!("test %% with string: %s yay\n", "FOO" => "test % with string: FOO yay\n"); + assert_fmt!("test char %c", '~' => "test char ~"); + + assert_fmt!("%.0s", "test" => ""); + assert_fmt!("%.1s", "test" => "t"); + assert_fmt!("%.3s", "test" => "tes"); + assert_fmt!("%5.3s", "test" => " tes"); + assert_fmt!("%.4s", "test" => "test"); + assert_fmt!("%.100s", "test" => "test"); +} + +#[test] +fn test_int() { + assert_fmt!("% 0*i", 23125, 17 => format!(" {:023124}", 17)); + assert_fmt!("% 010i", 23125 => " 000023125"); + assert_fmt!("% 10i", 23125 => " 23125"); + assert_fmt!("% 5i", 23125 => " 23125"); + assert_fmt!("% 4i", 23125 => " 23125"); + assert_fmt!("%- 010i", 23125 => " 23125 "); + assert_fmt!("%- 10i", 23125 => " 23125 "); + assert_fmt!("%- 5i", 23125 => " 23125"); + assert_fmt!("%- 4i", 23125 => " 23125"); + assert_fmt!("%+ 010i", 23125 => "+000023125"); + assert_fmt!("%+ 10i", 23125 => " +23125"); + assert_fmt!("%+ 5i", 
23125 => "+23125"); + assert_fmt!("%+ 4i", 23125 => "+23125"); + assert_fmt!("%-010i", 23125 => "23125 "); + assert_fmt!("%-10i", 23125 => "23125 "); + assert_fmt!("%-5i", 23125 => "23125"); + assert_fmt!("%-4i", 23125 => "23125"); + + assert_fmt!("%d", 12 => "12"); + assert_fmt!("%d", -123 => "-123"); + assert_fmt!("~%d~", 148 => "~148~"); + assert_fmt!("00%dxx", -91232 => "00-91232xx"); + assert_fmt!("%x", -9232 => "ffffdbf0"); + assert_fmt!("%X", 432 => "1B0"); + assert_fmt!("%09X", 432 => "0000001B0"); + assert_fmt!("%9X", 432 => " 1B0"); + assert_fmt!("%+9X", 492 => " 1EC"); + assert_fmt!("% #9x", 4589 => " 0x11ed"); + assert_fmt!("%2o", 4 => " 4"); + assert_fmt!("% 12d", -4 => " -4"); + assert_fmt!("% 12d", 48 => " 48"); + assert_fmt!("%ld", -4_i64 => "-4"); + assert_fmt!("%lld", -4_i64 => "-4"); + assert_fmt!("%lX", -4_i64 => "FFFFFFFFFFFFFFFC"); + assert_fmt!("%ld", 48_i64 => "48"); + assert_fmt!("%lld", 48_i64 => "48"); + assert_fmt!("%-8hd", -12_i16 => "-12 "); + + assert_fmt!("%u", 12 => "12"); + assert_fmt!("~%u~", 148 => "~148~"); + assert_fmt!("00%uxx", 91232 => "0091232xx"); + assert_fmt!("%x", 9232 => "2410"); + assert_fmt!("%9X", 492 => " 1EC"); + assert_fmt!("% 12u", 4 => " 4"); + assert_fmt!("% 12u", 48 => " 48"); + assert_fmt!("%lu", 4_u64 => "4"); + assert_fmt!("%llu", 4_u64 => "4"); + assert_fmt!("%lX", 4_u64 => "4"); + assert_fmt!("%lu", 48_u64 => "48"); + assert_fmt!("%llu", 48_u64 => "48"); + assert_fmt!("%-8hu", 12_u16 => "12 "); + + // Gross combinations of padding and precision. + assert_fmt!("%30d", 1234565678 => " 1234565678"); + assert_fmt!("%030d", 1234565678 => "000000000000000000001234565678"); + assert_fmt!("%30.20d", 1234565678 => " 00000000001234565678"); + // Here we specify both a precision and request zero-padding. + // "If a precision is given with a numeric conversion (d, i, o, u, x, and X), the 0 flag is ignored." 
+ assert_fmt!("%030.20d", 1234565678 => " 00000000001234565678"); + assert_fmt!("%030.0d", 1234565678 => " 1234565678"); + + // width, precision, alignment + assert_fmt1!("%04d", 12, "0012"); + assert_fmt1!("%.3d", 12, "012"); + assert_fmt1!("%3d", 12, " 12"); + assert_fmt1!("%-3d", 12, "12 "); + assert_fmt1!("%+3d", 12, "+12"); + assert_fmt1!("%+-5d", 12, "+12 "); + assert_fmt1!("%+- 5d", 12, "+12 "); + assert_fmt1!("%- 5d", 12, " 12 "); + assert_fmt1!("% d", 12, " 12"); + assert_fmt1!("%0-5d", 12, "12 "); + assert_fmt1!("%-05d", 12, "12 "); + + // ...explicit precision of 0 shall be no characters except for alt-octal. + assert_fmt1!("%.0d", 0, ""); + assert_fmt1!("%.0o", 0, ""); + assert_fmt1!("%#.0d", 0, ""); + assert_fmt1!("%#.0o", 0, "0"); + assert_fmt1!("%#.0x", 0, ""); + + // ...but it still has to honor width and flags. + assert_fmt1!("%2.0u", 0, " "); + assert_fmt1!("%02.0u", 0, " "); + assert_fmt1!("%2.0d", 0, " "); + assert_fmt1!("%02.0d", 0, " "); + assert_fmt1!("% .0d", 0, " "); + assert_fmt1!("%+.0d", 0, "+"); +} + +#[test] +fn test_octal() { + assert_fmt!("% 010o", 23125 => "0000055125"); + assert_fmt!("% 10o", 23125 => " 55125"); + assert_fmt!("% 5o", 23125 => "55125"); + assert_fmt!("% 4o", 23125 => "55125"); + assert_fmt!("%- 010o", 23125 => "55125 "); + assert_fmt!("%- 10o", 23125 => "55125 "); + assert_fmt!("%- 5o", 23125 => "55125"); + assert_fmt!("%- 4o", 23125 => "55125"); + assert_fmt!("%+ 010o", 23125 => "0000055125"); + assert_fmt!("%+ 10o", 23125 => " 55125"); + assert_fmt!("%+ 5o", 23125 => "55125"); + assert_fmt!("%+ 4o", 23125 => "55125"); + assert_fmt!("%-010o", 23125 => "55125 "); + assert_fmt!("%-10o", 23125 => "55125 "); + assert_fmt!("%-5o", 23125 => "55125"); + assert_fmt!("%-4o", 23125 => "55125"); + assert_fmt1!("%o", 15, "17"); + assert_fmt1!("%#o", 15, "017"); + assert_fmt1!("%#o", 0, "0"); + assert_fmt1!("%#.0o", 0, "0"); + assert_fmt1!("%#.1o", 0, "0"); + assert_fmt1!("%#o", 1, "01"); + assert_fmt1!("%#.0o", 1, "01"); + 
assert_fmt1!("%#.1o", 1, "01"); + assert_fmt1!("%#04o", 1, "0001"); + assert_fmt1!("%#04.0o", 1, " 01"); + assert_fmt1!("%#04.1o", 1, " 01"); + assert_fmt1!("%04o", 1, "0001"); + assert_fmt1!("%04.0o", 1, " 1"); + assert_fmt1!("%04.1o", 1, " 1"); +} + +#[test] +fn test_hex() { + assert_fmt!("% 010x", 23125 => "0000005a55"); + assert_fmt!("% 10x", 23125 => " 5a55"); + assert_fmt!("% 5x", 23125 => " 5a55"); + assert_fmt!("% 4x", 23125 => "5a55"); + assert_fmt!("%- 010x", 23125 => "5a55 "); + assert_fmt!("%- 10x", 23125 => "5a55 "); + assert_fmt!("%- 5x", 23125 => "5a55 "); + assert_fmt!("%- 4x", 23125 => "5a55"); + assert_fmt!("%+ 010x", 23125 => "0000005a55"); + assert_fmt!("%+ 10x", 23125 => " 5a55"); + assert_fmt!("%+ 5x", 23125 => " 5a55"); + assert_fmt!("%+ 4x", 23125 => "5a55"); + assert_fmt!("%-010x", 23125 => "5a55 "); + assert_fmt!("%-10x", 23125 => "5a55 "); + assert_fmt!("%-5x", 23125 => "5a55 "); + assert_fmt!("%-4x", 23125 => "5a55"); + + assert_fmt!("%# 010x", 23125 => "0x00005a55"); + assert_fmt!("%# 10x", 23125 => " 0x5a55"); + assert_fmt!("%# 5x", 23125 => "0x5a55"); + assert_fmt!("%# 4x", 23125 => "0x5a55"); + assert_fmt!("%#- 010x", 23125 => "0x5a55 "); + assert_fmt!("%#- 10x", 23125 => "0x5a55 "); + assert_fmt!("%#- 5x", 23125 => "0x5a55"); + assert_fmt!("%#- 4x", 23125 => "0x5a55"); + assert_fmt!("%#+ 010x", 23125 => "0x00005a55"); + assert_fmt!("%#+ 10x", 23125 => " 0x5a55"); + assert_fmt!("%#+ 5x", 23125 => "0x5a55"); + assert_fmt!("%#+ 4x", 23125 => "0x5a55"); + assert_fmt!("%#-010x", 23125 => "0x5a55 "); + assert_fmt!("%#-10x", 23125 => "0x5a55 "); + assert_fmt!("%#-5x", 23125 => "0x5a55"); + assert_fmt!("%#-4x", 23125 => "0x5a55"); + + assert_fmt!("% 010X", 23125 => "0000005A55"); + assert_fmt!("% 10X", 23125 => " 5A55"); + assert_fmt!("% 5X", 23125 => " 5A55"); + assert_fmt!("% 4X", 23125 => "5A55"); + assert_fmt!("%- 010X", 23125 => "5A55 "); + assert_fmt!("%- 10X", 23125 => "5A55 "); + assert_fmt!("%- 5X", 23125 => "5A55 "); + 
assert_fmt!("%- 4X", 23125 => "5A55"); + assert_fmt!("%+ 010X", 23125 => "0000005A55"); + assert_fmt!("%+ 10X", 23125 => " 5A55"); + assert_fmt!("%+ 5X", 23125 => " 5A55"); + assert_fmt!("%+ 4X", 23125 => "5A55"); + assert_fmt!("%-010X", 23125 => "5A55 "); + assert_fmt!("%-10X", 23125 => "5A55 "); + assert_fmt!("%-5X", 23125 => "5A55 "); + assert_fmt!("%-4X", 23125 => "5A55"); + + assert_fmt!("%#x", 234834 => "0x39552"); + assert_fmt!("%#X", 234834 => "0X39552"); + assert_fmt!("%#.10o", 54834 => "0000153062"); + + assert_fmt1!("%x", 63, "3f"); + assert_fmt1!("%#x", 63, "0x3f"); + assert_fmt1!("%X", 63, "3F"); +} + +#[test] +fn test_char() { + assert_fmt!("%c", 'a' => "a"); + assert_fmt!("%10c", 'a' => " a"); + assert_fmt!("%-10c", 'a' => "a "); +} + +#[test] +fn test_ptr() { + assert_fmt!("%p", core::ptr::null::() => "0"); + assert_fmt!("%p", 0xDEADBEEF_usize as *const u8 => "0xdeadbeef"); +} + +#[test] +fn test_float() { + // Basic form, handling of exponent/precision for 0 + assert_fmt1!("%a", 0.0, "0x0p+0"); + assert_fmt1!("%e", 0.0, "0.000000e+00"); + assert_fmt1!("%f", 0.0, "0.000000"); + assert_fmt1!("%g", 0.0, "0"); + assert_fmt1!("%#g", 0.0, "0.00000"); + assert_fmt1!("%la", 0.0, "0x0p+0"); + assert_fmt1!("%le", 0.0, "0.000000e+00"); + assert_fmt1!("%lf", 0.0, "0.000000"); + assert_fmt1!("%lg", 0.0, "0"); + assert_fmt1!("%#lg", 0.0, "0.00000"); + + // rounding + assert_fmt1!("%f", 1.1, "1.100000"); + assert_fmt1!("%f", 1.2, "1.200000"); + assert_fmt1!("%f", 1.3, "1.300000"); + assert_fmt1!("%f", 1.4, "1.400000"); + assert_fmt1!("%f", 1.5, "1.500000"); + assert_fmt1!("%.4f", 1.06125, "1.0613"); /* input is not representible exactly as double */ + assert_fmt1!("%.4f", 1.03125, "1.0312"); /* 0x1.08p0 */ + assert_fmt1!("%.2f", 1.375, "1.38"); + assert_fmt1!("%.1f", 1.375, "1.4"); + assert_fmt1!("%.1lf", 1.375, "1.4"); + assert_fmt1!("%.15f", 1.1, "1.100000000000000"); + assert_fmt1!("%.16f", 1.1, "1.1000000000000001"); + assert_fmt1!("%.17f", 1.1, 
"1.10000000000000009"); + assert_fmt1!("%.2e", 1500001.0, "1.50e+06"); + assert_fmt1!("%.2e", 1505000.0, "1.50e+06"); + assert_fmt1!("%.2e", 1505000.0000009537, "1.51e+06"); + assert_fmt1!("%.2e", 1505001.0, "1.51e+06"); + assert_fmt1!("%.2e", 1506000.0, "1.51e+06"); + + // pi in double precision, printed to a few extra places + assert_fmt1!("%.15f", PI, "3.141592653589793"); + assert_fmt1!("%.18f", PI, "3.141592653589793116"); + + // exact conversion of large integers + assert_fmt1!( + "%.0f", + 340282366920938463463374607431768211456.0, + "340282366920938463463374607431768211456" + ); + + let tiny = f64::exp2(-1021.0); + assert_fmt1!("%.1022f", tiny, format!("{:.1022}", tiny)); + + let tiny = f64::exp2(-1022.0); + assert_fmt1!("%.1022f", tiny, format!("{:.1022}", tiny)); + + assert_fmt1!("%.12g", 1000000000005.0, "1e+12"); + assert_fmt1!("%.12g", 100000000002500.0, "1.00000000002e+14"); + + assert_fmt1!("%.50g", 100000000000000.5, "100000000000000.5"); + assert_fmt1!("%.50g", 987654321098765.0, "987654321098765"); + assert_fmt1!("%.1f", 123123123123123.0, "123123123123123.0"); + assert_fmt1!("%g", 999999999.0, "1e+09"); + assert_fmt1!("%.3e", 999999999.75, "1.000e+09"); + + assert_fmt!("%f", 1234f64 => "1234.000000"); + assert_fmt!("%.5f", 1234f64 => "1234.00000"); + assert_fmt!("%.*f", 6, 1234.56f64 => "1234.560000"); + assert_fmt!("%f", -46.38 => "-46.380000"); + assert_fmt!("%012.3f", 1.2 => "00000001.200"); + assert_fmt!("%012.3e", 1.7 => "0001.700e+00"); + assert_fmt!("%e", 1e300 => "1.000000e+300"); + assert_fmt!("%012.3g%%!", 2.6 => "0000000002.6%!"); + assert_fmt!("%012.5G", -2.69 => "-00000002.69"); + assert_fmt!("%+7.4f", 42.785 => "+42.7850"); + assert_fmt!("{}% 7.4E", 493.12 => "{} 4.9312E+02"); + assert_fmt!("% 7.4E", -120.3 => "-1.2030E+02"); + assert_fmt!("%-10F", f64::INFINITY => "INF "); + assert_fmt!("%+010F", f64::INFINITY => " +INF"); + assert_fmt!("% f", f64::NAN => " nan"); + assert_fmt!("%+f", f64::NAN => "+nan"); + assert_fmt!("%.1f", 
999.99 => "1000.0"); + assert_fmt!("%.1f", 9.99 => "10.0"); + assert_fmt!("%.1e", 9.99 => "1.0e+01"); + assert_fmt!("%.2f", 9.99 => "9.99"); + assert_fmt!("%.2e", 9.99 => "9.99e+00"); + assert_fmt!("%.3f", 9.99 => "9.990"); + assert_fmt!("%.3e", 9.99 => "9.990e+00"); + assert_fmt!("%.1g", 9.99 => "1e+01"); + assert_fmt!("%.1G", 9.99 => "1E+01"); + assert_fmt!("%.1f", 2.99 => "3.0"); + assert_fmt!("%.1e", 2.99 => "3.0e+00"); + assert_fmt!("%.1g", 2.99 => "3"); + assert_fmt!("%.1f", 2.599 => "2.6"); + assert_fmt!("%.1e", 2.599 => "2.6e+00"); + + assert_fmt!("%30.15f", 1234565678.0 => " 1234565678.000000000000000"); + assert_fmt!("%030.15f", 1234565678.0 => "00001234565678.000000000000000"); + + assert_fmt!("%05.3a", 123.456 => "0x1.eddp+6"); + assert_fmt!("%05.3A", 123.456 => "0X1.EDDP+6"); + + // Regression test using smallest denormal. + assert_fmt!("%.0f", f64::from_bits(1) => "0"); + assert_fmt!("%.1f", f64::from_bits(1) => "0.0"); + + // More regression tests. + assert_fmt!("%0.6f", 1e15 => "1000000000000000.000000"); + assert_fmt!("%.0e", 0 => "0e+00"); +} + +#[test] +fn test_float_g() { + // correctness in DBL_DIG places + assert_fmt1!("%.15g", 1.23456789012345, "1.23456789012345"); + + // correct choice of notation for %g + assert_fmt1!("%g", 0.0001, "0.0001"); + assert_fmt1!("%g", 0.00001, "1e-05"); + assert_fmt1!("%g", 123456, "123456"); + assert_fmt1!("%g", 1234567, "1.23457e+06"); + assert_fmt1!("%.7g", 1234567, "1234567"); + assert_fmt1!("%.7g", 12345678, "1.234568e+07"); + assert_fmt1!("%.8g", 0.1, "0.1"); + assert_fmt1!("%.9g", 0.1, "0.1"); + assert_fmt1!("%.10g", 0.1, "0.1"); + assert_fmt1!("%.11g", 0.1, "0.1"); + + // %g with precisions + assert_fmt1!("%.5g", 12345, "12345"); + assert_fmt1!("%.4g", 12345, "1.234e+04"); + assert_fmt1!("%.3g", 12345, "1.23e+04"); + assert_fmt1!("%.2g", 12345, "1.2e+04"); + assert_fmt1!("%.1g", 12345, "1e+04"); + assert_fmt1!("%.5g", 0.000123456, "0.00012346"); + assert_fmt1!("%.4g", 0.000123456, "0.0001235"); + 
assert_fmt1!("%.3g", 0.000123456, "0.000123"); + assert_fmt1!("%.2g", 0.000123456, "0.00012"); + assert_fmt1!("%.1g", 0.000123456, "0.0001"); + assert_fmt1!("%.5g", 99999, "99999"); + assert_fmt1!("%.4g", 99999, "1e+05"); + assert_fmt1!("%.5g", 0.00001, "1e-05"); + assert_fmt1!("%.6g", 0.00001, "1e-05"); + + // %g with precision and alt form + assert_fmt1!("%#.5g", 12345, "12345."); + assert_fmt1!("%#.4g", 12345, "1.234e+04"); + assert_fmt1!("%#.3g", 12345, "1.23e+04"); + assert_fmt1!("%#.2g", 12345, "1.2e+04"); + assert_fmt1!("%#.1g", 12345, "1.e+04"); + assert_fmt1!("%#.5g", 0.000123456, "0.00012346"); + assert_fmt1!("%#.4g", 0.000123456, "0.0001235"); + assert_fmt1!("%#.3g", 0.000123456, "0.000123"); + assert_fmt1!("%#.2g", 0.000123456, "0.00012"); + assert_fmt1!("%#.1g", 0.000123456, "0.0001"); + assert_fmt1!("%#.5g", 99999, "99999."); + assert_fmt1!("%#.4g", 99999, "1.000e+05"); + assert_fmt1!("%#.5g", 0.00001, "1.0000e-05"); + assert_fmt1!("%#.6g", 0.00001, "1.00000e-05"); + + // 'g' specifier changes meaning of precision to number of sigfigs. + // This applies both to explicit precision, and the default precision, which is 6. + assert_fmt!("%.1g", 2.599 => "3"); + assert_fmt!("%g", 3.0 => "3"); + assert_fmt!("%G", 3.0 => "3"); + assert_fmt!("%g", 1234234.532234234 => "1.23423e+06"); + assert_fmt!("%g", 23490234723.234239 => "2.34902e+10"); + assert_fmt!("%G", 23490234723.234239 => "2.34902E+10"); + + assert_fmt!("%g", 0.0 => "0"); + assert_fmt!("%G", 0.0 => "0"); +} + +#[test] +fn test_float_hex() { + assert_fmt1!("%.0a", 0.0, "0x0p+0"); + assert_fmt1!("%.1a", 0.0, "0x0.0p+0"); + assert_fmt1!("%.2a", 0.0, "0x0.00p+0"); + assert_fmt1!("%.3a", 0.0, "0x0.000p+0"); + + // Test mixed precision and padding with left-adjust. 
+ assert_fmt!("%-10.5a", 1.23456 => "0x1.3c0c2p+0"); + assert_fmt!("%-15.3a", -123.456 => "-0x1.eddp+6 "); + assert_fmt!("%-20.1a", 0.00001234 => "0x1.ap-17 "); + + assert_fmt!("%.0a", PI => "0x2p+1"); + assert_fmt!("%.1a", PI => "0x1.9p+1"); + assert_fmt!("%.2a", PI => "0x1.92p+1"); + assert_fmt!("%.3a", PI => "0x1.922p+1"); + assert_fmt!("%.4a", PI => "0x1.9220p+1"); + assert_fmt!("%.5a", PI => "0x1.921fbp+1"); + assert_fmt!("%.6a", PI => "0x1.921fb5p+1"); + assert_fmt!("%.7a", PI => "0x1.921fb54p+1"); + assert_fmt!("%.8a", PI => "0x1.921fb544p+1"); + assert_fmt!("%.9a", PI => "0x1.921fb5444p+1"); + assert_fmt!("%.10a", PI => "0x1.921fb54443p+1"); + assert_fmt!("%.11a", PI => "0x1.921fb54442dp+1"); + assert_fmt!("%.12a", PI => "0x1.921fb54442d2p+1"); + assert_fmt!("%.13a", PI => "0x1.921fb54442d18p+1"); + assert_fmt!("%.14a", PI => "0x1.921fb54442d180p+1"); + assert_fmt!("%.15a", PI => "0x1.921fb54442d1800p+1"); + assert_fmt!("%.16a", PI => "0x1.921fb54442d18000p+1"); + assert_fmt!("%.17a", PI => "0x1.921fb54442d180000p+1"); + assert_fmt!("%.18a", PI => "0x1.921fb54442d1800000p+1"); + assert_fmt!("%.19a", PI => "0x1.921fb54442d18000000p+1"); + assert_fmt!("%.20a", PI => "0x1.921fb54442d180000000p+1"); +} + +#[test] +fn test_prefixes() { + // Test the valid prefixes. + // Note that we generally ignore prefixes, since we know the width of the actual passed-in type. + // We don't test prefixed 'n'. + // Integer prefixes. + use Error::BadFormatString; + for spec in "diouxX".chars() { + let expected = sprintf_check!(format!("%{}", spec), 5); + for prefix in ["", "h", "hh", "l", "ll", "z", "j", "t"] { + let actual = sprintf_check!(format!("%{}{}", prefix, spec), 5); + assert_eq!(actual, expected); + } + + for prefix in ["L", "B", "!"] { + sprintf_err!(format!("%{}{}", prefix, spec), 5 => BadFormatString); + } + } + + // Floating prefixes. 
+ for spec in "aAeEfFgG".chars() { + let expected = sprintf_check!(format!("%{}", spec), 5.0); + for prefix in ["", "l", "L"] { + let actual = sprintf_check!(format!("%{}{}", prefix, spec), 5.0); + assert_eq!(actual, expected); + } + + for prefix in ["h", "hh", "z", "j", "t", "!"] { + sprintf_err!(format!("%{}{}", prefix, spec), 5.0 => BadFormatString); + } + } + + // Character prefixes. + assert_eq!(sprintf_check!("%c", 'c'), "c"); + assert_eq!(sprintf_check!("%lc", 'c'), "c"); + assert_eq!(sprintf_check!("%s", "cs"), "cs"); + assert_eq!(sprintf_check!("%ls", "cs"), "cs"); +} + +#[allow(clippy::approx_constant)] +#[test] +fn negative_precision_width() { + assert_fmt!("%*s", -10, "hello" => "hello "); + assert_fmt!("%*s", -5, "world" => "world"); + assert_fmt!("%-*s", 10, "rust" => "rust "); + assert_fmt!("%.*s", -3, "example" => "example"); + + assert_fmt!("%*d", -8, 456 => "456 "); + assert_fmt!("%*i", -4, -789 => "-789"); + assert_fmt!("%-*o", 6, 123 => "173 "); + assert_fmt!("%.*x", -2, 255 => "ff"); + assert_fmt!("%-*X", 7, 255 => "FF "); + assert_fmt!("%.*u", -5, 5000 => "5000"); + + assert_fmt!("%*f", -12, 78.9 => "78.900000 "); + assert_fmt!("%*g", -10, 12345.678 => "12345.7 "); + assert_fmt!("%-*e", 15, 0.00012 => "1.200000e-04 "); + assert_fmt!("%-*e", -15, 0.00012 => "1.200000e-04 "); + assert_fmt!("%.*G", -2, 123.456 => "123.456"); + assert_fmt!("%-*E", 14, 123456.789 => "1.234568E+05 "); + assert_fmt!("%-*E", -14, 123456.789 => "1.234568E+05 "); + assert_fmt!("%.*f", -6, 3.14159 => "3.141590"); + + assert_fmt!("%*.*f", -12, -6, 78.9 => "78.900000 "); + assert_fmt!("%*.*g", -10, -3, 12345.678 => "12345.7 "); + assert_fmt!("%*.*e", -15, -8, 0.00012 => "1.200000e-04 "); + assert_fmt!("%*.*E", -14, -4, 123456.789 => "1.234568E+05 "); + + assert_fmt!("%*.*d", -6, -4, 2024 => "2024 "); + assert_fmt!("%*.*x", -8, -3, 255 => "ff "); + + assert_fmt!("%*.*f", -10, -2, 3.14159 => "3.141590 "); + assert_fmt!("%*.*g", -12, -5, 123.456 => "123.456 "); + 
assert_fmt!("%*.*e", -14, -3, 0.000789 => "7.890000e-04 "); + assert_fmt!("%*.*E", -16, -5, 98765.4321 => "9.876543E+04 "); +} + +#[test] +fn test_precision_overflow() { + // Disallow precisions larger than i32::MAX. + sprintf_err!("%.*g", usize::MAX, 1.0 => Error::Overflow); + sprintf_err!("%.2147483648g", usize::MAX, 1.0 => Error::Overflow); + sprintf_err!("%.*g", i32::MAX as usize + 1, 1.0 => Error::Overflow); + sprintf_err!("%.2147483648g", i32::MAX as usize + 1, 1.0 => Error::Overflow); +} + +#[test] +fn test_huge_precision_g() { + let f = 1e-100; + assert_eq!(sprintf_count!("%.2147483647g", f), 288); + assert_eq!(sprintf_count!("%.*g", i32::MAX, f), 288); + assert_fmt!("%.*g", i32::MAX, 2.0_f64.powi(-4) => "0.0625"); + + sprintf_err!("%.*g", usize::MAX, f => Error::Overflow); + sprintf_err!("%.2147483648g", f => Error::Overflow); +} + +#[test] +fn test_errors() { + use Error::*; + sprintf_err!("%", => BadFormatString); + sprintf_err!("%1", => BadFormatString); + sprintf_err!("%%%k", => BadFormatString); + sprintf_err!("%B", => BadFormatString); + sprintf_err!("%lC", 'q' => BadFormatString); + sprintf_err!("%lS", 'q' => BadFormatString); + sprintf_err!("%d", => MissingArg); + sprintf_err!("%d %u", 1 => MissingArg); + sprintf_err!("%*d", 5 => MissingArg); + sprintf_err!("%.*d", 5 => MissingArg); + sprintf_err!("%%", 1 => ExtraArg); + sprintf_err!("%d %d", 1, 2, 3 => ExtraArg); + sprintf_err!("%d", "abc" => BadArgType); + sprintf_err!("%s", 5 => BadArgType); + sprintf_err!("%*d", "s", 5 => BadArgType); + sprintf_err!("%.*d", "s", 5 => BadArgType); + sprintf_err!("%18446744073709551616d", 5 => Overflow); + sprintf_err!("%.18446744073709551616d", 5 => Overflow); + + // We allow passing an int for a float, but not a float for an int. + assert_fmt!("%f", 3 => "3.000000"); + sprintf_err!("%d", 3.0 => BadArgType); + + // We allow passing an int for a char, reporting "overflow" for ints + // which cannot be converted to char (treating surrogates as "overflow"). 
+ assert_fmt!("%c", 0 => "\0"); + assert_fmt!("%c", 'Z' as u32 => "Z"); + sprintf_err!("%c", 5.0 => BadArgType); + sprintf_err!("%c", -1 => Overflow); + sprintf_err!("%c", u64::MAX => Overflow); + sprintf_err!("%c", 0xD800 => Overflow); + sprintf_err!("%c", 0xD8FF => Overflow); + + // Apostrophe only works for d,u,i,f,F + sprintf_err!("%'c", 0 => BadFormatString); + sprintf_err!("%'o", 0 => BadFormatString); + sprintf_err!("%'x", 0 => BadFormatString); + sprintf_err!("%'X", 0 => BadFormatString); + sprintf_err!("%'n", 0 => BadFormatString); + sprintf_err!("%'a", 0 => BadFormatString); + sprintf_err!("%'A", 0 => BadFormatString); + sprintf_err!("%'e", 0 => BadFormatString); + sprintf_err!("%'E", 0 => BadFormatString); + sprintf_err!("%'g", 0 => BadFormatString); + sprintf_err!("%'G", 0 => BadFormatString); +} + +#[test] +fn test_locale() { + fn test_printf_loc<'a>(expected: &str, locale: &Locale, format: &str, arg: impl ToArg<'a>) { + let mut target = String::new(); + let format_chars: Vec = format.chars().collect(); + let len = sprintf_locale(&mut target, &format_chars, locale, &mut [arg.to_arg()]) + .expect("printf failed"); + assert_eq!(len, target.len()); + assert_eq!(target, expected); + } + + let mut locale = C_LOCALE; + locale.decimal_point = ','; + locale.thousands_sep = Some('!'); + locale.grouping = [3, 1, 0, 0]; + + test_printf_loc("-46,380000", &locale, "%f", -46.38); + test_printf_loc("00000001,200", &locale, "%012.3f", 1.2); + test_printf_loc("1234", &locale, "%d", 1234); + test_printf_loc("0x1,9p+3", &locale, "%a", 12.5); + test_printf_loc("12345!6!789", &locale, "%'d", 123456789); + test_printf_loc("123!4!567", &locale, "%'d", 1234567); + test_printf_loc("214748!3!647", &locale, "%'u", 2147483647); + test_printf_loc("-123!4!567", &locale, "%'i", -1234567); + test_printf_loc("-123!4!567,890000", &locale, "%'f", -1234567.89); + test_printf_loc("123!4!567,8899999999", &locale, "%'.10f", 1234567.89); + test_printf_loc("12!3!456,789", &locale, "%'.3F", 
123456.789); + test_printf_loc("00000000001!234", &locale, "%'015d", 1234); + test_printf_loc("1!2!345", &locale, "%'7d", 12345); + test_printf_loc(" 1!2!345", &locale, "%'8d", 12345); + test_printf_loc("+1!2!345", &locale, "%'+d", 12345); + + // Thousands seps count as width, and so remove some leading zeros. + // Padding does NOT use thousands sep. + test_printf_loc("0001234567", &EN_US_LOCALE, "%010d", 1234567); + test_printf_loc("01,234,567", &EN_US_LOCALE, "%'010d", 1234567); + test_printf_loc( + "000000000000000001,222,333,444", + &EN_US_LOCALE, + "%'0.30d", + 1222333444, + ); +} + +#[test] +#[ignore] +fn test_float_hex_prec() { + // Check that libc and our hex float formatting agree for each precision. + // Note that our hex float formatting rounds according to the rounding mode, + // while libc may not; as a result we may differ in the last digit. So this + // requires manual comparison. + let mut c_storage = [0u8; 256]; + let c_storage_ptr = c_storage.as_mut_ptr() as *mut i8; + let mut rust_str = String::with_capacity(256); + + let c_fmt = b"%.*a\0".as_ptr() as *const i8; + let mut failed = false; + for sign in [1.0, -1.0].into_iter() { + for mut v in [0.0, 0.5, 1.0, 1.5, PI, TAU, E].into_iter() { + v *= sign; + for preci in 1..=200_i32 { + rust_str.clear(); + crate::sprintf!(=> &mut rust_str, utf32str!("%.*a"), preci, v); + + let printf_str = unsafe { + let len = libc::snprintf(c_storage_ptr, c_storage.len(), c_fmt, preci, v); + assert!(len >= 0); + let sl = std::slice::from_raw_parts(c_storage_ptr as *const u8, len as usize); + std::str::from_utf8(sl).unwrap() + }; + if rust_str != printf_str { + println!( + "Our printf and libc disagree on hex formatting of float: {v} + with precision: {preci} + our output: <{rust_str}> + libc output: <{printf_str}>" + ); + failed = true; + } + } + } + } + assert!(!failed); +} + +fn test_exhaustive(rust_fmt: &Utf32Str, c_fmt: *const i8) { + // "There's only 4 billion floats so test them all." 
+ // This tests a format string expected to be of the form "%.*g" or "%.*e". + // That is, it takes a precision and a double. + println!("Testing {}", rust_fmt); + let mut rust_str = String::with_capacity(128); + let mut c_storage = [0u8; 128]; + let c_storage_ptr = c_storage.as_mut_ptr() as *mut i8; + + for i in 0..=u32::MAX { + if i % 1000000 == 0 { + println!("{:.2}%", (i as f64) / (u32::MAX as f64) * 100.0); + } + let f = f32::from_bits(i); + let ff = f as f64; + for preci in 0..=10 { + rust_str.clear(); + crate::sprintf!(=> &mut rust_str, rust_fmt, preci, ff); + + let printf_str = unsafe { + let len = libc::snprintf(c_storage_ptr, c_storage.len(), c_fmt, preci, ff); + assert!(len >= 0); + let sl = std::slice::from_raw_parts(c_storage_ptr as *const u8, len as usize); + std::str::from_utf8(sl).unwrap() + }; + if rust_str != printf_str { + println!( + "Rust and libc disagree on formatting float {i:x}: {ff}\n + with precision: {preci} + format string: {rust_fmt} + rust output: <{rust_str}> + libc output: <{printf_str}>" + ); + assert_eq!(rust_str, printf_str); + } + } + } +} + +#[test] +#[ignore] +fn test_float_g_exhaustive() { + // To run: cargo test test_float_g_exhaustive --release -- --ignored --nocapture + test_exhaustive( + widestring::utf32str!("%.*g"), + b"%.*g\0".as_ptr() as *const i8, + ); +} + +#[test] +#[ignore] +fn test_float_e_exhaustive() { + // To run: cargo test test_float_e_exhaustive --release -- --ignored --nocapture + test_exhaustive( + widestring::utf32str!("%.*e"), + b"%.*e\0".as_ptr() as *const i8, + ); +} + +#[test] +#[ignore] +fn test_float_f_exhaustive() { + // To run: cargo test test_float_f_exhaustive --release -- --ignored --nocapture + test_exhaustive( + widestring::utf32str!("%.*f"), + b"%.*f\0".as_ptr() as *const i8, + ); +}