fish-shell/printf/src/printf_impl.rs

/** Rust printf implementation, based on musl. */
use super::arg::Arg;
use super::fmt_fp::format_float;
use super::locale::Locale;
use std::fmt::{self, Write};
use std::mem;
use std::result::Result;

#[cfg(feature = "widestring")]
use widestring::Utf32Str as wstr;

/// Errors that printf formatting can report.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// The format string is malformed.
    BadFormatString,
    /// The format string consumes more arguments than were supplied.
    MissingArg,
    /// Arguments were left over once the format string was exhausted.
    ExtraArg,
    /// An argument's type does not match its format specifier.
    BadArgType,
    /// A width, precision, or output length is too large to represent.
    Overflow,
    /// The output stream itself reported an error.
    Fmt(fmt::Error),
}

// Lets `?` lift output-stream failures into `Error::Fmt`.
impl From<fmt::Error> for Error {
    fn from(inner: fmt::Error) -> Self {
        Self::Fmt(inner)
    }
}

// The set of flag characters that may follow '%' in a conversion specification.
// All flags default to false; see `ModifierFlags::try_set` for the parsing.
#[derive(Debug, Copy, Clone, Default)]
pub(super) struct ModifierFlags {
    pub alt_form: bool, // #: alternate form
    pub zero_pad: bool, // 0: pad with zeros instead of spaces
    pub left_adj: bool, // -: left-adjust; also set by a negative '*' field width
    pub pad_pos: bool,  // space: blank before positive numbers
    pub mark_pos: bool, // +: sign before positive numbers
    pub grouped: bool,  // ': group indicator
}

impl ModifierFlags {
    // Turn on the flag that `c` denotes and report whether `c` was a flag at all.
    // Setting a flag that is already on is harmless, so repeated flags are accepted.
    fn try_set(&mut self, c: char) -> bool {
        // Pick the field controlled by this character; bail out for non-flags.
        let flag = match c {
            '#' => &mut self.alt_form,
            '0' => &mut self.zero_pad,
            '-' => &mut self.left_adj,
            ' ' => &mut self.pad_pos,
            '+' => &mut self.mark_pos,
            '\'' => &mut self.grouped,
            _ => return false,
        };
        *flag = true;
        true
    }
}

// The set of prefixes of conversion specifiers.
// Note that we mostly ignore prefixes - we take sizes of values from the arguments themselves.
// Each variant is named after the characters that spell it in a format string
// (see `get_prefix`); `Empty` means no prefix was present.
// Prefixes are still validated against the specifier by `ConversionSpec::supports_prefix`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
enum ConversionPrefix {
    Empty,
    hh,
    h,
    l,
    ll,
    j,
    t,
    z,
    L,
}

// The conversion specifiers we understand. Each variant is named after the
// character that selects it (see `ConversionSpec::from_char`), which is why
// the usual camel-case lint is disabled. rustfmt is skipped to keep related
// specifiers grouped on one line.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
#[rustfmt::skip]
pub(super) enum ConversionSpec {
    // Integers, with prefixes "hh", "h", "l", "ll", "j", "t", "z"
    // Note that we treat '%i' as '%d'.
    d, o, u, x, X,

    // USizeRef receiver, with same prefixes as ints
    n,

    // Float, with prefixes "l" and "L"
    a, A, e, E, f, F, g, G,

    // Pointer, no prefixes
    p,

    // Character or String, with supported prefixes "l"
    // Note that we treat '%C' as '%c' and '%S' as '%s'.
    c, s,
}

impl ConversionSpec {
    // Reports whether `prefix` may legally precede this conversion specifier.
    fn supports_prefix(self, prefix: ConversionPrefix) -> bool {
        use ConversionPrefix::*;
        use ConversionSpec::*;
        // The non-empty prefixes accepted by each family of specifiers.
        let accepted: &[ConversionPrefix] = match self {
            d | o | u | x | X | n => &[hh, h, l, ll, j, t, z],
            a | A | e | E | f | F | g | G => &[l, L],
            p => &[],
            c | s => &[l],
        };
        // The absence of a prefix is valid for every specifier.
        prefix == Empty || accepted.contains(&prefix)
    }

    // Reports whether this specifier is spelled in lowercase.
    // Uppercase specifiers render their output differently in some cases.
    #[inline]
    pub(super) fn is_lower(self) -> bool {
        match self {
            Self::X | Self::A | Self::E | Self::F | Self::G => false,
            Self::d
            | Self::o
            | Self::u
            | Self::x
            | Self::n
            | Self::a
            | Self::e
            | Self::f
            | Self::g
            | Self::p
            | Self::c
            | Self::s => true,
        }
    }

    // Maps a specifier character to its ConversionSpec.
    // Returns None for characters that are not conversion specifiers.
    // 'i', 'C' and 'S' are aliases of 'd', 'c' and 's' respectively.
    fn from_char(cc: char) -> Option<Self> {
        Some(match cc {
            'd' | 'i' => Self::d,
            'o' => Self::o,
            'u' => Self::u,
            'x' => Self::x,
            'X' => Self::X,
            'n' => Self::n,
            'a' => Self::a,
            'A' => Self::A,
            'e' => Self::e,
            'E' => Self::E,
            'f' => Self::f,
            'F' => Self::F,
            'g' => Self::g,
            'G' => Self::G,
            'p' => Self::p,
            'c' | 'C' => Self::c,
            's' | 'S' => Self::s,
            _ => return None,
        })
    }
}

/// A cursor over a format string, abstracting over the string representation.
pub trait FormatString {
    /// Returns true if no characters remain.
    fn is_empty(&self) -> bool;

    /// Returns the character `index` positions ahead of the cursor, or None if
    /// that is past the end. The index counts characters, not bytes.
    fn at(&self, index: usize) -> Option<char>;

    /// Moves the cursor forward by `n` characters.
    fn advance_by(&mut self, n: usize);

    /// Consumes the leading run of text that should be output verbatim and
    /// returns it, with each trailing "%%" pair collapsed into a single '%'.
    /// Implementations may use `buffer` as backing storage for the result.
    fn take_literal<'a: 'b, 'b>(&'a mut self, buffer: &'b mut String) -> &'b str;
}

impl FormatString for &str {
    fn is_empty(&self) -> bool {
        str::is_empty(self)
    }

    fn at(&self, index: usize) -> Option<char> {
        let mut remaining = self.chars();
        remaining.nth(index)
    }

    fn advance_by(&mut self, n: usize) {
        let mut cursor = self.chars();
        // Step over exactly `n` characters; running out early is a caller bug.
        (0..n).for_each(|_| {
            assert!(
                cursor.next().is_some(),
                "FormatString::advance(): index out of bounds"
            );
        });
        *self = cursor.as_str();
    }

    fn take_literal<'a: 'b, 'b>(&'a mut self, _buffer: &'b mut String) -> &'b str {
        let whole: &str = *self;
        // Byte offset of the first '%', or the full length if there is none.
        let literal_end = whole.find('%').unwrap_or(whole.len());
        // Length of the run of '%' that follows. '%' is ASCII, so scanning
        // bytes is equivalent to scanning characters here.
        let percent_run = whole.as_bytes()[literal_end..]
            .iter()
            .take_while(|&&byte| byte == b'%')
            .count();
        // Only complete "%%" pairs are literal; an odd trailing '%' starts a
        // conversion specification and is left in place.
        let escaped = percent_run / 2;
        let consumed = literal_end + 2 * escaped;
        *self = &whole[consumed..];
        // Drop one '%' per pair so that "%%" is reported as "%".
        &whole[..consumed - escaped]
    }
}

// FormatString over UTF-32 strings, where every character is one slice element.
#[cfg(feature = "widestring")]
impl FormatString for &wstr {
    fn is_empty(&self) -> bool {
        self.as_char_slice().is_empty()
    }

    fn at(&self, index: usize) -> Option<char> {
        let chars = self.as_char_slice();
        chars.get(index).copied()
    }

    fn advance_by(&mut self, n: usize) {
        let whole: &wstr = *self;
        *self = &whole[n..];
    }

    fn take_literal<'a: 'b, 'b>(&'a mut self, buffer: &'b mut String) -> &'b str {
        let whole: &wstr = *self;
        let chars = whole.as_char_slice();
        // Number of characters before the first '%' (all of them if none).
        let literal_len = chars
            .iter()
            .position(|&c| c == '%')
            .unwrap_or(chars.len());
        // Only complete "%%" pairs are literal; an odd trailing '%' is left behind.
        let percent_run = chars[literal_len..]
            .iter()
            .take_while(|&&c| c == '%')
            .count();
        let escaped = percent_run / 2;
        *self = &whole[literal_len + 2 * escaped..];
        // Copy the literal into the buffer, keeping one '%' per consumed pair.
        buffer.clear();
        buffer.extend(chars[..literal_len + escaped].iter().copied());
        buffer.as_str()
    }
}

// Read an int from a format string, stopping at the first non-digit.
// Negative values are not supported.
// If there are no digits, return 0.
// Adjust the format string to point to the char after the int.
fn get_int(fmt: &mut impl FormatString) -> Result<usize, Error> {
    use Error::Overflow;
    let mut i: usize = 0;
    while let Some(digit) = fmt.at(0).and_then(|c| c.to_digit(10)) {
        i = i.checked_mul(10).ok_or(Overflow)?;
        i = i.checked_add(digit as usize).ok_or(Overflow)?;
        fmt.advance_by(1);
    }
    Ok(i)
}

// Parse the conversion prefix (if any) at the cursor and step past it.
// Returns ConversionPrefix::Empty, consuming nothing, when there is no prefix.
fn get_prefix(fmt: &mut impl FormatString) -> ConversionPrefix {
    use ConversionPrefix as CP;
    // Determine the prefix together with the number of characters it spans.
    // The doubled forms "hh" and "ll" must be tried before "h" and "l".
    let (prefix, consumed) = match fmt.at(0) {
        Some('h') if fmt.at(1) == Some('h') => (CP::hh, 2),
        Some('h') => (CP::h, 1),
        Some('l') if fmt.at(1) == Some('l') => (CP::ll, 2),
        Some('l') => (CP::l, 1),
        Some('j') => (CP::j, 1),
        Some('t') => (CP::t, 1),
        Some('z') => (CP::z, 1),
        Some('L') => (CP::L, 1),
        _ => (CP::Empty, 0),
    };
    fmt.advance_by(consumed);
    prefix
}

// Read an (optionally prefixed) format specifier, such as d, Lf, etc.
// Adjust the cursor to point to the char after the specifier.
fn get_specifier(fmt: &mut impl FormatString) -> Result<ConversionSpec, Error> {
    let prefix = get_prefix(fmt);
    // Awkwardly placed hack to disallow %lC and %lS, since we otherwise treat
    // them as the same.
    if prefix != ConversionPrefix::Empty && matches!(fmt.at(0), Some('C' | 'S')) {
        return Err(Error::BadFormatString);
    }
    let spec = fmt
        .at(0)
        .and_then(ConversionSpec::from_char)
        .ok_or(Error::BadFormatString)?;
    if !spec.supports_prefix(prefix) {
        return Err(Error::BadFormatString);
    }
    fmt.advance_by(1);
    Ok(spec)
}

// Emit enough copies of `c` to grow output of `current_width` up to `min_width`.
// Nothing is written if the output is already wide enough.
// `c` must be '0' or ' '.
pub(super) fn pad(
    f: &mut impl Write,
    c: char,
    min_width: usize,
    current_width: usize,
) -> fmt::Result {
    assert!(c == '0' || c == ' ');
    const ZEROS: &str = "0000000000000000";
    const SPACES: &str = "                ";
    let fill = match c {
        '0' => ZEROS,
        _ => SPACES,
    };
    // Write the padding in chunks of the fill string: whole chunks first,
    // then whatever is left over.
    let deficit = min_width.saturating_sub(current_width);
    let chunk = fill.len();
    for _ in 0..deficit / chunk {
        f.write_str(fill)?;
    }
    let tail = deficit % chunk;
    if tail > 0 {
        f.write_str(&fill[..tail])?;
    }
    Ok(())
}

/// Formats a string using the provided format specifiers, arguments, and locale,
/// and writes the output to the given `Write` implementation.
///
/// # Parameters
/// - `f`: The receiver of formatted output.
/// - `fmt`: The format string being parsed.
/// - `locale`: The locale to use for number formatting.
/// - `args`: Iterator over the arguments to format.
///
/// # Returns
/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`.
///
/// # Errors
/// - `Error::BadFormatString` if a conversion specification is malformed, or if the
///   `'` flag is used with a conversion other than d, i, u, f or F.
/// - `Error::MissingArg` if the format string needs more arguments than were given.
/// - `Error::ExtraArg` if arguments remain after the format string is exhausted.
/// - `Error::Overflow` if a width, precision or the output length is too large.
/// - `Error::Fmt` if writing to `f` fails.
/// - Any error reported by the argument accessors when an argument cannot be
///   converted to the type its specifier requires.
///
/// # Example
///
/// ```
/// use fish_printf::{sprintf_locale, ToArg, FormatString, locale};
/// use std::fmt::Write;
///
/// let mut output = String::new();
/// let fmt: &str = "%'0.2f";
/// let mut args = [1234567.89.to_arg()];
///
/// let result = sprintf_locale(&mut output, fmt, &locale::EN_US_LOCALE, &mut args);
///
/// assert!(result == Ok(12));
/// assert_eq!(output, "1,234,567.89");
/// ```
pub fn sprintf_locale(
    f: &mut impl Write,
    fmt: impl FormatString,
    locale: &Locale,
    args: &mut [Arg],
) -> Result<usize, Error> {
    use ConversionSpec as CS;
    let mut s = fmt;
    let mut args = args.iter_mut();
    let mut out_len: usize = 0;

    // Shared storage for the output of the conversion specifier.
    let buf = &mut String::new();
    'main: while !s.is_empty() {
        buf.clear();

        // Handle literal text and %% format specifiers.
        let lit = s.take_literal(buf);
        if !lit.is_empty() {
            f.write_str(lit)?;
            // NOTE(review): literal text is counted in characters here, whereas
            // conversions below are counted via `str::len` (bytes). The two only
            // differ for non-ASCII output - confirm which unit callers expect.
            out_len = out_len
                .checked_add(lit.chars().count())
                .ok_or(Error::Overflow)?;
            continue 'main;
        }

        // Consume the % at the start of the format specifier.
        debug_assert!(s.at(0) == Some('%'));
        s.advance_by(1);

        // Read modifier flags. '-' and '0' flags are mutually exclusive.
        let mut flags = ModifierFlags::default();
        while flags.try_set(s.at(0).unwrap_or('\0')) {
            s.advance_by(1);
        }
        if flags.left_adj {
            flags.zero_pad = false;
        }

        // Read field width. We do not support $.
        let width = if s.at(0) == Some('*') {
            let arg_width = args.next().ok_or(Error::MissingArg)?.as_sint()?;
            s.advance_by(1);
            // A negative width means "left adjust, with the absolute value as width".
            if arg_width < 0 {
                flags.left_adj = true;
            }
            arg_width
                .unsigned_abs()
                .try_into()
                .map_err(|_| Error::Overflow)?
        } else {
            get_int(&mut s)?
        };

        // Optionally read precision. We do not support $.
        let mut prec: Option<usize> = if s.at(0) == Some('.') && s.at(1) == Some('*') {
            // "A negative precision is treated as though it were missing."
            // Here we assume the precision is always signed.
            s.advance_by(2);
            let p = args.next().ok_or(Error::MissingArg)?.as_sint()?;
            p.try_into().ok()
        } else if s.at(0) == Some('.') {
            s.advance_by(1);
            Some(get_int(&mut s)?)
        } else {
            None
        };
        // Disallow precisions larger than i32::MAX, in keeping with C.
        if prec.unwrap_or(0) > i32::MAX as usize {
            return Err(Error::Overflow);
        }

        // Read out the format specifier and arg.
        let conv_spec = get_specifier(&mut s)?;
        let arg = args.next().ok_or(Error::MissingArg)?;
        let mut prefix = "";

        // Thousands grouping only works for d,u,i,f,F.
        // 'i' is mapped to 'd'.
        if flags.grouped && !matches!(conv_spec, CS::d | CS::u | CS::f | CS::F) {
            return Err(Error::BadFormatString);
        }

        // Disable zero-pad if we have an explicit precision.
        // "If a precision is given with a numeric conversion (d, i, o, u, x, and X),
        // the 0 flag is ignored." p is included here.
        let spec_is_numeric = matches!(conv_spec, CS::d | CS::u | CS::o | CS::p | CS::x | CS::X);
        if spec_is_numeric && prec.is_some() {
            flags.zero_pad = false;
        }

        // Apply the formatting. Some cases continue the main loop.
        // Note that numeric conversions must leave 'body' empty if the value is 0.
        let body: &str = match conv_spec {
            CS::n => {
                // %n produces no output; it reports the output length so far.
                arg.set_count(out_len)?;
                continue 'main;
            }
            CS::e | CS::f | CS::g | CS::a | CS::E | CS::F | CS::G | CS::A => {
                // Floating point types handle output on their own.
                let float = arg.as_float()?;
                let len = format_float(f, float, width, prec, flags, locale, conv_spec, buf)?;
                out_len = out_len.checked_add(len).ok_or(Error::Overflow)?;
                continue 'main;
            }
            CS::p => {
                // An explicit precision is raised to at least the number of hex
                // digits in a pointer.
                const PTR_HEX_DIGITS: usize = 2 * mem::size_of::<*const u8>();
                prec = prec.map(|p| p.max(PTR_HEX_DIGITS));
                let uint = arg.as_uint()?;
                if uint != 0 {
                    prefix = "0x";
                    write!(buf, "{:x}", uint)?;
                }
                buf
            }
            CS::x | CS::X => {
                // If someone passes us a negative value, format it with the width
                // we were given.
                let lower = conv_spec.is_lower();
                let (_, uint) = arg.as_wrapping_sint()?;
                if uint != 0 {
                    if flags.alt_form {
                        prefix = if lower { "0x" } else { "0X" };
                    }
                    if lower {
                        write!(buf, "{:x}", uint)?;
                    } else {
                        write!(buf, "{:X}", uint)?;
                    }
                }
                buf
            }
            CS::o => {
                let uint = arg.as_uint()?;
                if uint != 0 {
                    write!(buf, "{:o}", uint)?;
                }
                // The alternate form guarantees a leading zero: make the
                // precision one larger than the digits if it is not already.
                if flags.alt_form && prec.unwrap_or(0) <= buf.len() + 1 {
                    prec = Some(buf.len() + 1);
                }
                buf
            }
            CS::u => {
                let uint = arg.as_uint()?;
                if uint != 0 {
                    write!(buf, "{}", uint)?;
                }
                buf
            }
            CS::d => {
                let arg_i = arg.as_sint()?;
                // The sign (or its placeholder) is emitted as a prefix so that
                // zero padding goes between it and the digits.
                if arg_i < 0 {
                    prefix = "-";
                } else if flags.mark_pos {
                    prefix = "+";
                } else if flags.pad_pos {
                    prefix = " ";
                }
                if arg_i != 0 {
                    write!(buf, "{}", arg_i.unsigned_abs())?;
                }
                buf
            }
            CS::c => {
                // also 'C'
                flags.zero_pad = false;
                buf.push(arg.as_char()?);
                buf
            }
            CS::s => {
                // also 'S'
                let s = arg.as_str(buf)?;
                // The precision limits the number of bytes taken from the string.
                // Never split a multi-byte character: back off to the nearest
                // character boundary, since slicing mid-character would panic.
                // Index 0 is always a boundary, so this terminates.
                let mut p = prec.unwrap_or(s.len()).min(s.len());
                while !s.is_char_boundary(p) {
                    p -= 1;
                }
                prec = Some(p);
                flags.zero_pad = false;
                &s[..p]
            }
        };
        // Numeric output should be empty iff the value is 0.
        if spec_is_numeric && body.is_empty() {
            debug_assert!(arg.as_uint().unwrap() == 0);
        }

        // Decide if we want to apply thousands grouping to the body, and compute its size.
        // Note we have already errored out if grouped is set and this is non-numeric.
        let wants_grouping = flags.grouped && locale.thousands_sep.is_some();
        let body_len = match wants_grouping {
            true => body.len() + locale.separator_count(body.len()),
            false => body.len(),
        };

        // Resolve the precision.
        // In the case of a non-numeric conversion, update the precision to at least the
        // length of the string.
        let prec = if !spec_is_numeric {
            prec.unwrap_or(body_len)
        } else {
            prec.unwrap_or(1).max(body_len)
        };

        let prefix_len = prefix.len();
        let unpadded_width = prefix_len.checked_add(prec).ok_or(Error::Overflow)?;
        let width = width.max(unpadded_width);

        // Pad on the left with spaces to the desired width?
        if !flags.left_adj && !flags.zero_pad {
            pad(f, ' ', width, unpadded_width)?;
        }

        // Output any prefix.
        f.write_str(prefix)?;

        // Pad after the prefix with zeros to the desired width?
        if !flags.left_adj && flags.zero_pad {
            pad(f, '0', width, unpadded_width)?;
        }

        // Pad on the left to the given precision?
        pad(f, '0', prec, body_len)?;

        // Output the actual value, perhaps with grouping.
        if wants_grouping {
            f.write_str(&locale.apply_grouping(body))?;
        } else {
            f.write_str(body)?;
        }

        // Pad on the right with spaces if we are left adjusted?
        if flags.left_adj {
            pad(f, ' ', width, unpadded_width)?;
        }

        out_len = out_len.checked_add(width).ok_or(Error::Overflow)?;
    }

    // Too many args?
    if args.next().is_some() {
        return Err(Error::ExtraArg);
    }
    Ok(out_len)
}