Revert "Revert "Implement builtin_printf in Rust""

This reverts commit 9f7e6a6cd1. Add additional fixes from code review.
2024-11-10 23:24:39 +00:00 · 2023-04-01 10:17:49 -07:00 · 2023-04-01 10:17:49 -07:00 · a487b1ecf2
commit a487b1ecf2
parent 2d6f752f6e
11 changed files with 873 additions and 729 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -105,7 +105,7 @@ set(FISH_BUILTIN_SRCS
    src/builtins/disown.cpp
    src/builtins/eval.cpp src/builtins/fg.cpp
    src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp
-    src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp
+    src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/path.cpp
    src/builtins/read.cpp src/builtins/set.cpp
    src/builtins/set_color.cpp src/builtins/source.cpp src/builtins/status.cpp
    src/builtins/string.cpp src/builtins/test.cpp src/builtins/type.cpp src/builtins/ulimit.cpp
--- a/fish-rust/src/builtins/mod.rs
+++ b/fish-rust/src/builtins/mod.rs
@ -7,6 +7,7 @@ pub mod contains;
 pub mod echo;
 pub mod emit;
 pub mod exit;
 pub mod printf;
 pub mod pwd;
 pub mod random;
 pub mod realpath;
--- a/fish-rust/src/builtins/printf.rs
+++ b/fish-rust/src/builtins/printf.rs
@ -0,0 +1,810 @@
 // printf - format and print data
 // Copyright (C) 1990-2007 Free Software Foundation, Inc.
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2, or (at your option)
 // any later version.
 //
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software Foundation,
 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 // Usage: printf format [argument...]
 //
 // A front end to the printf function that lets it be used from the shell.
 //
 // Backslash escapes:
 //
 // \" = double quote
 // \\ = backslash
 // \a = alert (bell)
 // \b = backspace
 // \c = produce no further output
 // \e = escape
 // \f = form feed
 // \n = new line
 // \r = carriage return
 // \t = horizontal tab
 // \v = vertical tab
 // \ooo = octal number (ooo is 1 to 3 digits)
 // \xhh = hexadecimal number (hh is 1 to 2 digits)
 // \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
 // \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
 //
 // Additional directive:
 //
 // %b = print an argument string, interpreting backslash escapes,
 //   except that octal escapes are of the form \0 or \0ooo.
 //
 // The `format' argument is re-used as many times as necessary
 // to convert all of the given arguments.
 //
 // David MacKenzie <djm@gnu.ai.mit.edu>
 // This file has been imported from source code of printf command in GNU Coreutils version 6.9.
 use libc::c_int;
 use num_traits;
 use std::result::Result;
 use crate::builtins::shared::{io_streams_t, STATUS_CMD_ERROR, STATUS_CMD_OK, STATUS_INVALID_ARGS};
 use crate::ffi::parser_t;
 use crate::locale::{get_numeric_locale, Locale};
 use crate::wchar::{encode_byte_to_char, wstr, WExt, WString, L};
 use crate::wutil::errors::Error;
 use crate::wutil::gettext::{wgettext, wgettext_fmt};
 use crate::wutil::wcstod::wcstod;
 use crate::wutil::wcstoi::{fish_wcstoi_partial, Options as WcstoiOpts};
 use crate::wutil::{sprintf, wstr_offset_in};
 use printf_compat::args::ToArg;
 use printf_compat::printf::sprintf_locale;
 /// \return whether \p c is one of the octal digits '0' through '7'.
 fn is_octal_digit(c: char) -> bool {
    matches!(c, '0'..='7')
 }
 /// \return whether \p c is an ASCII decimal digit ('0' through '9').
 fn iswdigit(c: char) -> bool {
    matches!(c, '0'..='9')
 }
 /// \return whether \p c is an ASCII hexadecimal digit (0-9, a-f or A-F).
 fn iswxdigit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
 }
 // Mutable state threaded through a single invocation of the printf builtin: the streams being
 // written to, the pending output, the resulting exit status and the numeric locale.
 struct builtin_printf_state_t<'a> {
    // Out and err streams. Note this is a captured reference!
    streams: &'a mut io_streams_t,
    // The status of the operation. This is what the builtin ultimately returns; the error
    // reporting helpers set it to STATUS_CMD_ERROR.
    exit_code: c_int,
    // Whether we should stop outputting. This gets set in the case of a fatal error, and also with
    // the \c escape. Once set, appends to the buffer and further error reports are ignored.
    early_exit: bool,
    // Our output buffer, so we don't write() constantly.
    // Our strategy is simple:
    // We flush once per pass over the format string, and we also flush the buffer before
    // reporting an error so that output and diagnostics appear in order.
    buff: WString,
    // The locale, which affects printf output and also parsing of floats due to decimal separators.
    locale: Locale,
 }
 /// A numeric type that printf arguments can be converted to.
 /// Implementors provide the string parser; conversion from a quoted character is shared.
 trait RawStringToScalarType: Copy + num_traits::Zero + std::convert::From<u32> {
    /// Parse a value of this type from the front of \p s, using \p locale where the type cares
    /// about it.
    /// \return the conversion result. The unconverted remainder of \p s is reported through
    /// \p end; whether \p end is written when the conversion fails is up to the implementation.
    fn raw_string_to_scalar_type<'a>(
        s: &'a wstr,
        locale: &Locale,
        end: &mut &'a wstr,
    ) -> Result<Self, Error>;
    /// Build a value of this type from the numeric value of a Unicode code point.
    /// This is what lets printf turn a character into a number via a leading quote:
    ///     > printf "%f" "'a"
    ///     97.000000
    fn from_ord(c: char) -> Self {
        Self::from(u32::from(c))
    }
 }
 impl RawStringToScalarType for i64 {
    /// Parse a signed integer prefix of \p s with the default wcstoi options; the locale is
    /// irrelevant for integers. \p end is always set to whatever follows the consumed chars.
    fn raw_string_to_scalar_type<'a>(
        s: &'a wstr,
        _locale: &Locale,
        end: &mut &'a wstr,
    ) -> Result<Self, Error> {
        let options = WcstoiOpts::default();
        let mut chars_consumed = 0;
        let parsed = fish_wcstoi_partial(s, options, &mut chars_consumed);
        *end = s.slice_from(chars_consumed);
        parsed
    }
 }
 impl RawStringToScalarType for u64 {
    /// Parse an unsigned integer prefix of \p s, asking wcstoi to wrap negative inputs
    /// (`wrap_negatives`); the locale is irrelevant for integers.
    /// \p end is always set to whatever follows the consumed chars.
    fn raw_string_to_scalar_type<'a>(
        s: &'a wstr,
        _locale: &Locale,
        end: &mut &'a wstr,
    ) -> Result<Self, Error> {
        let options = WcstoiOpts {
            wrap_negatives: true,
            ..Default::default()
        };
        let mut chars_consumed = 0;
        let parsed = fish_wcstoi_partial(s, options, &mut chars_consumed);
        *end = s.slice_from(chars_consumed);
        parsed
    }
 }
 impl RawStringToScalarType for f64 {
    /// Parse a floating point prefix of \p s. The locale's decimal point is tried first and is
    /// accepted only if it converts the whole string; otherwise '.' is used as the separator.
    /// \p end is only updated when a conversion succeeds.
    fn raw_string_to_scalar_type<'a>(
        s: &'a wstr,
        locale: &Locale,
        end: &mut &'a wstr,
    ) -> Result<Self, Error> {
        let mut localized_len: usize = 0;
        let localized = wcstod(s, locale.decimal_point, &mut localized_len);
        if localized.is_ok() && localized_len == s.chars().count() {
            *end = s.slice_from(localized_len);
            return localized;
        }
        // The conversion using the user's locale did not cover the whole string. The string may
        // simply not be a valid floating point value, or the locale may use separators other than
        // the english convention. Retry with '.' as the decimal point to cover the latter.
        let mut fallback_len: usize = 0;
        let fallback = wcstod(s, '.', &mut fallback_len);
        if fallback.is_ok() {
            *end = s.slice_from(fallback_len);
        }
        fallback
    }
 }
 /// Convert the argument \p s to the scalar type T.
 /// A leading single or double quote selects the code point of the following character;
 /// anything else is parsed as a number, with problems reported via state.verify_numeric.
 /// \return the converted value, or zero if the numeric conversion failed.
 fn string_to_scalar_type<T: RawStringToScalarType>(
    s: &wstr,
    state: &mut builtin_printf_state_t,
 ) -> T {
    if matches!(s.char_at(0), '"' | '\'') {
        // Note that if the string is really just a leading quote,
        // we really do want to convert the "trailing nul".
        return T::from_ord(s.char_at(1));
    }
    let mut remainder = s;
    let parsed = T::raw_string_to_scalar_type(s, &state.locale, &mut remainder);
    state.verify_numeric(s, remainder, parsed.err());
    parsed.unwrap_or(T::zero())
 }
 /// Set the entry of \p ok indexed by each character of \p str to \p flag.
 /// Every character of \p str must have a code point below 256, or the indexing panics.
 fn modify_allowed_format_specifiers(ok: &mut [bool; 256], str: &str, flag: bool) {
    str.chars()
        .map(|ch| ch as usize)
        .for_each(|slot| ok[slot] = flag);
 }
 impl<'a> builtin_printf_state_t<'a> {
    #[allow(clippy::partialeq_to_none)]
    fn verify_numeric(&mut self, s: &wstr, end: &wstr, errcode: Option<Error>) {
        // This check matches the historic `errcode != EINVAL` check from C++.
        // Note that empty or missing values will be silently treated as 0.
        if errcode != None && errcode != Some(Error::InvalidChar) && errcode != Some(Error::Empty) {
            match errcode.unwrap() {
                Error::Overflow => {
                    self.fatal_error(sprintf!("%ls: %ls", s, wgettext!("Number out of range")));
                }
                Error::Empty => {
                    self.fatal_error(sprintf!("%ls: %ls", s, wgettext!("Number was empty")));
                }
                Error::InvalidChar | Error::CharsLeft => {
                    panic!("Unreachable");
                }
            }
        } else if !end.is_empty() {
            if s.as_ptr() == end.as_ptr() {
                self.fatal_error(wgettext_fmt!("%ls: expected a numeric value", s));
            } else {
                // This isn't entirely fatal - the value should still be printed.
                self.nonfatal_error(wgettext_fmt!(
                    "%ls: value not completely converted (can't convert '%ls')",
                    s,
                    end
                ));
                // Warn about octal numbers as they can be confusing.
                // Do it if the unconverted digit is a valid hex digit,
                // because it could also be an "0x" -> "0" typo.
                if s.char_at(0) == '0' && iswxdigit(end.char_at(0)) {
                    self.nonfatal_error(wgettext_fmt!(
                        "Hint: a leading '0' without an 'x' indicates an octal number"
                    ));
                }
            }
        }
    }
    /// Evaluate a printf conversion specification.  SPEC is the start of the directive, and CONVERSION
    /// specifies the type of conversion.  SPEC does not include any length modifier or the
    /// conversion specifier itself.  FIELD_WIDTH and PRECISION are the field width and
    /// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
    /// ARGUMENT is the argument to be formatted.
    #[allow(clippy::collapsible_else_if, clippy::too_many_arguments)]
    fn print_direc(
        &mut self,
        spec: &wstr,
        conversion: char,
        have_field_width: bool,
        field_width: i32,
        have_precision: bool,
        precision: i32,
        argument: &wstr,
    ) {
        /// Printf macro helper which provides our locale.
        macro_rules! sprintf_loc {
            (
            $fmt:expr, // format string of type &wstr
            $($arg:expr),* // arguments
            ) => {
                sprintf_locale(
                    $fmt,
                    &self.locale,
                    &[$($arg.to_arg()),*]
                )
            }
        }
        // Start with everything except the conversion specifier.
        let mut fmt = spec.to_owned();
        // Create a copy of the % directive, with a width modifier substituted for any
        // existing integer length modifier.
        match conversion {
            'x' | 'X' | 'd' | 'i' | 'o' | 'u' => {
                fmt.push_str("ll");
            }
            'a' | 'e' | 'f' | 'g' | 'A' | 'E' | 'F' | 'G' => {
                fmt.push_str("L");
            }
            's' | 'c' => {
                fmt.push_str("l");
            }
            _ => {}
        }
        // Append the conversion itself.
        fmt.push(conversion);
        // Rebind as a ref.
        let fmt: &wstr = &fmt;
        match conversion {
            'd' | 'i' => {
                let arg: i64 = string_to_scalar_type(argument, self);
                if !have_field_width {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, precision, arg));
                    }
                } else {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, field_width, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
                    }
                }
            }
            'o' | 'u' | 'x' | 'X' => {
                let arg: u64 = string_to_scalar_type(argument, self);
                if !have_field_width {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, precision, arg));
                    }
                } else {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, field_width, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
                    }
                }
            }
            'a' | 'A' | 'e' | 'E' | 'f' | 'F' | 'g' | 'G' => {
                let arg: f64 = string_to_scalar_type(argument, self);
                if !have_field_width {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, precision, arg));
                    }
                } else {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, field_width, arg));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
                    }
                }
            }
            'c' => {
                if !have_field_width {
                    self.append_output_str(sprintf_loc!(fmt, argument.char_at(0)));
                } else {
                    self.append_output_str(sprintf_loc!(fmt, field_width, argument.char_at(0)));
                }
            }
            's' => {
                if !have_field_width {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, argument));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, precision, argument));
                    }
                } else {
                    if !have_precision {
                        self.append_output_str(sprintf_loc!(fmt, field_width, argument));
                    } else {
                        self.append_output_str(sprintf_loc!(fmt, field_width, precision, argument));
                    }
                }
            }
            _ => {
                panic!("unexpected opt: {}", conversion);
            }
        }
    }
    /// Print the text in FORMAT, using ARGV for arguments to any `%' directives.
    /// Literal characters are copied to the output buffer, backslash escapes are handed to
    /// print_esc, and each `%' directive is validated and then handed to print_direc (or to
    /// print_esc_string for %b).  A directive with no argument left is given an empty string,
    /// and a missing `*' width or precision is taken as 0.
    /// Return the number of elements of ARGV used.  Note that 0 is returned when an invalid
    /// conversion specification is encountered, regardless of how many arguments were consumed
    /// before that point.
    fn print_formatted(&mut self, format: &wstr, mut argv: &[&wstr]) -> usize {
        let mut argc = argv.len();
        let save_argc = argc; /* Preserve original value.  */
        let mut f: &wstr; /* Pointer into `format'.  */
        let mut direc_start: &wstr; /* Start of % directive.  */
        let mut direc_length: usize; /* Length of % directive.  */
        let mut have_field_width: bool; /* True if FIELD_WIDTH is valid.  */
        let mut field_width: c_int = 0; /* Arg to first '*'.  */
        let mut have_precision: bool; /* True if PRECISION is valid.  */
        let mut precision = 0; /* Arg to second '*'.  */
        let mut ok = [false; 256]; /* ok['x'] is true if %x is allowed.  */
        // N.B. this was originally written as a loop like so:
        //    for (f = format; *f != L'\0'; ++f) {
        // so we emulate that.
        f = format;
        let mut first = true;
        loop {
            // The emulated `++f`: skipped on the first iteration only.
            if !first {
                f = &f[1..];
            }
            first = false;
            if f.is_empty() {
                break;
            }
            match f.char_at(0) {
                '%' => {
                    direc_start = f;
                    f = &f[1..];
                    direc_length = 1;
                    have_field_width = false;
                    have_precision = false;
                    // "%%" prints a literal percent sign; the loop increment skips the second '%'.
                    if f.char_at(0) == '%' {
                        self.append_output('%');
                        continue;
                    }
                    if f.char_at(0) == 'b' {
                        // FIXME: Field width and precision are not supported for %b, even though POSIX
                        // requires it.
                        if argc > 0 {
                            self.print_esc_string(argv[0]);
                            argv = &argv[1..];
                            argc -= 1;
                        }
                        continue;
                    }
                    // Start from every conversion being allowed; the flags and the precision
                    // seen below rule out the conversions they cannot be combined with.
                    modify_allowed_format_specifiers(&mut ok, "aAcdeEfFgGiosuxX", true);
                    let mut continue_looking_for_flags = true;
                    while continue_looking_for_flags {
                        match f.char_at(0) {
                            'I' | '\'' => {
                                modify_allowed_format_specifiers(&mut ok, "aAceEosxX", false);
                            }
                            '-' | '+' | ' ' => {
                                // pass
                            }
                            '#' => {
                                modify_allowed_format_specifiers(&mut ok, "cdisu", false);
                            }
                            '0' => {
                                modify_allowed_format_specifiers(&mut ok, "cs", false);
                            }
                            _ => {
                                continue_looking_for_flags = false;
                            }
                        }
                        if continue_looking_for_flags {
                            f = &f[1..];
                            direc_length += 1;
                        }
                    }
                    // Field width: either '*' (taken from the next argument) or literal digits,
                    // which simply stay part of the directive text.
                    if f.char_at(0) == '*' {
                        f = &f[1..];
                        direc_length += 1;
                        if argc > 0 {
                            let width: i64 = string_to_scalar_type(argv[0], self);
                            if (c_int::MIN as i64) <= width && width <= (c_int::MAX as i64) {
                                field_width = width as c_int;
                            } else {
                                self.fatal_error(wgettext_fmt!(
                                    "invalid field width: %ls",
                                    argv[0]
                                ));
                            }
                            argv = &argv[1..];
                            argc -= 1;
                        } else {
                            field_width = 0;
                        }
                        have_field_width = true;
                    } else {
                        while iswdigit(f.char_at(0)) {
                            f = &f[1..];
                            direc_length += 1;
                        }
                    }
                    // Precision: a '.' followed by either '*' or literal digits.
                    if f.char_at(0) == '.' {
                        f = &f[1..];
                        direc_length += 1;
                        modify_allowed_format_specifiers(&mut ok, "c", false);
                        if f.char_at(0) == '*' {
                            f = &f[1..];
                            direc_length += 1;
                            if argc > 0 {
                                let prec: i64 = string_to_scalar_type(argv[0], self);
                                if prec < 0 {
                                    // A negative precision is taken as if the precision were omitted,
                                    // so -1 is safe here even if prec < INT_MIN.
                                    precision = -1;
                                } else if (c_int::MAX as i64) < prec {
                                    self.fatal_error(wgettext_fmt!(
                                        "invalid precision: %ls",
                                        argv[0]
                                    ));
                                } else {
                                    precision = prec as c_int;
                                }
                                argv = &argv[1..];
                                argc -= 1;
                            } else {
                                precision = 0;
                            }
                            have_precision = true;
                        } else {
                            while iswdigit(f.char_at(0)) {
                                f = &f[1..];
                                direc_length += 1;
                            }
                        }
                    }
                    // Skip any length modifiers. They are deliberately not counted in
                    // direc_length, because print_direc supplies its own length modifier.
                    while matches!(f.char_at(0), 'l' | 'L' | 'h' | 'j' | 't' | 'z') {
                        f = &f[1..];
                    }
                    let conversion = f.char_at(0);
                    // Reject conversions outside the table or ruled out by the flags above.
                    if (conversion as usize) > 0xFF || !ok[conversion as usize] {
                        self.fatal_error(wgettext_fmt!(
                            "%.*ls: invalid conversion specification",
                            wstr_offset_in(f, direc_start) + 1,
                            direc_start
                        ));
                        return 0;
                    }
                    // A directive without a remaining argument formats the empty string.
                    let mut argument = L!("");
                    if argc > 0 {
                        argument = argv[0];
                        argv = &argv[1..];
                        argc -= 1;
                    }
                    self.print_direc(
                        &direc_start[..direc_length],
                        f.char_at(0),
                        have_field_width,
                        field_width,
                        have_precision,
                        precision,
                        argument,
                    );
                }
                '\\' => {
                    let consumed_minus_1 = self.print_esc(f, false);
                    f = &f[consumed_minus_1..]; // Loop increment will add 1.
                }
                c => {
                    self.append_output(c);
                }
            }
        }
        save_argc - argc
    }
    /// Report an error that does not stop printing: the message goes to the error stream and the
    /// exit code becomes STATUS_CMD_ERROR, but later output is still produced.
    /// Nothing is reported once output has been cut short (early_exit).
    fn nonfatal_error<Str: AsRef<wstr>>(&mut self, errstr: Str) {
        // Don't error twice.
        if self.early_exit {
            return;
        }
        let message = errstr.as_ref();
        // Flush pending output first so that it appears before the diagnostic.
        if !self.buff.is_empty() {
            self.streams.out.append(&self.buff);
            self.buff.clear();
        }
        // Write the message, making sure it ends with a newline.
        self.streams.err.append(message);
        if !message.ends_with('\n') {
            self.streams.err.append1('\n');
        }
        // We set the exit code to error, because one occurred,
        // but we don't do an early exit so we still print what we can.
        self.exit_code = STATUS_CMD_ERROR.unwrap();
    }
    /// Report an error that stops printing: the message goes to the error stream, the exit code
    /// becomes STATUS_CMD_ERROR and early_exit is set so that no further output is produced.
    /// Nothing is reported if output has already been cut short (early_exit).
    fn fatal_error<Str: AsRef<wstr>>(&mut self, errstr: Str) {
        // Don't error twice.
        if self.early_exit {
            return;
        }
        let message = errstr.as_ref();
        // Flush pending output first so that it appears before the diagnostic.
        if !self.buff.is_empty() {
            self.streams.out.append(&self.buff);
            self.buff.clear();
        }
        // Write the message, making sure it ends with a newline.
        self.streams.err.append(message);
        if !message.ends_with('\n') {
            self.streams.err.append1('\n');
        }
        self.early_exit = true;
        self.exit_code = STATUS_CMD_ERROR.unwrap();
    }
    /// Print the \ escape sequence starting at ESCSTART, whose first character must be the
    /// backslash.
    /// Return the number of characters in the sequence, *besides the backslash*.
    /// That is, ONE LESS than the number of characters consumed.
    /// If octal_0 is true, octal escapes are of the form \0ooo, where o
    /// is an octal digit; otherwise they are of the form \ooo.
    fn print_esc(&mut self, escstart: &wstr, octal_0: bool) -> usize {
        assert!(escstart.char_at(0) == '\\');
        // Cursor over the text that follows the backslash.
        let mut rest = &escstart[1..];
        match rest.char_at(0) {
            'x' => {
                // A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
                rest = &rest[1..];
                let mut byte_value = 0;
                let mut digit_count = 0;
                while digit_count < 2 && iswxdigit(rest.char_at(0)) {
                    byte_value = byte_value * 16 + rest.char_at(0).to_digit(16).unwrap();
                    digit_count += 1;
                    rest = &rest[1..];
                }
                if digit_count == 0 {
                    self.fatal_error(wgettext!("missing hexadecimal number in escape"));
                }
                self.append_output(encode_byte_to_char((byte_value % 256) as u8));
            }
            lead if is_octal_digit(lead) => {
                // Parse \0ooo (if octal_0 and the first digit is 0) or \ooo (otherwise). Allow
                // \ooo if octal_0 and the first digit is not 0; this is an undocumented extension
                // to POSIX that is compatible with Bash 2.05b.
                if octal_0 && lead == '0' {
                    rest = &rest[1..];
                }
                let mut byte_value = 0;
                let mut digit_count = 0;
                while digit_count < 3 && is_octal_digit(rest.char_at(0)) {
                    byte_value = byte_value * 8 + rest.char_at(0).to_digit(8).unwrap();
                    digit_count += 1;
                    rest = &rest[1..];
                }
                // Wrap mod 256, which matches historic behavior.
                self.append_output(encode_byte_to_char((byte_value % 256) as u8));
            }
            '"' | '\\' | 'a' | 'b' | 'c' | 'e' | 'f' | 'n' | 'r' | 't' | 'v' => {
                self.print_esc_char(rest.char_at(0));
                rest = &rest[1..];
            }
            'u' | 'U' => {
                let esc_char: char = rest.char_at(0);
                rest = &rest[1..];
                // \u takes up to 4 hex digits, \U up to 8.
                let exp_esc_length = match esc_char {
                    'u' => 4,
                    _ => 8,
                };
                let mut code_point = 0;
                for position in 0..exp_esc_length {
                    if !iswxdigit(rest.char_at(0)) {
                        // Escape sequence must be done. Complain if we didn't get anything.
                        if position == 0 {
                            self.fatal_error(wgettext!(
                                "Missing hexadecimal number in Unicode escape"
                            ));
                        }
                        break;
                    }
                    code_point = code_point * 16 + rest.char_at(0).to_digit(16).unwrap();
                    rest = &rest[1..];
                }
                // N.B. we assume __STDC_ISO_10646__.
                if code_point > 0x10FFFF {
                    self.fatal_error(wgettext_fmt!(
                        "Unicode character out of range: \\%c%0*x",
                        esc_char,
                        exp_esc_length,
                        code_point
                    ));
                } else if let Some(c) = char::from_u32(code_point) {
                    self.append_output(c);
                } else {
                    // TODO-RUST: if the value is a surrogate, we need to encode it using our PUA
                    // scheme.
                    self.fatal_error(wgettext!("Invalid code points not yet supported by printf"));
                }
            }
            _ => {
                // Not a recognized escape: print the backslash and the following character (if
                // there is one) literally.
                self.append_output('\\');
                if !rest.is_empty() {
                    self.append_output(rest.char_at(0));
                    rest = &rest[1..];
                }
            }
        }
        wstr_offset_in(rest, escstart) - 1
    }
    /// Print string str, evaluating \ escapes.
    /// This implements the %b directive, for which octal escapes are of the form \0 or \0ooo,
    /// so print_esc is invoked in its octal_0 mode.
    fn print_esc_string(&mut self, mut str: &wstr) {
        // Emulating the following loop: for (; *str; str++)
        while !str.is_empty() {
            let c = str.char_at(0);
            if c == '\\' {
                // print_esc returns one less than the number of characters it consumed; the
                // unconditional increment below accounts for the remaining one.
                let consumed_minus_1 = self.print_esc(str, true);
                str = &str[consumed_minus_1..];
            } else {
                self.append_output(c);
            }
            str = &str[1..];
        }
    }
    /// Output the character denoted by the single-character \ escape \p c.
    /// The escape \c produces no character; it cancels all further output instead.
    /// Characters without a special meaning are output unchanged.
    fn print_esc_char(&mut self, c: char) {
        let translated = match c {
            'c' => {
                // cancel the rest of the output
                self.early_exit = true;
                return;
            }
            'a' => '\x07', // alert
            'b' => '\x08', // backspace
            'e' => '\x1B', // escape
            'f' => '\x0C', // form feed
            'n' => '\n',   // new line
            'r' => '\r',   // carriage return
            't' => '\t',   // horizontal tab
            'v' => '\x0B', // vertical tab
            literal => literal,
        };
        self.append_output(translated);
    }
    /// Add the character \p c to the output buffer, unless output has been cut short.
    fn append_output(&mut self, c: char) {
        if !self.early_exit {
            self.buff.push(c);
        }
    }
    /// Add the string \p s to the output buffer, unless output has been cut short.
    fn append_output_str<Str: AsRef<wstr>>(&mut self, s: Str) {
        if !self.early_exit {
            self.buff.push_utfstr(&s);
        }
    }
 }
 /// The printf builtin.
 /// argv[0] is the command name and argv[1] the format string; the remaining elements are the
 /// arguments to format. The format is re-used for as long as arguments remain and each pass
 /// consumes at least one of them.
 /// \return STATUS_INVALID_ARGS if no format string was given, otherwise the exit code
 /// accumulated while printing.
 pub fn printf(
    _parser: &mut parser_t,
    streams: &mut io_streams_t,
    argv: &mut [&wstr],
 ) -> Option<c_int> {
    // View argv immutably (we never rearrange its elements), skipping the command name.
    let args: &[&wstr] = &argv[1..];
    let (format, mut remaining) = match args.split_first() {
        Some((format, rest)) => (*format, rest),
        None => return STATUS_INVALID_ARGS,
    };
    let mut state = builtin_printf_state_t {
        streams,
        exit_code: STATUS_CMD_OK.unwrap(),
        early_exit: false,
        buff: WString::new(),
        locale: get_numeric_locale(),
    };
    loop {
        let args_used = state.print_formatted(format, remaining);
        remaining = &remaining[args_used..];
        // Flush whatever this pass produced.
        if !state.buff.is_empty() {
            state.streams.out.append(&state.buff);
            state.buff.clear();
        }
        // Go around again only if this pass made progress, arguments remain, and output has not
        // been cut short.
        let keep_going = args_used > 0 && !remaining.is_empty() && !state.early_exit;
        if !keep_going {
            break;
        }
    }
    Some(state.exit_code)
 }
--- a/fish-rust/src/builtins/shared.rs
+++ b/fish-rust/src/builtins/shared.rs
@ -1,4 +1,4 @@
-use crate::builtins::wait;
+use crate::builtins::{printf, wait};
 use crate::ffi::{self, parser_t, wcharz_t, Repin, RustBuiltin};
 use crate::wchar::{self, wstr, L};
 use crate::wchar_ffi::{c_str, empty_wstring};
@ -45,7 +45,9 @@ pub const STATUS_CMD_OK: Option<c_int> = Some(0);
 /// The status code used for failure exit in a command (but not if the args were invalid).
 pub const STATUS_CMD_ERROR: Option<c_int> = Some(1);
-/// A handy return value for invalid args.
+/// The status code used for invalid arguments given to a command. This is distinct from valid
 /// arguments that might result in a command failure. An invalid args condition is something
 /// like an unrecognized flag, missing or too many arguments, an invalid integer, etc.
 pub const STATUS_INVALID_ARGS: Option<c_int> = Some(2);
 /// A wrapper around output_stream_t.
@ -61,6 +63,11 @@ impl output_stream_t {
    pub fn append<Str: AsRef<wstr>>(&mut self, s: Str) -> bool {
        // Delegates to `append1` on the object returned by `self.ffi()`, passing `s` through the
        // `c_str!` macro; the bool produced by that call is returned unchanged.
        self.ffi().append1(c_str!(s))
    }
    /// Append a single char by handing a one-element slice to `append`.
    pub fn append1(&mut self, c: char) -> bool {
        let single = [c];
        self.append(wstr::from_char_slice(&single))
    }
 }
 // Convenience wrappers around C++ io_streams_t.
@ -132,6 +139,7 @@ pub fn run_builtin(
        RustBuiltin::Realpath => super::realpath::realpath(parser, streams, args),
        RustBuiltin::Return => super::r#return::r#return(parser, streams, args),
        RustBuiltin::Wait => wait::wait(parser, streams, args),
        RustBuiltin::Printf => printf::printf(parser, streams, args),
    }
 }
--- a/fish-rust/src/wchar_ext.rs
+++ b/fish-rust/src/wchar_ext.rs
@ -153,6 +153,13 @@ pub trait WExt {
    /// Access the chars of a WString or wstr.
    fn as_char_slice(&self) -> &[char];
    /// Return the tail of this string beginning at *char index* `start`.
    /// Unlike slicing a Rust `str`, the index counts chars rather than bytes.
    fn slice_from(&self, start: usize) -> &wstr {
        wstr::from_char_slice(&self.as_char_slice()[start..])
    }
    /// \return the char at an index.
    /// If the index is equal to the length, return '\0'.
    /// If the index exceeds the length, then panic.
--- a/fish-rust/src/wutil/mod.rs
+++ b/fish-rust/src/wutil/mod.rs
@ -8,6 +8,7 @@ pub mod wcstoi;
 mod wrealpath;
 use crate::common::fish_reserved_codepoint;
 use crate::wchar::wstr;
 pub(crate) use gettext::{wgettext, wgettext_fmt};
 pub use normalize_path::*;
 pub(crate) use printf::sprintf;
@ -48,3 +49,32 @@ fn fish_is_pua(c: char) -> bool {
 pub fn fish_iswalnum(c: char) -> bool {
    // Reserved and PUA codepoints never count, whatever `char::is_alphanumeric` says about them.
    if fish_reserved_codepoint(c) || fish_is_pua(c) {
        return false;
    }
    c.is_alphanumeric()
 }
 /// Return how many characters into \p base the slice \p cursor begins.
 /// This is the Rust counterpart of the C++ pointer subtraction `cursor - base`.
 /// Panics if \p cursor does not lie within \p base.
 pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize {
    let inner = cursor.as_slice().as_ptr_range();
    let outer = base.as_slice().as_ptr_range();
    // Compare whole ranges rather than asking whether `outer` contains `inner.start`:
    // a zero-length cursor at the very end of base starts at `outer.end`, which
    // `Range::contains` would reject.
    assert!(
        outer.start <= inner.start && inner.end <= outer.end,
        "cursor should be a subslice of base"
    );
    // SAFETY: the assertion above established that cursor's address range lies inside base's,
    // so both pointers refer to the same slice and their distance is a whole number of chars.
    let distance = unsafe { inner.start.offset_from(outer.start) };
    assert!(distance >= 0, "offset should be non-negative");
    distance as usize
 }
 #[test]
 fn test_wstr_offset_in() {
    use crate::wchar::L;
    let base = L!("hello world");
    // A cursor starting partway into base reports its starting char index.
    assert_eq!(wstr_offset_in(&base[6..], base), 6);
    // A cursor covering all of base sits at offset 0.
    assert_eq!(wstr_offset_in(&base[0..], base), 0);
    // Offsets are relative to whichever slice is passed as base.
    assert_eq!(wstr_offset_in(&base[6..], &base[6..]), 0);
    // An empty cursor at the very end of base is accepted and reports base's length.
    assert_eq!(wstr_offset_in(&base[base.len()..], base), base.len());
 }
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@ -44,7 +44,6 @@
 #include "builtins/jobs.h"
 #include "builtins/math.h"
 #include "builtins/path.h"
 #include "builtins/printf.h"
 #include "builtins/read.h"
 #include "builtins/set.h"
 #include "builtins/set_color.h"
@ -393,7 +392,7 @@ static constexpr builtin_data_t builtin_datas[] = {
    {L"not", &builtin_generic, N_(L"Negate exit status of job")},
    {L"or", &builtin_generic, N_(L"Execute command if previous command failed")},
    {L"path", &builtin_path, N_(L"Handle paths")},
-    {L"printf", &builtin_printf, N_(L"Prints formatted text")},
+    {L"printf", &implemented_in_rust, N_(L"Prints formatted text")},
    {L"pwd", &implemented_in_rust, N_(L"Print the working directory")},
    {L"random", &implemented_in_rust, N_(L"Generate random number")},
    {L"read", &builtin_read, N_(L"Read a line of input into variables")},
@ -558,6 +557,9 @@ static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
    if (cmd == L"wait") {
        return RustBuiltin::Wait;
    }
    if (cmd == L"printf") {
        return RustBuiltin::Printf;
    }
    if (cmd == L"return") {
        return RustBuiltin::Return;
    }
--- a/src/builtin.h
+++ b/src/builtin.h
@ -116,6 +116,7 @@ enum RustBuiltin : int32_t {
    Echo,
    Emit,
    Exit,
    Printf,
    Pwd,
    Random,
    Realpath,
--- a/src/builtins/printf.cpp
+++ b/src/builtins/printf.cpp
@ -1,713 +0,0 @@
 // printf - format and print data
 // Copyright (C) 1990-2007 Free Software Foundation, Inc.
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2, or (at your option)
 // any later version.
 //
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software Foundation,
 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 // Usage: printf format [argument...]
 //
 // A front end to the printf function that lets it be used from the shell.
 //
 // Backslash escapes:
 //
 // \" = double quote
 // \\ = backslash
 // \a = alert (bell)
 // \b = backspace
 // \c = produce no further output
 // \e = escape
 // \f = form feed
 // \n = new line
 // \r = carriage return
 // \t = horizontal tab
 // \v = vertical tab
 // \ooo = octal number (ooo is 1 to 3 digits)
 // \xhh = hexadecimal number (hh is 1 to 2 digits)
 // \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
 // \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
 //
 // Additional directive:
 //
 // %b = print an argument string, interpreting backslash escapes,
 //   except that octal escapes are of the form \0 or \0ooo.
 //
 // The `format' argument is re-used as many times as necessary
 // to convert all of the given arguments.
 //
 // David MacKenzie <djm@gnu.ai.mit.edu>
 // This file has been imported from source code of printf command in GNU Coreutils version 6.9.
 #include "config.h"  // IWYU pragma: keep
 #include "printf.h"
 #include <cerrno>
 #include <cinttypes>
 #include <climits>
 #include <cstdarg>
 #include <cstdint>
 #include <cstring>
 #include <cwchar>
 #include <cwctype>
 #include <locale>
 #include <string>
 #ifdef HAVE_XLOCALE_H
 #include <xlocale.h>
 #endif
 #include "../builtin.h"
 #include "../common.h"
 #include "../io.h"
 #include "../maybe.h"
 #include "../wcstringutil.h"
 #include "../wutil.h"  // IWYU pragma: keep
 class parser_t;
 namespace {
 /// All state for a single invocation of the printf builtin: the streams to write to, the exit
 /// status accumulated so far, and the pending output buffer.
 struct builtin_printf_state_t {
    // Out and err streams. Note this is a captured reference!
    io_streams_t &streams;
    // The status of the operation. Starts at 0; the error helpers set it to STATUS_CMD_ERROR.
    int exit_code;
    // Whether we should stop outputting. This gets set by fatal_error(), and also by the
    // \c escape. Once set, the append_* and *_error helpers become no-ops.
    bool early_exit;
    // Our output buffer, so we don't write() constantly.
    // Our strategy is simple:
    // The caller flushes after each pass over the format, and the error helpers flush the
    // buffer before writing their message.
    wcstring buff;
    explicit builtin_printf_state_t(io_streams_t &s)
        : streams(s), exit_code(0), early_exit(false) {}
    // Report problems with a numeric conversion of s that stopped at end with errno errcode.
    void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode);
    // Format a single argument according to one % directive.
    void print_direc(const wchar_t *start, size_t length, wchar_t conversion, bool have_field_width,
                     int field_width, bool have_precision, int precision, wchar_t const *argument);
    // Run one pass over format; returns how many elements of argv were consumed.
    int print_formatted(const wchar_t *format, int argc, const wchar_t **argv);
    // Print an error and set the exit code, but keep producing output.
    void nonfatal_error(const wchar_t *fmt, ...);
    // Print an error, set the exit code, and stop all further output.
    void fatal_error(const wchar_t *fmt, ...);
    // Handle the backslash escape at escstart; returns its length excluding the backslash.
    long print_esc(const wchar_t *escstart, bool octal_0);
    // Output str, expanding backslash escapes (used for %b).
    void print_esc_string(const wchar_t *str);
    // Output the single-character escape named by c.
    void print_esc_char(wchar_t c);
    // Buffer one character, unless early_exit is set.
    void append_output(wchar_t c);
    // Buffer formatted text, unless early_exit is set.
    void append_format_output(const wchar_t *fmt, ...);
 };
 }  // namespace
 /// \return whether \p c is a digit that is valid in an octal number, i.e. a digit below L'8'.
 static bool is_octal_digit(wchar_t c) {
    if (!iswdigit(c)) {
        return false;
    }
    return c < L'8';
 }
 void builtin_printf_state_t::nonfatal_error(const wchar_t *fmt, ...) {
    // Don't error twice.
    if (early_exit) return;
    // If we have output, write it so it appears first.
    if (!buff.empty()) {
        streams.out.append(buff);
        buff.clear();
    }
    va_list va;
    va_start(va, fmt);
    wcstring errstr = vformat_string(fmt, va);
    va_end(va);
    streams.err.append(errstr);
    if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
    // We set the exit code to error, because one occurred,
    // but we don't do an early exit so we still print what we can.
    this->exit_code = STATUS_CMD_ERROR;
 }
 void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) {
    // Don't error twice.
    if (early_exit) return;
    // If we have output, write it so it appears first.
    if (!buff.empty()) {
        streams.out.append(buff);
        buff.clear();
    }
    va_list va;
    va_start(va, fmt);
    wcstring errstr = vformat_string(fmt, va);
    va_end(va);
    streams.err.append(errstr);
    if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
    this->exit_code = STATUS_CMD_ERROR;
    this->early_exit = true;
 }
 void builtin_printf_state_t::append_output(wchar_t c) {
    // Don't output if we're done.
    if (early_exit) return;
    buff.push_back(c);
 }
 void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...) {
    // Don't output if we're done.
    if (early_exit) return;
    va_list va;
    va_start(va, fmt);
    wcstring tmp = vformat_string(fmt, va);
    va_end(va);
    buff.append(tmp);
 }
 /// Check the outcome of converting \p s to a number and report any problem.
 /// \param s the argument text that was converted.
 /// \param end where the conversion stopped within \p s.
 /// \param errcode the errno value left behind by the conversion.
 void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode) {
    if (errcode != 0 && errcode != EINVAL) {
        // The conversion itself failed; this is fatal.
        if (errcode == ERANGE) {
            this->fatal_error(L"%ls: %ls", s, _(L"Number out of range"));
        } else {
            this->fatal_error(L"%ls: %s", s, std::strerror(errcode));
        }
    } else if (*end) {
        // Parsing stopped before the end of the argument.
        if (s == end) {
            // Nothing at all was converted.
            this->fatal_error(_(L"%ls: expected a numeric value"), s);
        } else {
            // This isn't entirely fatal - the value should still be printed.
            this->nonfatal_error(_(L"%ls: value not completely converted (can't convert '%ls')"), s,
                                 end);
            // Warn about octal numbers as they can be confusing.
            // Do it if the unconverted digit is a valid hex digit,
            // because it could also be an "0x" -> "0" typo.
            if (*s == L'0' && iswxdigit(*end)) {
                // The hint contains no conversion specifiers, so it takes no arguments.
                this->nonfatal_error(
                    _(L"Hint: a leading '0' without an 'x' indicates an octal number"));
            }
        }
    }
 }
 /// Parse the beginning of \p s as a value of type T, storing in \p *end where parsing stopped.
 /// Error reporting is left to the caller.
 template <typename T>
 static T raw_string_to_scalar_type(const wchar_t *s, wchar_t **end);
 /// Signed integers. Base 0 is passed so that wcstoimax selects the base from the prefix.
 template <>
 intmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
    const int auto_base = 0;
    return std::wcstoimax(s, end, auto_base);
 }
 /// Unsigned integers. Base 0 is passed so that wcstoumax selects the base from the prefix.
 template <>
 uintmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
    const int auto_base = 0;
    return std::wcstoumax(s, end, auto_base);
 }
 /// Floating point: try the current locale first, then fall back to fish's C locale.
 template <>
 long double raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
    const double parsed = std::wcstod(s, end);
    if (**end != L'\0') {
        // The user's locale did not consume the whole string. Either the text is not a valid
        // floating point value, or the locale uses separator characters other than the english
        // convention. Retry with a locale that follows the english convention for writing
        // floating point numbers.
        return wcstod_l(s, end, fish_c_locale());
    }
    return parsed;
 }
 /// Convert the argument \p s to a scalar of type T, reporting conversion problems via \p state.
 /// An argument starting with a single or double quote yields the value of the character that
 /// follows the quote.
 template <typename T>
 static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state) {
    const bool quoted = (*s == L'\"' || *s == L'\'');
    if (quoted) {
        // Use the character right after the quote as the value; nothing is validated here.
        const wchar_t ch = s[1];
        T result = ch;
        return result;
    }
    wchar_t *end = nullptr;
    // Clear errno so that verify_numeric only sees what this conversion sets.
    errno = 0;
    T result = raw_string_to_scalar_type<T>(s, &end);
    state->verify_numeric(s, end, errno);
    return result;
 }
 /// Output the single-character \ escape named by \p c, the character after the backslash.
 /// 'c' produces nothing and stops all further output; a character with no special meaning is
 /// output unchanged.
 void builtin_printf_state_t::print_esc_char(wchar_t c) {
    if (c == L'c') {
        // Cancel the rest of the output.
        this->early_exit = true;
        return;
    }
    wchar_t translated;
    switch (c) {
        case L'a':  // alert
            translated = L'\a';
            break;
        case L'b':  // backspace
            translated = L'\b';
            break;
        case L'e':  // escape
            translated = L'\x1B';
            break;
        case L'f':  // form feed
            translated = L'\f';
            break;
        case L'n':  // new line
            translated = L'\n';
            break;
        case L'r':  // carriage return
            translated = L'\r';
            break;
        case L't':  // horizontal tab
            translated = L'\t';
            break;
        case L'v':  // vertical tab
            translated = L'\v';
            break;
        default:  // anything else stands for itself
            translated = c;
            break;
    }
    this->append_output(translated);
 }
 /// Print a \ escape sequence starting at ESCSTART.
 /// Return the number of characters in the escape sequence besides the backslash.
 /// If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o
 /// is an octal digit; otherwise they are of the form \ooo.
 long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) {
    // p walks over the characters after the backslash; its final position gives the length.
    const wchar_t *p = escstart + 1;
    int esc_value = 0; /* Value of \nnn escape. */
    int esc_length;    /* Length of \nnn escape. */
    if (*p == L'x') {
        // A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
        for (esc_length = 0, ++p; esc_length < 2 && iswxdigit(*p); ++esc_length, ++p)
            esc_value = esc_value * 16 + convert_digit(*p, 16);
        // fatal_error sets early_exit, which turns the append_output below into a no-op.
        if (esc_length == 0) this->fatal_error(_(L"missing hexadecimal number in escape"));
        // The value is output as an offset from ENCODE_DIRECT_BASE rather than as a code point.
        this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
    } else if (is_octal_digit(*p)) {
        // Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). Allow \ooo if octal_0 && *p
        // != L'0'; this is an undocumented extension to POSIX that is compatible with Bash 2.05b.
        // Wrap mod 256, which matches historic behavior.
        for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p);
             ++esc_length, ++p)
            esc_value = esc_value * 8 + convert_digit(*p, 8);
        this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
    } else if (*p && std::wcschr(L"\"\\abcefnrtv", *p)) {
        // One of the single-character escapes. The *p check keeps the terminating NUL, which
        // wcschr would otherwise find at the end of the list, from matching.
        print_esc_char(*p++);
    } else if (*p == L'u' || *p == L'U') {
        // \uhhhh takes up to 4 hex digits, \Uhhhhhhhh up to 8.
        wchar_t esc_char = *p;
        p++;
        uint32_t uni_value = 0;
        for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++) {
            if (!iswxdigit(*p)) {
                // Escape sequence must be done. Complain if we didn't get anything.
                if (esc_length == 0) {
                    this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
                }
                break;
            }
            uni_value = uni_value * 16 + convert_digit(*p, 16);
            p++;
        }
        // PCA GNU printf respects the limitations described in ISO N717, about which universal
        // characters "shall not" be specified. I believe this limitation is for the benefit of
        // compilers; I see no reason to impose it in builtin_printf.
        //
        // If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code
        // points, so just use that. If not defined, use the %lc printf conversion; this probably
        // won't do anything good if your wide character set is not Unicode, but such platforms are
        // exceedingly rare.
        if (uni_value > 0x10FFFF) {
            this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char,
                              (esc_char == L'u' ? 4 : 8), uni_value);
        } else {
 #if defined(__STDC_ISO_10646__)
            this->append_output(uni_value);
 #else
            this->append_format_output(L"%lc", uni_value);
 #endif
        }
    } else {
        // Not a recognized escape: output the backslash and the following character (if any)
        // literally.
        this->append_output(L'\\');
        if (*p) {
            this->append_output(*p);
            p++;
        }
    }
    // p is one past the last consumed character; subtract one more for the backslash itself.
    return p - escstart - 1;
 }
 /// Output the string \p str, expanding \ escapes along the way. Octal escapes are parsed in
 /// the \0ooo form, since print_esc is called with octal_0 set.
 void builtin_printf_state_t::print_esc_string(const wchar_t *str) {
    const wchar_t *cursor = str;
    while (*cursor != L'\0') {
        if (*cursor == L'\\') {
            // print_esc reports the escape's length without the backslash; the increment
            // below accounts for the backslash itself.
            cursor += print_esc(cursor, true);
        } else {
            this->append_output(*cursor);
        }
        ++cursor;
    }
 }
 /// Evaluate a printf conversion specification.  START is the start of the directive, LENGTH is its
 /// length, and CONVERSION specifies the type of conversion.  LENGTH does not include any length
 /// modifier or the conversion specifier itself.  FIELD_WIDTH and PRECISION are the field width and
 /// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
 /// ARGUMENT is the argument to be formatted.
 ///
 /// The directive is rebuilt with a length modifier matching the C type the argument is converted
 /// to, and the result is appended to the output buffer via append_format_output.
 void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion,
                                         bool have_field_width, int field_width,
                                         bool have_precision, int precision,
                                         wchar_t const *argument) {
    // Start with everything except the conversion specifier.
    wcstring fmt(start, length);
    // Create a copy of the % directive, with an intmax_t-wide width modifier substituted for any
    // existing integer length modifier.
    switch (conversion) {
        case L'x':
        case L'X':
        case L'd':
        case L'i':
        case L'o':
        case L'u': {
            // Integer conversions are passed intmax_t / uintmax_t values below.
            fmt.append(L"ll");
            break;
        }
        case L'a':
        case L'e':
        case L'f':
        case L'g':
        case L'A':
        case L'E':
        case L'F':
        case L'G': {
            // Floating point conversions are passed long double values below.
            fmt.append(L"L");
            break;
        }
        case L's':
        case L'c': {
            // String and character arguments are wide.
            fmt.append(L"l");
            break;
        }
        default: {
            break;
        }
    }
    // Append the conversion itself.
    fmt.push_back(conversion);
    // Each case below passes field_width and/or precision ahead of the value exactly when the
    // directive contained the corresponding '*', because that is the order in which the rebuilt
    // format consumes its arguments.
    switch (conversion) {
        case L'd':
        case L'i': {
            auto arg = string_to_scalar_type<intmax_t>(argument, this);
            if (!have_field_width) {
                if (!have_precision)
                    this->append_format_output(fmt.c_str(), arg);
                else
                    this->append_format_output(fmt.c_str(), precision, arg);
            } else {
                if (!have_precision)
                    this->append_format_output(fmt.c_str(), field_width, arg);
                else
                    this->append_format_output(fmt.c_str(), field_width, precision, arg);
            }
            break;
        }
        case L'o':
        case L'u':
        case L'x':
        case L'X': {
            auto arg = string_to_scalar_type<uintmax_t>(argument, this);
            if (!have_field_width) {
                if (!have_precision)
                    this->append_format_output(fmt.c_str(), arg);
                else
                    this->append_format_output(fmt.c_str(), precision, arg);
            } else {
                if (!have_precision)
                    this->append_format_output(fmt.c_str(), field_width, arg);
                else
                    this->append_format_output(fmt.c_str(), field_width, precision, arg);
            }
            break;
        }
        case L'a':
        case L'A':
        case L'e':
        case L'E':
        case L'f':
        case L'F':
        case L'g':
        case L'G': {
            auto arg = string_to_scalar_type<long double>(argument, this);
            if (!have_field_width) {
                if (!have_precision) {
                    this->append_format_output(fmt.c_str(), arg);
                } else {
                    this->append_format_output(fmt.c_str(), precision, arg);
                }
            } else {
                if (!have_precision) {
                    this->append_format_output(fmt.c_str(), field_width, arg);
                } else {
                    this->append_format_output(fmt.c_str(), field_width, precision, arg);
                }
            }
            break;
        }
        case L'c': {
            // Only the first character of the argument is printed. Precision is never passed
            // here: print_formatted disallows %c as soon as it sees a '.'.
            if (!have_field_width) {
                this->append_format_output(fmt.c_str(), *argument);
            } else {
                this->append_format_output(fmt.c_str(), field_width, *argument);
            }
            break;
        }
        case L's': {
            if (!have_field_width) {
                if (!have_precision) {
                    this->append_format_output(fmt.c_str(), argument);
                } else {
                    this->append_format_output(fmt.c_str(), precision, argument);
                }
            } else {
                if (!have_precision) {
                    this->append_format_output(fmt.c_str(), field_width, argument);
                } else {
                    this->append_format_output(fmt.c_str(), field_width, precision, argument);
                }
            }
            break;
        }
        default: {
            // print_formatted only lets through conversions from its allowed list.
            DIE("unexpected opt");
        }
    }
 }
 /// Set ok[ch] to \p flag for every character ch of the NUL-terminated string \p str.
 /// Characters are indexed as unsigned char so that the index is never negative.
 static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str,
                                                     bool flag) {
    while (*str != '\0') {
        ok[static_cast<unsigned char>(*str)] = flag;
        ++str;
    }
 }
 /// Print the text in FORMAT, using ARGV (with ARGC elements) for arguments to any `%' directives.
 /// Return the number of elements of ARGV used.
 /// Returns 0 right after reporting an invalid conversion specification.
 int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, const wchar_t **argv) {
    int save_argc = argc;        /* Preserve original value.  */
    const wchar_t *f;            /* Pointer into `format'.  */
    const wchar_t *direc_start;  /* Start of % directive.  */
    size_t direc_length;         /* Length of % directive.  */
    bool have_field_width;       /* True if FIELD_WIDTH is valid.  */
    int field_width = 0;         /* Arg to first '*'.  */
    bool have_precision;         /* True if PRECISION is valid.  */
    int precision = 0;           /* Arg to second '*'.  */
    bool ok[UCHAR_MAX + 1] = {}; /* ok['x'] is true if %x is allowed.  */
    for (f = format; *f != L'\0'; ++f) {
        switch (*f) {
            case L'%': {
                direc_start = f++;
                direc_length = 1;
                have_field_width = have_precision = false;
                // "%%" prints a literal percent sign.
                if (*f == L'%') {
                    this->append_output(L'%');
                    break;
                }
                if (*f == L'b') {
                    // FIXME: Field width and precision are not supported for %b, even though POSIX
                    // requires it.
                    if (argc > 0) {
                        print_esc_string(*argv);
                        ++argv;
                        --argc;
                    }
                    break;
                }
                // Start from the full set of conversions; the flags and precision seen below
                // remove the ones they are not valid with.
                modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true);
                for (bool continue_looking_for_flags = true; continue_looking_for_flags;) {
                    switch (*f) {
                        case L'I':
                        case L'\'': {
                            modify_allowed_format_specifiers(ok, "aAceEosxX", false);
                            break;
                        }
                        case '-':
                        case '+':
                        case ' ': {
                            break;
                        }
                        case L'#': {
                            modify_allowed_format_specifiers(ok, "cdisu", false);
                            break;
                        }
                        case '0': {
                            modify_allowed_format_specifiers(ok, "cs", false);
                            break;
                        }
                        default: {
                            continue_looking_for_flags = false;
                            break;
                        }
                    }
                    if (continue_looking_for_flags) {
                        f++;
                        direc_length++;
                    }
                }
                // Field width: either '*' (taken from the next argument) or literal digits.
                if (*f == L'*') {
                    ++f;
                    ++direc_length;
                    if (argc > 0) {
                        auto width = string_to_scalar_type<intmax_t>(*argv, this);
                        if (INT_MIN <= width && width <= INT_MAX)
                            field_width = static_cast<int>(width);
                        else
                            this->fatal_error(_(L"invalid field width: %ls"), *argv);
                        ++argv;
                        --argc;
                    } else {
                        // No argument left to supply the width.
                        field_width = 0;
                    }
                    have_field_width = true;
                } else {
                    while (iswdigit(*f)) {
                        ++f;
                        ++direc_length;
                    }
                }
                // Precision: '.' followed by either '*' or literal digits.
                if (*f == L'.') {
                    ++f;
                    ++direc_length;
                    modify_allowed_format_specifiers(ok, "c", false);
                    if (*f == L'*') {
                        ++f;
                        ++direc_length;
                        if (argc > 0) {
                            auto prec = string_to_scalar_type<intmax_t>(*argv, this);
                            if (prec < 0) {
                                // A negative precision is taken as if the precision were omitted,
                                // so -1 is safe here even if prec < INT_MIN.
                                precision = -1;
                            } else if (INT_MAX < prec)
                                this->fatal_error(_(L"invalid precision: %ls"), *argv);
                            else {
                                precision = static_cast<int>(prec);
                            }
                            ++argv;
                            --argc;
                        } else {
                            // No argument left to supply the precision.
                            precision = 0;
                        }
                        have_precision = true;
                    } else {
                        while (iswdigit(*f)) {
                            ++f;
                            ++direc_length;
                        }
                    }
                }
                // Skip length modifiers without counting them in direc_length; print_direc
                // substitutes its own.
                while (*f == L'l' || *f == L'L' || *f == L'h' || *f == L'j' || *f == L't' ||
                       *f == L'z') {
                    ++f;
                }
                wchar_t conversion = *f;
                // The range check keeps the index inside ok[]. A format ending mid-directive
                // also lands here, since conversion is then the terminating NUL.
                if (conversion > 0xFF || !ok[conversion]) {
                    this->fatal_error(_(L"%.*ls: invalid conversion specification"),
                                      static_cast<int>(f + 1 - direc_start), direc_start);
                    return 0;
                }
                // A directive with no argument left is formatted from an empty string.
                const wchar_t *argument = L"";
                if (argc > 0) {
                    argument = *argv++;
                    argc--;
                }
                print_direc(direc_start, direc_length, *f, have_field_width, field_width,
                            have_precision, precision, argument);
                break;
            }
            case L'\\': {
                // print_esc returns the escape's length without the backslash; the loop's ++f
                // accounts for the backslash itself.
                f += print_esc(f, false);
                break;
            }
            default: {
                this->append_output(*f);
                break;
            }
        }
    }
    return save_argc - argc;
 }
 /// The printf builtin.
 /// argv[0] is the command name and argv[1] the format string; any further elements are the
 /// arguments consumed by the format's directives. The format is re-applied until a pass uses
 /// no arguments, no arguments remain, or output was stopped early.
 /// \return STATUS_INVALID_ARGS if no format was given, otherwise the accumulated exit code.
 maybe_t<int> builtin_printf(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
    UNUSED(parser);
    int argc = builtin_count_args(argv);
    // Skip the command name.
    argv++;
    argc--;
    if (argc < 1) {
        return STATUS_INVALID_ARGS;
    }
 #if defined(HAVE_USELOCALE) || defined(__GLIBC__)
    // We use a locale-dependent LC_NUMERIC here,
    // unlike the rest of fish (which uses LC_NUMERIC=C).
    // Because we do output as well as wcstod (which would have wcstod_l),
    // we need to set the locale here.
    // (glibc has uselocale since 2.3, but our configure checks fail us)
    locale_t prev_locale = uselocale(fish_numeric_locale());
 #else
    // NetBSD does not have uselocale,
    // so the best we can do is setlocale.
    // The string returned by setlocale() may be overwritten by a later setlocale() call,
    // so take a copy before switching; otherwise the restore below could read stale data.
    const char *current_locale = setlocale(LC_NUMERIC, nullptr);
    const std::string prev_locale = current_locale ? current_locale : "C";
    setlocale(LC_NUMERIC, "");
 #endif
    builtin_printf_state_t state(streams);
    int args_used;
    const wchar_t *format = argv[0];
    // Skip the format; what remains are its arguments.
    argc--;
    argv++;
    do {
        args_used = state.print_formatted(format, argc, argv);
        argc -= args_used;
        argv += args_used;
        // Flush what this pass produced.
        if (!state.buff.empty()) {
            streams.out.append(state.buff);
            state.buff.clear();
        }
    } while (args_used > 0 && argc > 0 && !state.early_exit);
    // Put the previous numeric locale back.
 #if defined(HAVE_USELOCALE) || defined(__GLIBC__)
    uselocale(prev_locale);
 #else
    setlocale(LC_NUMERIC, prev_locale.c_str());
 #endif
    return state.exit_code;
 }
--- a/src/builtins/printf.h
+++ b/src/builtins/printf.h
@ -1,11 +0,0 @@
 // Prototypes for functions for executing builtin_printf functions.
 #ifndef FISH_BUILTIN_PRINTF_H
 #define FISH_BUILTIN_PRINTF_H
 #include "../maybe.h"
 class parser_t;
 struct io_streams_t;
 /// The printf builtin. \p argv holds the command name, then the format string, then the
 /// arguments for the format. Output and error messages go to \p streams.
 /// \return STATUS_INVALID_ARGS if no format string is given, otherwise the exit code.
 maybe_t<int> builtin_printf(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
 #endif
--- a/tests/checks/printf.fish
+++ b/tests/checks/printf.fish
@ -124,6 +124,15 @@ printf '%d\n' 0g
 echo $status
 # CHECK: 1
 printf '%f\n' 0x2
 # CHECK: 2.000000
 printf '%f\n' 0x2p3
 # CHECK: 16.000000
 printf '%.1f\n' -0X1.5P8
 # CHECK: -336.0
 # Test that we ignore options
 printf -a
 printf --foo