mirror of
https://github.com/fish-shell/fish-shell
synced 2024-09-20 22:42:04 +00:00
Revert "Revert "Implement builtin_printf in Rust""
This reverts commit 9f7e6a6cd1
.
Add additional fixes from code review.
This commit is contained in:
parent
2d6f752f6e
commit
a487b1ecf2
11 changed files with 873 additions and 729 deletions
|
@ -105,7 +105,7 @@ set(FISH_BUILTIN_SRCS
|
|||
src/builtins/disown.cpp
|
||||
src/builtins/eval.cpp src/builtins/fg.cpp
|
||||
src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp
|
||||
src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp
|
||||
src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/path.cpp
|
||||
src/builtins/read.cpp src/builtins/set.cpp
|
||||
src/builtins/set_color.cpp src/builtins/source.cpp src/builtins/status.cpp
|
||||
src/builtins/string.cpp src/builtins/test.cpp src/builtins/type.cpp src/builtins/ulimit.cpp
|
||||
|
|
|
@ -7,6 +7,7 @@ pub mod contains;
|
|||
pub mod echo;
|
||||
pub mod emit;
|
||||
pub mod exit;
|
||||
pub mod printf;
|
||||
pub mod pwd;
|
||||
pub mod random;
|
||||
pub mod realpath;
|
||||
|
|
810
fish-rust/src/builtins/printf.rs
Normal file
810
fish-rust/src/builtins/printf.rs
Normal file
|
@ -0,0 +1,810 @@
|
|||
// printf - format and print data
|
||||
// Copyright (C) 1990-2007 Free Software Foundation, Inc.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
// Usage: printf format [argument...]
|
||||
//
|
||||
// A front end to the printf function that lets it be used from the shell.
|
||||
//
|
||||
// Backslash escapes:
|
||||
//
|
||||
// \" = double quote
|
||||
// \\ = backslash
|
||||
// \a = alert (bell)
|
||||
// \b = backspace
|
||||
// \c = produce no further output
|
||||
// \e = escape
|
||||
// \f = form feed
|
||||
// \n = new line
|
||||
// \r = carriage return
|
||||
// \t = horizontal tab
|
||||
// \v = vertical tab
|
||||
// \ooo = octal number (ooo is 1 to 3 digits)
|
||||
// \xhh = hexadecimal number (hh is 1 to 2 digits)
|
||||
// \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
|
||||
// \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
|
||||
//
|
||||
// Additional directive:
|
||||
//
|
||||
// %b = print an argument string, interpreting backslash escapes,
|
||||
// except that octal escapes are of the form \0 or \0ooo.
|
||||
//
|
||||
// The `format' argument is re-used as many times as necessary
|
||||
// to convert all of the given arguments.
|
||||
//
|
||||
// David MacKenzie <djm@gnu.ai.mit.edu>
|
||||
|
||||
// This file has been imported from source code of printf command in GNU Coreutils version 6.9.
|
||||
|
||||
use libc::c_int;
|
||||
use num_traits;
|
||||
use std::result::Result;
|
||||
|
||||
use crate::builtins::shared::{io_streams_t, STATUS_CMD_ERROR, STATUS_CMD_OK, STATUS_INVALID_ARGS};
|
||||
use crate::ffi::parser_t;
|
||||
use crate::locale::{get_numeric_locale, Locale};
|
||||
use crate::wchar::{encode_byte_to_char, wstr, WExt, WString, L};
|
||||
use crate::wutil::errors::Error;
|
||||
use crate::wutil::gettext::{wgettext, wgettext_fmt};
|
||||
use crate::wutil::wcstod::wcstod;
|
||||
use crate::wutil::wcstoi::{fish_wcstoi_partial, Options as WcstoiOpts};
|
||||
use crate::wutil::{sprintf, wstr_offset_in};
|
||||
use printf_compat::args::ToArg;
|
||||
use printf_compat::printf::sprintf_locale;
|
||||
|
||||
/// Report whether `c` is one of the octal digits `0` through `7`.
fn is_octal_digit(c: char) -> bool {
    matches!(c, '0'..='7')
}
|
||||
|
||||
/// Report whether `c` is an ASCII decimal digit (`0` through `9`).
fn iswdigit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||||
|
||||
/// Report whether `c` is an ASCII hexadecimal digit (`0`-`9`, `a`-`f` or `A`-`F`).
fn iswxdigit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
|
||||
|
||||
struct builtin_printf_state_t<'a> {
|
||||
// Out and err streams. Note this is a captured reference!
|
||||
streams: &'a mut io_streams_t,
|
||||
|
||||
// The status of the operation.
|
||||
exit_code: c_int,
|
||||
|
||||
// Whether we should stop outputting. This gets set in the case of an error, and also with the
|
||||
// \c escape.
|
||||
early_exit: bool,
|
||||
|
||||
// Our output buffer, so we don't write() constantly.
|
||||
// Our strategy is simple:
|
||||
// We print once per argument, and we flush the buffer before the error.
|
||||
buff: WString,
|
||||
|
||||
// The locale, which affects printf output and also parsing of floats due to decimal separators.
|
||||
locale: Locale,
|
||||
}
|
||||
|
||||
/// Convert to a scalar type. \return the result of conversion, and the end of the converted string.
|
||||
/// On conversion failure, \p end is not modified.
|
||||
trait RawStringToScalarType: Copy + num_traits::Zero + std::convert::From<u32> {
|
||||
/// Convert from a string to our self type.
|
||||
/// \return the result of conversion, and the remainder of the string.
|
||||
fn raw_string_to_scalar_type<'a>(
|
||||
s: &'a wstr,
|
||||
locale: &Locale,
|
||||
end: &mut &'a wstr,
|
||||
) -> Result<Self, Error>;
|
||||
|
||||
/// Convert from a Unicode code point to this type.
|
||||
/// This supports printf's ability to convert from char to scalar via a leading quote.
|
||||
/// Try it:
|
||||
/// > printf "%f" "'a"
|
||||
/// 97.000000
|
||||
/// Wild stuff.
|
||||
fn from_ord(c: char) -> Self {
|
||||
let as_u32: u32 = c.into();
|
||||
as_u32.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl RawStringToScalarType for i64 {
|
||||
fn raw_string_to_scalar_type<'a>(
|
||||
s: &'a wstr,
|
||||
_locale: &Locale,
|
||||
end: &mut &'a wstr,
|
||||
) -> Result<Self, Error> {
|
||||
let mut consumed = 0;
|
||||
let res = fish_wcstoi_partial(s, WcstoiOpts::default(), &mut consumed);
|
||||
*end = s.slice_from(consumed);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl RawStringToScalarType for u64 {
|
||||
fn raw_string_to_scalar_type<'a>(
|
||||
s: &'a wstr,
|
||||
_locale: &Locale,
|
||||
end: &mut &'a wstr,
|
||||
) -> Result<Self, Error> {
|
||||
let mut consumed = 0;
|
||||
let res = fish_wcstoi_partial(
|
||||
s,
|
||||
WcstoiOpts {
|
||||
wrap_negatives: true,
|
||||
..Default::default()
|
||||
},
|
||||
&mut consumed,
|
||||
);
|
||||
*end = s.slice_from(consumed);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl RawStringToScalarType for f64 {
|
||||
fn raw_string_to_scalar_type<'a>(
|
||||
s: &'a wstr,
|
||||
locale: &Locale,
|
||||
end: &mut &'a wstr,
|
||||
) -> Result<Self, Error> {
|
||||
let mut consumed: usize = 0;
|
||||
let mut result = wcstod(s, locale.decimal_point, &mut consumed);
|
||||
if result.is_ok() && consumed == s.chars().count() {
|
||||
*end = s.slice_from(consumed);
|
||||
return result;
|
||||
}
|
||||
// The conversion using the user's locale failed. That may be due to the string not being a
|
||||
// valid floating point value. It could also be due to the locale using different separator
|
||||
// characters than the normal english convention. So try again by forcing the use of a locale
|
||||
// that employs the english convention for writing floating point numbers.
|
||||
consumed = 0;
|
||||
result = wcstod(s, '.', &mut consumed);
|
||||
if result.is_ok() {
|
||||
*end = s.slice_from(consumed);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a string to a scalar type.
|
||||
/// Use state.verify_numeric to report any errors.
|
||||
fn string_to_scalar_type<T: RawStringToScalarType>(
|
||||
s: &wstr,
|
||||
state: &mut builtin_printf_state_t,
|
||||
) -> T {
|
||||
if s.char_at(0) == '"' || s.char_at(0) == '\'' {
|
||||
// Note that if the string is really just a leading quote,
|
||||
// we really do want to convert the "trailing nul".
|
||||
T::from_ord(s.char_at(1))
|
||||
} else {
|
||||
let mut end = s;
|
||||
let mval = T::raw_string_to_scalar_type(s, &state.locale, &mut end);
|
||||
state.verify_numeric(s, end, mval.err());
|
||||
mval.unwrap_or(T::zero())
|
||||
}
|
||||
}
|
||||
|
||||
/// Store `flag` in the entry of `ok` that corresponds to each character of `str`.
/// The table is indexed by code point, so every character must be below 256.
fn modify_allowed_format_specifiers(ok: &mut [bool; 256], str: &str, flag: bool) {
    str.chars().for_each(|c| ok[c as usize] = flag);
}
|
||||
|
||||
impl<'a> builtin_printf_state_t<'a> {
|
||||
#[allow(clippy::partialeq_to_none)]
|
||||
fn verify_numeric(&mut self, s: &wstr, end: &wstr, errcode: Option<Error>) {
|
||||
// This check matches the historic `errcode != EINVAL` check from C++.
|
||||
// Note that empty or missing values will be silently treated as 0.
|
||||
if errcode != None && errcode != Some(Error::InvalidChar) && errcode != Some(Error::Empty) {
|
||||
match errcode.unwrap() {
|
||||
Error::Overflow => {
|
||||
self.fatal_error(sprintf!("%ls: %ls", s, wgettext!("Number out of range")));
|
||||
}
|
||||
Error::Empty => {
|
||||
self.fatal_error(sprintf!("%ls: %ls", s, wgettext!("Number was empty")));
|
||||
}
|
||||
Error::InvalidChar | Error::CharsLeft => {
|
||||
panic!("Unreachable");
|
||||
}
|
||||
}
|
||||
} else if !end.is_empty() {
|
||||
if s.as_ptr() == end.as_ptr() {
|
||||
self.fatal_error(wgettext_fmt!("%ls: expected a numeric value", s));
|
||||
} else {
|
||||
// This isn't entirely fatal - the value should still be printed.
|
||||
self.nonfatal_error(wgettext_fmt!(
|
||||
"%ls: value not completely converted (can't convert '%ls')",
|
||||
s,
|
||||
end
|
||||
));
|
||||
// Warn about octal numbers as they can be confusing.
|
||||
// Do it if the unconverted digit is a valid hex digit,
|
||||
// because it could also be an "0x" -> "0" typo.
|
||||
if s.char_at(0) == '0' && iswxdigit(end.char_at(0)) {
|
||||
self.nonfatal_error(wgettext_fmt!(
|
||||
"Hint: a leading '0' without an 'x' indicates an octal number"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate a printf conversion specification. SPEC is the start of the directive, and CONVERSION
|
||||
/// specifies the type of conversion. SPEC does not include any length modifier or the
|
||||
/// conversion specifier itself. FIELD_WIDTH and PRECISION are the field width and
|
||||
/// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
|
||||
/// ARGUMENT is the argument to be formatted.
|
||||
#[allow(clippy::collapsible_else_if, clippy::too_many_arguments)]
|
||||
fn print_direc(
|
||||
&mut self,
|
||||
spec: &wstr,
|
||||
conversion: char,
|
||||
have_field_width: bool,
|
||||
field_width: i32,
|
||||
have_precision: bool,
|
||||
precision: i32,
|
||||
argument: &wstr,
|
||||
) {
|
||||
/// Printf macro helper which provides our locale.
|
||||
macro_rules! sprintf_loc {
|
||||
(
|
||||
$fmt:expr, // format string of type &wstr
|
||||
$($arg:expr),* // arguments
|
||||
) => {
|
||||
sprintf_locale(
|
||||
$fmt,
|
||||
&self.locale,
|
||||
&[$($arg.to_arg()),*]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Start with everything except the conversion specifier.
|
||||
let mut fmt = spec.to_owned();
|
||||
|
||||
// Create a copy of the % directive, with a width modifier substituted for any
|
||||
// existing integer length modifier.
|
||||
match conversion {
|
||||
'x' | 'X' | 'd' | 'i' | 'o' | 'u' => {
|
||||
fmt.push_str("ll");
|
||||
}
|
||||
'a' | 'e' | 'f' | 'g' | 'A' | 'E' | 'F' | 'G' => {
|
||||
fmt.push_str("L");
|
||||
}
|
||||
's' | 'c' => {
|
||||
fmt.push_str("l");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Append the conversion itself.
|
||||
fmt.push(conversion);
|
||||
|
||||
// Rebind as a ref.
|
||||
let fmt: &wstr = &fmt;
|
||||
match conversion {
|
||||
'd' | 'i' => {
|
||||
let arg: i64 = string_to_scalar_type(argument, self);
|
||||
if !have_field_width {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, precision, arg));
|
||||
}
|
||||
} else {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
|
||||
}
|
||||
}
|
||||
}
|
||||
'o' | 'u' | 'x' | 'X' => {
|
||||
let arg: u64 = string_to_scalar_type(argument, self);
|
||||
if !have_field_width {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, precision, arg));
|
||||
}
|
||||
} else {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'a' | 'A' | 'e' | 'E' | 'f' | 'F' | 'g' | 'G' => {
|
||||
let arg: f64 = string_to_scalar_type(argument, self);
|
||||
if !have_field_width {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, precision, arg));
|
||||
}
|
||||
} else {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, arg));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, precision, arg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'c' => {
|
||||
if !have_field_width {
|
||||
self.append_output_str(sprintf_loc!(fmt, argument.char_at(0)));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, argument.char_at(0)));
|
||||
}
|
||||
}
|
||||
|
||||
's' => {
|
||||
if !have_field_width {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, argument));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, precision, argument));
|
||||
}
|
||||
} else {
|
||||
if !have_precision {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, argument));
|
||||
} else {
|
||||
self.append_output_str(sprintf_loc!(fmt, field_width, precision, argument));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
panic!("unexpected opt: {}", conversion);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Print the text in FORMAT, using ARGV for arguments to any `%' directives.
|
||||
/// Return the number of elements of ARGV used.
|
||||
fn print_formatted(&mut self, format: &wstr, mut argv: &[&wstr]) -> usize {
|
||||
let mut argc = argv.len();
|
||||
let save_argc = argc; /* Preserve original value. */
|
||||
let mut f: &wstr; /* Pointer into `format'. */
|
||||
let mut direc_start: &wstr; /* Start of % directive. */
|
||||
let mut direc_length: usize; /* Length of % directive. */
|
||||
let mut have_field_width: bool; /* True if FIELD_WIDTH is valid. */
|
||||
let mut field_width: c_int = 0; /* Arg to first '*'. */
|
||||
let mut have_precision: bool; /* True if PRECISION is valid. */
|
||||
let mut precision = 0; /* Arg to second '*'. */
|
||||
let mut ok = [false; 256]; /* ok['x'] is true if %x is allowed. */
|
||||
|
||||
// N.B. this was originally written as a loop like so:
|
||||
// for (f = format; *f != L'\0'; ++f) {
|
||||
// so we emulate that.
|
||||
f = format;
|
||||
let mut first = true;
|
||||
loop {
|
||||
if !first {
|
||||
f = &f[1..];
|
||||
}
|
||||
first = false;
|
||||
if f.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
match f.char_at(0) {
|
||||
'%' => {
|
||||
direc_start = f;
|
||||
f = &f[1..];
|
||||
direc_length = 1;
|
||||
have_field_width = false;
|
||||
have_precision = false;
|
||||
if f.char_at(0) == '%' {
|
||||
self.append_output('%');
|
||||
continue;
|
||||
}
|
||||
if f.char_at(0) == 'b' {
|
||||
// FIXME: Field width and precision are not supported for %b, even though POSIX
|
||||
// requires it.
|
||||
if argc > 0 {
|
||||
self.print_esc_string(argv[0]);
|
||||
argv = &argv[1..];
|
||||
argc -= 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
modify_allowed_format_specifiers(&mut ok, "aAcdeEfFgGiosuxX", true);
|
||||
let mut continue_looking_for_flags = true;
|
||||
while continue_looking_for_flags {
|
||||
match f.char_at(0) {
|
||||
'I' | '\'' => {
|
||||
modify_allowed_format_specifiers(&mut ok, "aAceEosxX", false);
|
||||
}
|
||||
|
||||
'-' | '+' | ' ' => {
|
||||
// pass
|
||||
}
|
||||
|
||||
'#' => {
|
||||
modify_allowed_format_specifiers(&mut ok, "cdisu", false);
|
||||
}
|
||||
|
||||
'0' => {
|
||||
modify_allowed_format_specifiers(&mut ok, "cs", false);
|
||||
}
|
||||
|
||||
_ => {
|
||||
continue_looking_for_flags = false;
|
||||
}
|
||||
}
|
||||
if continue_looking_for_flags {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if f.char_at(0) == '*' {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
if argc > 0 {
|
||||
let width: i64 = string_to_scalar_type(argv[0], self);
|
||||
if (c_int::MIN as i64) <= width && width <= (c_int::MAX as i64) {
|
||||
field_width = width as c_int;
|
||||
} else {
|
||||
self.fatal_error(wgettext_fmt!(
|
||||
"invalid field width: %ls",
|
||||
argv[0]
|
||||
));
|
||||
}
|
||||
argv = &argv[1..];
|
||||
argc -= 1;
|
||||
} else {
|
||||
field_width = 0;
|
||||
}
|
||||
have_field_width = true;
|
||||
} else {
|
||||
while iswdigit(f.char_at(0)) {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if f.char_at(0) == '.' {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
modify_allowed_format_specifiers(&mut ok, "c", false);
|
||||
if f.char_at(0) == '*' {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
if argc > 0 {
|
||||
let prec: i64 = string_to_scalar_type(argv[0], self);
|
||||
if prec < 0 {
|
||||
// A negative precision is taken as if the precision were omitted,
|
||||
// so -1 is safe here even if prec < INT_MIN.
|
||||
precision = -1;
|
||||
} else if (c_int::MAX as i64) < prec {
|
||||
self.fatal_error(wgettext_fmt!(
|
||||
"invalid precision: %ls",
|
||||
argv[0]
|
||||
));
|
||||
} else {
|
||||
precision = prec as c_int;
|
||||
}
|
||||
argv = &argv[1..];
|
||||
argc -= 1;
|
||||
} else {
|
||||
precision = 0;
|
||||
}
|
||||
have_precision = true;
|
||||
} else {
|
||||
while iswdigit(f.char_at(0)) {
|
||||
f = &f[1..];
|
||||
direc_length += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while matches!(f.char_at(0), 'l' | 'L' | 'h' | 'j' | 't' | 'z') {
|
||||
f = &f[1..];
|
||||
}
|
||||
|
||||
let conversion = f.char_at(0);
|
||||
if (conversion as usize) > 0xFF || !ok[conversion as usize] {
|
||||
self.fatal_error(wgettext_fmt!(
|
||||
"%.*ls: invalid conversion specification",
|
||||
wstr_offset_in(f, direc_start) + 1,
|
||||
direc_start
|
||||
));
|
||||
return 0;
|
||||
}
|
||||
|
||||
let mut argument = L!("");
|
||||
if argc > 0 {
|
||||
argument = argv[0];
|
||||
argv = &argv[1..];
|
||||
argc -= 1;
|
||||
}
|
||||
self.print_direc(
|
||||
&direc_start[..direc_length],
|
||||
f.char_at(0),
|
||||
have_field_width,
|
||||
field_width,
|
||||
have_precision,
|
||||
precision,
|
||||
argument,
|
||||
);
|
||||
}
|
||||
'\\' => {
|
||||
let consumed_minus_1 = self.print_esc(f, false);
|
||||
f = &f[consumed_minus_1..]; // Loop increment will add 1.
|
||||
}
|
||||
|
||||
c => {
|
||||
self.append_output(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
save_argc - argc
|
||||
}
|
||||
|
||||
fn nonfatal_error<Str: AsRef<wstr>>(&mut self, errstr: Str) {
|
||||
let errstr = errstr.as_ref();
|
||||
// Don't error twice.
|
||||
if self.early_exit {
|
||||
return;
|
||||
}
|
||||
|
||||
// If we have output, write it so it appears first.
|
||||
if !self.buff.is_empty() {
|
||||
self.streams.out.append(&self.buff);
|
||||
self.buff.clear();
|
||||
}
|
||||
|
||||
self.streams.err.append(errstr);
|
||||
if !errstr.ends_with('\n') {
|
||||
self.streams.err.append1('\n');
|
||||
}
|
||||
|
||||
// We set the exit code to error, because one occurred,
|
||||
// but we don't do an early exit so we still print what we can.
|
||||
self.exit_code = STATUS_CMD_ERROR.unwrap();
|
||||
}
|
||||
|
||||
/// Report an error that stops all further output.
///
/// The message is reported exactly as `nonfatal_error` reports it (pending output is flushed
/// first, and nothing is reported if `early_exit` is already set); afterwards `early_exit` is
/// set.
fn fatal_error<Str: AsRef<wstr>>(&mut self, errstr: Str) {
    // nonfatal_error() returns without doing anything when early_exit is already set, in which
    // case the assignment below changes nothing: errors are not reported twice.
    self.nonfatal_error(errstr);
    self.early_exit = true;
}
|
||||
|
||||
/// Print a \ escape sequence starting at ESCSTART.
|
||||
/// Return the number of characters in the string, *besides the backslash*.
|
||||
/// That is this is ONE LESS than the number of characters consumed.
|
||||
/// If octal_0 is nonzero, octal escapes are of the form \0ooo, where o
|
||||
/// is an octal digit; otherwise they are of the form \ooo.
|
||||
fn print_esc(&mut self, escstart: &wstr, octal_0: bool) -> usize {
|
||||
assert!(escstart.char_at(0) == '\\');
|
||||
let mut p = &escstart[1..];
|
||||
let mut esc_value = 0; /* Value of \nnn escape. */
|
||||
let mut esc_length; /* Length of \nnn escape. */
|
||||
if p.char_at(0) == 'x' {
|
||||
// A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
|
||||
p = &p[1..];
|
||||
esc_length = 0;
|
||||
while esc_length < 2 && iswxdigit(p.char_at(0)) {
|
||||
esc_value = esc_value * 16 + p.char_at(0).to_digit(16).unwrap();
|
||||
esc_length += 1;
|
||||
p = &p[1..];
|
||||
}
|
||||
if esc_length == 0 {
|
||||
self.fatal_error(wgettext!("missing hexadecimal number in escape"));
|
||||
}
|
||||
self.append_output(encode_byte_to_char((esc_value % 256) as u8));
|
||||
} else if is_octal_digit(p.char_at(0)) {
|
||||
// Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). Allow \ooo if octal_0 && *p
|
||||
// != L'0'; this is an undocumented extension to POSIX that is compatible with Bash 2.05b.
|
||||
// Wrap mod 256, which matches historic behavior.
|
||||
esc_length = 0;
|
||||
if octal_0 && p.char_at(0) == '0' {
|
||||
p = &p[1..];
|
||||
}
|
||||
while esc_length < 3 && is_octal_digit(p.char_at(0)) {
|
||||
esc_value = esc_value * 8 + p.char_at(0).to_digit(8).unwrap();
|
||||
esc_length += 1;
|
||||
p = &p[1..];
|
||||
}
|
||||
self.append_output(encode_byte_to_char((esc_value % 256) as u8));
|
||||
} else if "\"\\abcefnrtv".contains(p.char_at(0)) {
|
||||
self.print_esc_char(p.char_at(0));
|
||||
p = &p[1..];
|
||||
} else if p.char_at(0) == 'u' || p.char_at(0) == 'U' {
|
||||
let esc_char: char = p.char_at(0);
|
||||
p = &p[1..];
|
||||
let mut uni_value = 0;
|
||||
let exp_esc_length = if esc_char == 'u' { 4 } else { 8 };
|
||||
for esc_length in 0..exp_esc_length {
|
||||
if !iswxdigit(p.char_at(0)) {
|
||||
// Escape sequence must be done. Complain if we didn't get anything.
|
||||
if esc_length == 0 {
|
||||
self.fatal_error(wgettext!("Missing hexadecimal number in Unicode escape"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
uni_value = uni_value * 16 + p.char_at(0).to_digit(16).unwrap();
|
||||
p = &p[1..];
|
||||
}
|
||||
// N.B. we assume __STDC_ISO_10646__.
|
||||
if uni_value > 0x10FFFF {
|
||||
self.fatal_error(wgettext_fmt!(
|
||||
"Unicode character out of range: \\%c%0*x",
|
||||
esc_char,
|
||||
exp_esc_length,
|
||||
uni_value
|
||||
));
|
||||
} else {
|
||||
// TODO-RUST: if uni_value is a surrogate, we need to encode it using our PUA scheme.
|
||||
if let Some(c) = char::from_u32(uni_value) {
|
||||
self.append_output(c);
|
||||
} else {
|
||||
self.fatal_error(wgettext!("Invalid code points not yet supported by printf"));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.append_output('\\');
|
||||
if !p.is_empty() {
|
||||
self.append_output(p.char_at(0));
|
||||
p = &p[1..];
|
||||
}
|
||||
}
|
||||
return wstr_offset_in(p, escstart) - 1;
|
||||
}
|
||||
|
||||
/// Print string str, evaluating \ escapes.
|
||||
fn print_esc_string(&mut self, mut str: &wstr) {
|
||||
// Emulating the following loop: for (; *str; str++)
|
||||
while !str.is_empty() {
|
||||
let c = str.char_at(0);
|
||||
if c == '\\' {
|
||||
let consumed_minus_1 = self.print_esc(str, false);
|
||||
str = &str[consumed_minus_1..];
|
||||
} else {
|
||||
self.append_output(c);
|
||||
}
|
||||
str = &str[1..];
|
||||
}
|
||||
}
|
||||
|
||||
/// Output a single-character \ escape.
|
||||
fn print_esc_char(&mut self, c: char) {
|
||||
match c {
|
||||
'a' => {
|
||||
// alert
|
||||
self.append_output('\x07'); // \a
|
||||
}
|
||||
'b' => {
|
||||
// backspace
|
||||
self.append_output('\x08'); // \b
|
||||
}
|
||||
'c' => {
|
||||
// cancel the rest of the output
|
||||
self.early_exit = true;
|
||||
}
|
||||
'e' => {
|
||||
// escape
|
||||
self.append_output('\x1B');
|
||||
}
|
||||
'f' => {
|
||||
// form feed
|
||||
self.append_output('\x0C'); // \f
|
||||
}
|
||||
'n' => {
|
||||
// new line
|
||||
self.append_output('\n');
|
||||
}
|
||||
'r' => {
|
||||
// carriage return
|
||||
self.append_output('\r');
|
||||
}
|
||||
't' => {
|
||||
// horizontal tab
|
||||
self.append_output('\t');
|
||||
}
|
||||
'v' => {
|
||||
// vertical tab
|
||||
self.append_output('\x0B'); // \v
|
||||
}
|
||||
_ => {
|
||||
self.append_output(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn append_output(&mut self, c: char) {
|
||||
// Don't output if we're done.
|
||||
if self.early_exit {
|
||||
return;
|
||||
}
|
||||
|
||||
self.buff.push(c);
|
||||
}
|
||||
|
||||
fn append_output_str<Str: AsRef<wstr>>(&mut self, s: Str) {
|
||||
// Don't output if we're done.
|
||||
if self.early_exit {
|
||||
return;
|
||||
}
|
||||
|
||||
self.buff.push_utfstr(&s);
|
||||
}
|
||||
}
|
||||
|
||||
/// The printf builtin.
|
||||
pub fn printf(
|
||||
_parser: &mut parser_t,
|
||||
streams: &mut io_streams_t,
|
||||
argv: &mut [&wstr],
|
||||
) -> Option<c_int> {
|
||||
let mut argc = argv.len();
|
||||
|
||||
// Rebind argv as immutable slice (can't rearrange its elements), skipping the command name.
|
||||
let mut argv: &[&wstr] = &argv[1..];
|
||||
argc -= 1;
|
||||
if argc < 1 {
|
||||
return STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
let mut state = builtin_printf_state_t {
|
||||
streams,
|
||||
exit_code: STATUS_CMD_OK.unwrap(),
|
||||
early_exit: false,
|
||||
buff: WString::new(),
|
||||
locale: get_numeric_locale(),
|
||||
};
|
||||
let format = argv[0];
|
||||
argc -= 1;
|
||||
argv = &argv[1..];
|
||||
loop {
|
||||
let args_used = state.print_formatted(format, argv);
|
||||
argc -= args_used;
|
||||
argv = &argv[args_used..];
|
||||
if !state.buff.is_empty() {
|
||||
state.streams.out.append(&state.buff);
|
||||
state.buff.clear();
|
||||
}
|
||||
if !(args_used > 0 && argc > 0 && !state.early_exit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Some(state.exit_code);
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
use crate::builtins::wait;
|
||||
use crate::builtins::{printf, wait};
|
||||
use crate::ffi::{self, parser_t, wcharz_t, Repin, RustBuiltin};
|
||||
use crate::wchar::{self, wstr, L};
|
||||
use crate::wchar_ffi::{c_str, empty_wstring};
|
||||
|
@ -45,7 +45,9 @@ pub const STATUS_CMD_OK: Option<c_int> = Some(0);
|
|||
/// The status code used for failure exit in a command (but not if the args were invalid).
|
||||
pub const STATUS_CMD_ERROR: Option<c_int> = Some(1);
|
||||
|
||||
/// A handy return value for invalid args.
|
||||
/// The status code used for invalid arguments given to a command. This is distinct from valid
|
||||
/// arguments that might result in a command failure. An invalid args condition is something
|
||||
/// like an unrecognized flag, missing or too many arguments, an invalid integer, etc.
|
||||
pub const STATUS_INVALID_ARGS: Option<c_int> = Some(2);
|
||||
|
||||
/// A wrapper around output_stream_t.
|
||||
|
@ -61,6 +63,11 @@ impl output_stream_t {
|
|||
pub fn append<Str: AsRef<wstr>>(&mut self, s: Str) -> bool {
|
||||
self.ffi().append1(c_str!(s))
|
||||
}
|
||||
|
||||
/// Append a char.
|
||||
pub fn append1(&mut self, c: char) -> bool {
|
||||
self.append(wstr::from_char_slice(&[c]))
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience wrappers around C++ io_streams_t.
|
||||
|
@ -132,6 +139,7 @@ pub fn run_builtin(
|
|||
RustBuiltin::Realpath => super::realpath::realpath(parser, streams, args),
|
||||
RustBuiltin::Return => super::r#return::r#return(parser, streams, args),
|
||||
RustBuiltin::Wait => wait::wait(parser, streams, args),
|
||||
RustBuiltin::Printf => printf::printf(parser, streams, args),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -153,6 +153,13 @@ pub trait WExt {
|
|||
/// Access the chars of a WString or wstr.
|
||||
fn as_char_slice(&self) -> &[char];
|
||||
|
||||
/// Return a char slice from a *char index*.
|
||||
/// This is different from Rust string slicing, which takes a byte index.
|
||||
fn slice_from(&self, start: usize) -> &wstr {
|
||||
let chars = self.as_char_slice();
|
||||
wstr::from_char_slice(&chars[start..])
|
||||
}
|
||||
|
||||
/// \return the char at an index.
|
||||
/// If the index is equal to the length, return '\0'.
|
||||
/// If the index exceeds the length, then panic.
|
||||
|
|
|
@ -8,6 +8,7 @@ pub mod wcstoi;
|
|||
mod wrealpath;
|
||||
|
||||
use crate::common::fish_reserved_codepoint;
|
||||
use crate::wchar::wstr;
|
||||
pub(crate) use gettext::{wgettext, wgettext_fmt};
|
||||
pub use normalize_path::*;
|
||||
pub(crate) use printf::sprintf;
|
||||
|
@ -48,3 +49,32 @@ fn fish_is_pua(c: char) -> bool {
|
|||
pub fn fish_iswalnum(c: char) -> bool {
|
||||
!fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric()
|
||||
}
|
||||
|
||||
/// Given that \p cursor is a pointer into \p base, return the offset in characters.
|
||||
/// This emulates C pointer arithmetic:
|
||||
/// `wstr_offset_in(cursor, base)` is equivalent to C++ `cursor - base`.
|
||||
pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize {
|
||||
let cursor = cursor.as_slice();
|
||||
let base = base.as_slice();
|
||||
// cursor may be a zero-length slice at the end of base,
|
||||
// which base.as_ptr_range().contains(cursor.as_ptr()) will reject.
|
||||
let base_range = base.as_ptr_range();
|
||||
let curs_range = cursor.as_ptr_range();
|
||||
assert!(
|
||||
base_range.start <= curs_range.start && curs_range.end <= base_range.end,
|
||||
"cursor should be a subslice of base"
|
||||
);
|
||||
let offset = unsafe { cursor.as_ptr().offset_from(base.as_ptr()) };
|
||||
assert!(offset >= 0, "offset should be non-negative");
|
||||
offset as usize
|
||||
}
|
||||
|
||||
#[test]
fn test_wstr_offset_in() {
    use crate::wchar::L;
    let base = L!("hello world");

    // A cursor in the middle, and one at the very start.
    let tail = &base[6..];
    assert_eq!(wstr_offset_in(tail, base), 6);
    assert_eq!(wstr_offset_in(&base[0..], base), 0);

    // A slice is at offset zero within itself.
    assert_eq!(wstr_offset_in(tail, tail), 0);

    // The empty slice at the end of base is still inside it.
    let end = &base[base.len()..];
    assert_eq!(wstr_offset_in(end, base), base.len());
}
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
#include "builtins/jobs.h"
|
||||
#include "builtins/math.h"
|
||||
#include "builtins/path.h"
|
||||
#include "builtins/printf.h"
|
||||
#include "builtins/read.h"
|
||||
#include "builtins/set.h"
|
||||
#include "builtins/set_color.h"
|
||||
|
@ -393,7 +392,7 @@ static constexpr builtin_data_t builtin_datas[] = {
|
|||
{L"not", &builtin_generic, N_(L"Negate exit status of job")},
|
||||
{L"or", &builtin_generic, N_(L"Execute command if previous command failed")},
|
||||
{L"path", &builtin_path, N_(L"Handle paths")},
|
||||
{L"printf", &builtin_printf, N_(L"Prints formatted text")},
|
||||
{L"printf", &implemented_in_rust, N_(L"Prints formatted text")},
|
||||
{L"pwd", &implemented_in_rust, N_(L"Print the working directory")},
|
||||
{L"random", &implemented_in_rust, N_(L"Generate random number")},
|
||||
{L"read", &builtin_read, N_(L"Read a line of input into variables")},
|
||||
|
@ -558,6 +557,9 @@ static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
|
|||
if (cmd == L"wait") {
|
||||
return RustBuiltin::Wait;
|
||||
}
|
||||
if (cmd == L"printf") {
|
||||
return RustBuiltin::Printf;
|
||||
}
|
||||
if (cmd == L"return") {
|
||||
return RustBuiltin::Return;
|
||||
}
|
||||
|
|
|
@ -116,6 +116,7 @@ enum RustBuiltin : int32_t {
|
|||
Echo,
|
||||
Emit,
|
||||
Exit,
|
||||
Printf,
|
||||
Pwd,
|
||||
Random,
|
||||
Realpath,
|
||||
|
|
|
@ -1,713 +0,0 @@
|
|||
// printf - format and print data
|
||||
// Copyright (C) 1990-2007 Free Software Foundation, Inc.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
// Usage: printf format [argument...]
|
||||
//
|
||||
// A front end to the printf function that lets it be used from the shell.
|
||||
//
|
||||
// Backslash escapes:
|
||||
//
|
||||
// \" = double quote
|
||||
// \\ = backslash
|
||||
// \a = alert (bell)
|
||||
// \b = backspace
|
||||
// \c = produce no further output
|
||||
// \e = escape
|
||||
// \f = form feed
|
||||
// \n = new line
|
||||
// \r = carriage return
|
||||
// \t = horizontal tab
|
||||
// \v = vertical tab
|
||||
// \ooo = octal number (ooo is 1 to 3 digits)
|
||||
// \xhh = hexadecimal number (hh is 1 to 2 digits)
|
||||
// \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
|
||||
// \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
|
||||
//
|
||||
// Additional directive:
|
||||
//
|
||||
// %b = print an argument string, interpreting backslash escapes,
|
||||
// except that octal escapes are of the form \0 or \0ooo.
|
||||
//
|
||||
// The `format' argument is re-used as many times as necessary
|
||||
// to convert all of the given arguments.
|
||||
//
|
||||
// David MacKenzie <djm@gnu.ai.mit.edu>
|
||||
|
||||
// This file has been imported from source code of printf command in GNU Coreutils version 6.9.
|
||||
#include "config.h"  // IWYU pragma: keep

#include "printf.h"

#include <cerrno>
#include <cinttypes>
#include <climits>
#include <cstdarg>
#include <cstdint>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <locale>
#include <string>
#ifdef HAVE_XLOCALE_H
#include <xlocale.h>
#endif

#include "../builtin.h"
#include "../common.h"
#include "../io.h"
#include "../maybe.h"
#include "../wcstringutil.h"
#include "../wutil.h"  // IWYU pragma: keep
|
||||
|
||||
class parser_t;
|
||||
|
||||
namespace {
|
||||
struct builtin_printf_state_t {
|
||||
// Out and err streams. Note this is a captured reference!
|
||||
io_streams_t &streams;
|
||||
|
||||
// The status of the operation.
|
||||
int exit_code;
|
||||
|
||||
// Whether we should stop outputting. This gets set in the case of an error, and also with the
|
||||
// \c escape.
|
||||
bool early_exit;
|
||||
// Our output buffer, so we don't write() constantly.
|
||||
// Our strategy is simple:
|
||||
// We print once per argument, and we flush the buffer before the error.
|
||||
wcstring buff;
|
||||
|
||||
explicit builtin_printf_state_t(io_streams_t &s)
|
||||
: streams(s), exit_code(0), early_exit(false) {}
|
||||
|
||||
void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode);
|
||||
|
||||
void print_direc(const wchar_t *start, size_t length, wchar_t conversion, bool have_field_width,
|
||||
int field_width, bool have_precision, int precision, wchar_t const *argument);
|
||||
|
||||
int print_formatted(const wchar_t *format, int argc, const wchar_t **argv);
|
||||
|
||||
void nonfatal_error(const wchar_t *fmt, ...);
|
||||
void fatal_error(const wchar_t *fmt, ...);
|
||||
|
||||
long print_esc(const wchar_t *escstart, bool octal_0);
|
||||
void print_esc_string(const wchar_t *str);
|
||||
void print_esc_char(wchar_t c);
|
||||
|
||||
void append_output(wchar_t c);
|
||||
void append_format_output(const wchar_t *fmt, ...);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Return true if \p c is one of the octal digits '0' through '7'.
static bool is_octal_digit(wchar_t c) {
    if (!iswdigit(c)) {
        return false;
    }
    return c < L'8';
}
|
||||
|
||||
void builtin_printf_state_t::nonfatal_error(const wchar_t *fmt, ...) {
|
||||
// Don't error twice.
|
||||
if (early_exit) return;
|
||||
|
||||
// If we have output, write it so it appears first.
|
||||
if (!buff.empty()) {
|
||||
streams.out.append(buff);
|
||||
buff.clear();
|
||||
}
|
||||
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
wcstring errstr = vformat_string(fmt, va);
|
||||
va_end(va);
|
||||
streams.err.append(errstr);
|
||||
if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
|
||||
|
||||
// We set the exit code to error, because one occurred,
|
||||
// but we don't do an early exit so we still print what we can.
|
||||
this->exit_code = STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) {
|
||||
// Don't error twice.
|
||||
if (early_exit) return;
|
||||
|
||||
// If we have output, write it so it appears first.
|
||||
if (!buff.empty()) {
|
||||
streams.out.append(buff);
|
||||
buff.clear();
|
||||
}
|
||||
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
wcstring errstr = vformat_string(fmt, va);
|
||||
va_end(va);
|
||||
streams.err.append(errstr);
|
||||
if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
|
||||
|
||||
this->exit_code = STATUS_CMD_ERROR;
|
||||
this->early_exit = true;
|
||||
}
|
||||
void builtin_printf_state_t::append_output(wchar_t c) {
|
||||
// Don't output if we're done.
|
||||
if (early_exit) return;
|
||||
|
||||
buff.push_back(c);
|
||||
}
|
||||
|
||||
void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...) {
|
||||
// Don't output if we're done.
|
||||
if (early_exit) return;
|
||||
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
wcstring tmp = vformat_string(fmt, va);
|
||||
va_end(va);
|
||||
buff.append(tmp);
|
||||
}
|
||||
|
||||
/// Check the outcome of a string-to-number conversion and report any problem.
///
/// \param s        the string that was converted
/// \param end      the end pointer produced by the conversion (first unconverted character)
/// \param errcode  the errno value observed after the conversion
///
/// A conversion error other than EINVAL is fatal, as is a string from which nothing was
/// converted. A partially converted string only produces a non-fatal error.
void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode) {
    if (errcode != 0 && errcode != EINVAL) {
        if (errcode == ERANGE) {
            this->fatal_error(L"%ls: %ls", s, _(L"Number out of range"));
        } else {
            this->fatal_error(L"%ls: %s", s, std::strerror(errcode));
        }
        return;
    }

    // The whole string was consumed: nothing to complain about.
    if (*end == L'\0') {
        return;
    }

    if (s == end) {
        this->fatal_error(_(L"%ls: expected a numeric value"), s);
        return;
    }

    // This isn't entirely fatal - the value should still be printed.
    this->nonfatal_error(_(L"%ls: value not completely converted (can't convert '%ls')"), s, end);

    // Warn about octal numbers as they can be confusing.
    // Do it if the unconverted digit is a valid hex digit,
    // because it could also be an "0x" -> "0" typo.
    if (*s == L'0' && iswxdigit(*end)) {
        // The hint has no conversion specifications, so it takes no further arguments.
        this->nonfatal_error(_(L"Hint: a leading '0' without an 'x' indicates an octal number"));
    }
}
|
||||
|
||||
/// Convert the leading part of \p s to a value of type T, storing a pointer to the first
/// unconverted character in \p end. Only the explicit specializations are defined.
template <typename T>
static T raw_string_to_scalar_type(const wchar_t *s, wchar_t **end);

/// Signed integer conversion. A base of 0 lets the library pick the base from the prefix.
template <>
intmax_t raw_string_to_scalar_type<intmax_t>(const wchar_t *s, wchar_t **end) {
    const int auto_detect_base = 0;
    return std::wcstoimax(s, end, auto_detect_base);
}

/// Unsigned integer conversion. A base of 0 lets the library pick the base from the prefix.
template <>
uintmax_t raw_string_to_scalar_type<uintmax_t>(const wchar_t *s, wchar_t **end) {
    const int auto_detect_base = 0;
    return std::wcstoumax(s, end, auto_detect_base);
}
|
||||
|
||||
template <>
|
||||
long double raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
|
||||
double val = std::wcstod(s, end);
|
||||
if (**end == L'\0') return val;
|
||||
// The conversion using the user's locale failed. That may be due to the string not being a
|
||||
// valid floating point value. It could also be due to the locale using different separator
|
||||
// characters than the normal english convention. So try again by forcing the use of a locale
|
||||
// that employs the english convention for writing floating point numbers.
|
||||
return wcstod_l(s, end, fish_c_locale());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state) {
|
||||
T val;
|
||||
if (*s == L'\"' || *s == L'\'') {
|
||||
wchar_t ch = *++s;
|
||||
val = ch;
|
||||
} else {
|
||||
wchar_t *end = nullptr;
|
||||
errno = 0;
|
||||
val = raw_string_to_scalar_type<T>(s, &end);
|
||||
state->verify_numeric(s, end, errno);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/// Output a single-character \ escape.
|
||||
void builtin_printf_state_t::print_esc_char(wchar_t c) {
|
||||
switch (c) {
|
||||
case L'a': { // alert
|
||||
this->append_output(L'\a');
|
||||
break;
|
||||
}
|
||||
case L'b': { // backspace
|
||||
this->append_output(L'\b');
|
||||
break;
|
||||
}
|
||||
case L'c': { // cancel the rest of the output
|
||||
this->early_exit = true;
|
||||
break;
|
||||
}
|
||||
case L'e': { // escape
|
||||
this->append_output(L'\x1B');
|
||||
break;
|
||||
}
|
||||
case L'f': { // form feed
|
||||
this->append_output(L'\f');
|
||||
break;
|
||||
}
|
||||
case L'n': { // new line
|
||||
this->append_output(L'\n');
|
||||
break;
|
||||
}
|
||||
case L'r': { // carriage return
|
||||
this->append_output(L'\r');
|
||||
break;
|
||||
}
|
||||
case L't': { // horizontal tab
|
||||
this->append_output(L'\t');
|
||||
break;
|
||||
}
|
||||
case L'v': { // vertical tab
|
||||
this->append_output(L'\v');
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
this->append_output(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Print a \ escape sequence starting at ESCSTART.
|
||||
/// Return the number of characters in the escape sequence besides the backslash..
|
||||
/// If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o
|
||||
/// is an octal digit; otherwise they are of the form \ooo.
|
||||
long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) {
|
||||
const wchar_t *p = escstart + 1;
|
||||
int esc_value = 0; /* Value of \nnn escape. */
|
||||
int esc_length; /* Length of \nnn escape. */
|
||||
|
||||
if (*p == L'x') {
|
||||
// A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
|
||||
for (esc_length = 0, ++p; esc_length < 2 && iswxdigit(*p); ++esc_length, ++p)
|
||||
esc_value = esc_value * 16 + convert_digit(*p, 16);
|
||||
if (esc_length == 0) this->fatal_error(_(L"missing hexadecimal number in escape"));
|
||||
this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
|
||||
} else if (is_octal_digit(*p)) {
|
||||
// Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). Allow \ooo if octal_0 && *p
|
||||
// != L'0'; this is an undocumented extension to POSIX that is compatible with Bash 2.05b.
|
||||
// Wrap mod 256, which matches historic behavior.
|
||||
for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p);
|
||||
++esc_length, ++p)
|
||||
esc_value = esc_value * 8 + convert_digit(*p, 8);
|
||||
this->append_output(ENCODE_DIRECT_BASE + esc_value % 256);
|
||||
} else if (*p && std::wcschr(L"\"\\abcefnrtv", *p)) {
|
||||
print_esc_char(*p++);
|
||||
} else if (*p == L'u' || *p == L'U') {
|
||||
wchar_t esc_char = *p;
|
||||
p++;
|
||||
uint32_t uni_value = 0;
|
||||
for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++) {
|
||||
if (!iswxdigit(*p)) {
|
||||
// Escape sequence must be done. Complain if we didn't get anything.
|
||||
if (esc_length == 0) {
|
||||
this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
uni_value = uni_value * 16 + convert_digit(*p, 16);
|
||||
p++;
|
||||
}
|
||||
|
||||
// PCA GNU printf respects the limitations described in ISO N717, about which universal
|
||||
// characters "shall not" be specified. I believe this limitation is for the benefit of
|
||||
// compilers; I see no reason to impose it in builtin_printf.
|
||||
//
|
||||
// If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code
|
||||
// points, so just use that. If not defined, use the %lc printf conversion; this probably
|
||||
// won't do anything good if your wide character set is not Unicode, but such platforms are
|
||||
// exceedingly rare.
|
||||
if (uni_value > 0x10FFFF) {
|
||||
this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char,
|
||||
(esc_char == L'u' ? 4 : 8), uni_value);
|
||||
} else {
|
||||
#if defined(__STDC_ISO_10646__)
|
||||
this->append_output(uni_value);
|
||||
#else
|
||||
this->append_format_output(L"%lc", uni_value);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
this->append_output(L'\\');
|
||||
if (*p) {
|
||||
this->append_output(*p);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
return p - escstart - 1;
|
||||
}
|
||||
|
||||
/// Print string STR, evaluating \ escapes.
|
||||
void builtin_printf_state_t::print_esc_string(const wchar_t *str) {
|
||||
for (; *str; str++)
|
||||
if (*str == L'\\')
|
||||
str += print_esc(str, true);
|
||||
else
|
||||
this->append_output(*str);
|
||||
}
|
||||
|
||||
/// Evaluate a printf conversion specification. START is the start of the directive, LENGTH is its
|
||||
/// length, and CONVERSION specifies the type of conversion. LENGTH does not include any length
|
||||
/// modifier or the conversion specifier itself. FIELD_WIDTH and PRECISION are the field width and
|
||||
/// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
|
||||
/// ARGUMENT is the argument to be formatted.
|
||||
void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion,
|
||||
bool have_field_width, int field_width,
|
||||
bool have_precision, int precision,
|
||||
wchar_t const *argument) {
|
||||
// Start with everything except the conversion specifier.
|
||||
wcstring fmt(start, length);
|
||||
|
||||
// Create a copy of the % directive, with an intmax_t-wide width modifier substituted for any
|
||||
// existing integer length modifier.
|
||||
switch (conversion) {
|
||||
case L'x':
|
||||
case L'X':
|
||||
case L'd':
|
||||
case L'i':
|
||||
case L'o':
|
||||
case L'u': {
|
||||
fmt.append(L"ll");
|
||||
break;
|
||||
}
|
||||
case L'a':
|
||||
case L'e':
|
||||
case L'f':
|
||||
case L'g':
|
||||
case L'A':
|
||||
case L'E':
|
||||
case L'F':
|
||||
case L'G': {
|
||||
fmt.append(L"L");
|
||||
break;
|
||||
}
|
||||
case L's':
|
||||
case L'c': {
|
||||
fmt.append(L"l");
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Append the conversion itself.
|
||||
fmt.push_back(conversion);
|
||||
|
||||
switch (conversion) {
|
||||
case L'd':
|
||||
case L'i': {
|
||||
auto arg = string_to_scalar_type<intmax_t>(argument, this);
|
||||
if (!have_field_width) {
|
||||
if (!have_precision)
|
||||
this->append_format_output(fmt.c_str(), arg);
|
||||
else
|
||||
this->append_format_output(fmt.c_str(), precision, arg);
|
||||
} else {
|
||||
if (!have_precision)
|
||||
this->append_format_output(fmt.c_str(), field_width, arg);
|
||||
else
|
||||
this->append_format_output(fmt.c_str(), field_width, precision, arg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'o':
|
||||
case L'u':
|
||||
case L'x':
|
||||
case L'X': {
|
||||
auto arg = string_to_scalar_type<uintmax_t>(argument, this);
|
||||
if (!have_field_width) {
|
||||
if (!have_precision)
|
||||
this->append_format_output(fmt.c_str(), arg);
|
||||
else
|
||||
this->append_format_output(fmt.c_str(), precision, arg);
|
||||
} else {
|
||||
if (!have_precision)
|
||||
this->append_format_output(fmt.c_str(), field_width, arg);
|
||||
else
|
||||
this->append_format_output(fmt.c_str(), field_width, precision, arg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'a':
|
||||
case L'A':
|
||||
case L'e':
|
||||
case L'E':
|
||||
case L'f':
|
||||
case L'F':
|
||||
case L'g':
|
||||
case L'G': {
|
||||
auto arg = string_to_scalar_type<long double>(argument, this);
|
||||
if (!have_field_width) {
|
||||
if (!have_precision) {
|
||||
this->append_format_output(fmt.c_str(), arg);
|
||||
} else {
|
||||
this->append_format_output(fmt.c_str(), precision, arg);
|
||||
}
|
||||
} else {
|
||||
if (!have_precision) {
|
||||
this->append_format_output(fmt.c_str(), field_width, arg);
|
||||
} else {
|
||||
this->append_format_output(fmt.c_str(), field_width, precision, arg);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'c': {
|
||||
if (!have_field_width) {
|
||||
this->append_format_output(fmt.c_str(), *argument);
|
||||
} else {
|
||||
this->append_format_output(fmt.c_str(), field_width, *argument);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L's': {
|
||||
if (!have_field_width) {
|
||||
if (!have_precision) {
|
||||
this->append_format_output(fmt.c_str(), argument);
|
||||
} else {
|
||||
this->append_format_output(fmt.c_str(), precision, argument);
|
||||
}
|
||||
} else {
|
||||
if (!have_precision) {
|
||||
this->append_format_output(fmt.c_str(), field_width, argument);
|
||||
} else {
|
||||
this->append_format_output(fmt.c_str(), field_width, precision, argument);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
DIE("unexpected opt");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// For each character in \p str, set the entry of \p ok indexed by that character to \p flag.
static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str,
                                                    bool flag) {
    const char *cursor = str;
    while (*cursor != '\0') {
        // Go through unsigned char so that the index is never negative.
        const unsigned char index = static_cast<unsigned char>(*cursor);
        ok[index] = flag;
        ++cursor;
    }
}
|
||||
|
||||
/// Print the text in FORMAT, using ARGV (with ARGC elements) for arguments to any `%' directives.
|
||||
/// Return the number of elements of ARGV used.
|
||||
int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, const wchar_t **argv) {
|
||||
int save_argc = argc; /* Preserve original value. */
|
||||
const wchar_t *f; /* Pointer into `format'. */
|
||||
const wchar_t *direc_start; /* Start of % directive. */
|
||||
size_t direc_length; /* Length of % directive. */
|
||||
bool have_field_width; /* True if FIELD_WIDTH is valid. */
|
||||
int field_width = 0; /* Arg to first '*'. */
|
||||
bool have_precision; /* True if PRECISION is valid. */
|
||||
int precision = 0; /* Arg to second '*'. */
|
||||
bool ok[UCHAR_MAX + 1] = {}; /* ok['x'] is true if %x is allowed. */
|
||||
|
||||
for (f = format; *f != L'\0'; ++f) {
|
||||
switch (*f) {
|
||||
case L'%': {
|
||||
direc_start = f++;
|
||||
direc_length = 1;
|
||||
have_field_width = have_precision = false;
|
||||
if (*f == L'%') {
|
||||
this->append_output(L'%');
|
||||
break;
|
||||
}
|
||||
if (*f == L'b') {
|
||||
// FIXME: Field width and precision are not supported for %b, even though POSIX
|
||||
// requires it.
|
||||
if (argc > 0) {
|
||||
print_esc_string(*argv);
|
||||
++argv;
|
||||
--argc;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true);
|
||||
for (bool continue_looking_for_flags = true; continue_looking_for_flags;) {
|
||||
switch (*f) {
|
||||
case L'I':
|
||||
case L'\'': {
|
||||
modify_allowed_format_specifiers(ok, "aAceEosxX", false);
|
||||
break;
|
||||
}
|
||||
case '-':
|
||||
case '+':
|
||||
case ' ': {
|
||||
break;
|
||||
}
|
||||
case L'#': {
|
||||
modify_allowed_format_specifiers(ok, "cdisu", false);
|
||||
break;
|
||||
}
|
||||
case '0': {
|
||||
modify_allowed_format_specifiers(ok, "cs", false);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
continue_looking_for_flags = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (continue_looking_for_flags) {
|
||||
f++;
|
||||
direc_length++;
|
||||
}
|
||||
}
|
||||
|
||||
if (*f == L'*') {
|
||||
++f;
|
||||
++direc_length;
|
||||
if (argc > 0) {
|
||||
auto width = string_to_scalar_type<intmax_t>(*argv, this);
|
||||
if (INT_MIN <= width && width <= INT_MAX)
|
||||
field_width = static_cast<int>(width);
|
||||
else
|
||||
this->fatal_error(_(L"invalid field width: %ls"), *argv);
|
||||
++argv;
|
||||
--argc;
|
||||
} else {
|
||||
field_width = 0;
|
||||
}
|
||||
have_field_width = true;
|
||||
} else {
|
||||
while (iswdigit(*f)) {
|
||||
++f;
|
||||
++direc_length;
|
||||
}
|
||||
}
|
||||
if (*f == L'.') {
|
||||
++f;
|
||||
++direc_length;
|
||||
modify_allowed_format_specifiers(ok, "c", false);
|
||||
if (*f == L'*') {
|
||||
++f;
|
||||
++direc_length;
|
||||
if (argc > 0) {
|
||||
auto prec = string_to_scalar_type<intmax_t>(*argv, this);
|
||||
if (prec < 0) {
|
||||
// A negative precision is taken as if the precision were omitted,
|
||||
// so -1 is safe here even if prec < INT_MIN.
|
||||
precision = -1;
|
||||
} else if (INT_MAX < prec)
|
||||
this->fatal_error(_(L"invalid precision: %ls"), *argv);
|
||||
else {
|
||||
precision = static_cast<int>(prec);
|
||||
}
|
||||
++argv;
|
||||
--argc;
|
||||
} else {
|
||||
precision = 0;
|
||||
}
|
||||
have_precision = true;
|
||||
} else {
|
||||
while (iswdigit(*f)) {
|
||||
++f;
|
||||
++direc_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (*f == L'l' || *f == L'L' || *f == L'h' || *f == L'j' || *f == L't' ||
|
||||
*f == L'z') {
|
||||
++f;
|
||||
}
|
||||
|
||||
wchar_t conversion = *f;
|
||||
if (conversion > 0xFF || !ok[conversion]) {
|
||||
this->fatal_error(_(L"%.*ls: invalid conversion specification"),
|
||||
static_cast<int>(f + 1 - direc_start), direc_start);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const wchar_t *argument = L"";
|
||||
if (argc > 0) {
|
||||
argument = *argv++;
|
||||
argc--;
|
||||
}
|
||||
print_direc(direc_start, direc_length, *f, have_field_width, field_width,
|
||||
have_precision, precision, argument);
|
||||
break;
|
||||
}
|
||||
case L'\\': {
|
||||
f += print_esc(f, false);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
this->append_output(*f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return save_argc - argc;
|
||||
}
|
||||
|
||||
/// The printf builtin.
|
||||
maybe_t<int> builtin_printf(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
|
||||
UNUSED(parser);
|
||||
int argc = builtin_count_args(argv);
|
||||
|
||||
argv++;
|
||||
argc--;
|
||||
|
||||
if (argc < 1) {
|
||||
return STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
#if defined(HAVE_USELOCALE) || defined(__GLIBC__)
|
||||
// We use a locale-dependent LC_NUMERIC here,
|
||||
// unlike the rest of fish (which uses LC_NUMERIC=C).
|
||||
// Because we do output as well as wcstod (which would have wcstod_l),
|
||||
// we need to set the locale here.
|
||||
// (glibc has uselocale since 2.3, but our configure checks fail us)
|
||||
locale_t prev_locale = uselocale(fish_numeric_locale());
|
||||
#else
|
||||
// NetBSD does not have uselocale,
|
||||
// so the best we can do is setlocale.
|
||||
auto prev_locale = setlocale(LC_NUMERIC, nullptr);
|
||||
setlocale(LC_NUMERIC, "");
|
||||
#endif
|
||||
|
||||
builtin_printf_state_t state(streams);
|
||||
int args_used;
|
||||
const wchar_t *format = argv[0];
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
do {
|
||||
args_used = state.print_formatted(format, argc, argv);
|
||||
argc -= args_used;
|
||||
argv += args_used;
|
||||
if (!state.buff.empty()) {
|
||||
streams.out.append(state.buff);
|
||||
state.buff.clear();
|
||||
}
|
||||
} while (args_used > 0 && argc > 0 && !state.early_exit);
|
||||
|
||||
#if defined(HAVE_USELOCALE) || defined(__GLIBC__)
|
||||
uselocale(prev_locale);
|
||||
#else
|
||||
setlocale(LC_NUMERIC, prev_locale);
|
||||
#endif
|
||||
|
||||
return state.exit_code;
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
// Prototype for the function implementing the printf builtin.
|
||||
#ifndef FISH_BUILTIN_PRINTF_H
|
||||
#define FISH_BUILTIN_PRINTF_H
|
||||
|
||||
#include "../maybe.h"
|
||||
|
||||
class parser_t;
|
||||
struct io_streams_t;
|
||||
|
||||
maybe_t<int> builtin_printf(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
|
||||
#endif
|
|
@ -124,6 +124,15 @@ printf '%d\n' 0g
|
|||
echo $status
|
||||
# CHECK: 1
|
||||
|
||||
printf '%f\n' 0x2
|
||||
# CHECK: 2.000000
|
||||
|
||||
printf '%f\n' 0x2p3
|
||||
# CHECK: 16.000000
|
||||
|
||||
printf '%.1f\n' -0X1.5P8
|
||||
# CHECK: -336.0
|
||||
|
||||
# Test that we ignore options
|
||||
printf -a
|
||||
printf --foo
|
||||
|
|
Loading…
Reference in a new issue