fish-shell/src/builtins/printf.cpp

714 lines
25 KiB
C++
Raw Normal View History

// printf - format and print data
// Copyright (C) 1990-2007 Free Software Foundation, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation,
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// Usage: printf format [argument...]
//
// A front end to the printf function that lets it be used from the shell.
//
// Backslash escapes:
//
// \" = double quote
// \\ = backslash
// \a = alert (bell)
// \b = backspace
// \c = produce no further output
// \e = escape
// \f = form feed
// \n = new line
// \r = carriage return
// \t = horizontal tab
// \v = vertical tab
// \ooo = octal number (ooo is 1 to 3 digits)
// \xhh = hexadecimal number (hh is 1 to 2 digits)
// \uhhhh = 16-bit Unicode character (hhhh is 1 to 4 digits)
// \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 1 to 8 digits)
//
// Additional directive:
//
// %b = print an argument string, interpreting backslash escapes,
// except that octal escapes are of the form \0 or \0ooo.
//
// The `format' argument is re-used as many times as necessary
// to convert all of the given arguments.
//
// David MacKenzie <djm@gnu.ai.mit.edu>
// This file has been imported from source code of printf command in GNU Coreutils version 6.9.
#include "config.h" // IWYU pragma: keep
#include "printf.h"
#include <cerrno>
#include <cinttypes>
#include <climits>
#include <cstdarg>
#include <cstdint>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <locale>
#include <string>
#ifdef HAVE_XLOCALE_H
#include <xlocale.h>
#endif
2013-01-22 17:07:28 +00:00
#include "../builtin.h"
#include "../common.h"
#include "../io.h"
#include "../maybe.h"
#include "../wcstringutil.h"
#include "../wutil.h" // IWYU pragma: keep
class parser_t;
2013-01-22 17:07:28 +00:00
namespace {
struct builtin_printf_state_t {
// Out and err streams. Note this is a captured reference!
io_streams_t &streams;
// The status of the operation.
int exit_code;
// Whether we should stop outputting. This gets set in the case of an error, and also with the
// \c escape.
bool early_exit;
// Our output buffer, so we don't write() constantly.
// Our strategy is simple:
// We print once per argument, and we flush the buffer before the error.
wcstring buff;
explicit builtin_printf_state_t(io_streams_t &s)
: streams(s), exit_code(0), early_exit(false) {}
2013-03-22 00:44:51 +00:00
void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode);
2013-03-22 00:44:51 +00:00
void print_direc(const wchar_t *start, size_t length, wchar_t conversion, bool have_field_width,
int field_width, bool have_precision, int precision, wchar_t const *argument);
2013-03-22 00:44:51 +00:00
int print_formatted(const wchar_t *format, int argc, const wchar_t **argv);
void nonfatal_error(const wchar_t *fmt, ...);
void fatal_error(const wchar_t *fmt, ...);
long print_esc(const wchar_t *escstart, bool octal_0);
void print_esc_string(const wchar_t *str);
void print_esc_char(wchar_t c);
void append_output(wchar_t c);
void append_format_output(const wchar_t *fmt, ...);
};
} // namespace
/// Return true if C is one of the octal digits '0' through '7'.
static bool is_octal_digit(wchar_t c) {
    return c >= L'0' && c <= L'7';
}
void builtin_printf_state_t::nonfatal_error(const wchar_t *fmt, ...) {
// Don't error twice.
if (early_exit) return;
// If we have output, write it so it appears first.
if (!buff.empty()) {
streams.out.append(buff);
buff.clear();
}
va_list va;
va_start(va, fmt);
wcstring errstr = vformat_string(fmt, va);
va_end(va);
streams.err.append(errstr);
if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
2019-11-25 11:03:25 +00:00
// We set the exit code to error, because one occurred,
// but we don't do an early exit so we still print what we can.
this->exit_code = STATUS_CMD_ERROR;
}
void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) {
// Don't error twice.
if (early_exit) return;
// If we have output, write it so it appears first.
if (!buff.empty()) {
streams.out.append(buff);
buff.clear();
}
va_list va;
va_start(va, fmt);
wcstring errstr = vformat_string(fmt, va);
va_end(va);
streams.err.append(errstr);
if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');
this->exit_code = STATUS_CMD_ERROR;
this->early_exit = true;
}
void builtin_printf_state_t::append_output(wchar_t c) {
// Don't output if we're done.
if (early_exit) return;
buff.push_back(c);
}
void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...) {
// Don't output if we're done.
if (early_exit) return;
va_list va;
va_start(va, fmt);
wcstring tmp = vformat_string(fmt, va);
va_end(va);
buff.append(tmp);
}
/// Report any problem found while converting the numeric argument S.
/// END points at the first character the conversion did not consume, and ERRCODE is the errno
/// value observed right after the conversion.
///
/// - An errno other than 0 or EINVAL is a fatal error.
/// - If nothing at all was converted, that is a fatal error.
/// - If only a prefix was converted, that is a non-fatal error: the value is still printed.
void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode) {
    // EINVAL is not treated as an error here; input that could not be converted is caught by the
    // checks on END below. NOTE(review): presumably because some C libraries set EINVAL when no
    // digits are found - confirm.
    if (errcode != 0 && errcode != EINVAL) {
        if (errcode == ERANGE) {
            this->fatal_error(L"%ls: %ls", s, _(L"Number out of range"));
        } else {
            this->fatal_error(L"%ls: %s", s, std::strerror(errcode));
        }
        return;
    }

    // The whole string was consumed: nothing to complain about.
    if (*end == L'\0') return;

    if (s == end) {
        this->fatal_error(_(L"%ls: expected a numeric value"), s);
        return;
    }

    // This isn't entirely fatal - the value should still be printed.
    this->nonfatal_error(_(L"%ls: value not completely converted (can't convert '%ls')"), s, end);

    // Warn about octal numbers as they can be confusing.
    // Do it if the unconverted digit is a valid hex digit,
    // because it could also be an "0x" -> "0" typo.
    if (*s == L'0' && iswxdigit(*end)) {
        // The hint has no conversion specifiers, so it takes no further arguments.
        this->nonfatal_error(_(L"Hint: a leading '0' without an 'x' indicates an octal number"));
    }
}
/// Convert the leading part of S to a value of type T and store in *END a pointer to the first
/// character that was not consumed. Only the explicit specializations are defined.
template <typename T>
static T raw_string_to_scalar_type(const wchar_t *s, wchar_t **end);

/// Signed integers. Base 0 lets the library pick the radix from the prefix, so "0x1f" is
/// hexadecimal and "017" is octal.
template <>
intmax_t raw_string_to_scalar_type<intmax_t>(const wchar_t *s, wchar_t **end) {
    constexpr int auto_detect_base = 0;
    return std::wcstoimax(s, end, auto_detect_base);
}

/// Unsigned integers, with the same radix detection as the signed variant.
template <>
uintmax_t raw_string_to_scalar_type<uintmax_t>(const wchar_t *s, wchar_t **end) {
    constexpr int auto_detect_base = 0;
    return std::wcstoumax(s, end, auto_detect_base);
}
template <>
long double raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
double val = std::wcstod(s, end);
if (**end == L'\0') return val;
// The conversion using the user's locale failed. That may be due to the string not being a
// valid floating point value. It could also be due to the locale using different separator
// characters than the normal english convention. So try again by forcing the use of a locale
// that employs the english convention for writing floating point numbers.
return wcstod_l(s, end, fish_c_locale());
}
/// Convert the printf argument S to a value of type T.
/// If S starts with a single or double quote, the result is the value of the character right
/// after the quote. Otherwise S is parsed as a number and any conversion problem is reported
/// through STATE.
template <typename T>
static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state) {
    const bool is_char_constant = (*s == L'\"' || *s == L'\'');
    if (is_char_constant) {
        const wchar_t quoted = s[1];
        T from_char = quoted;
        return from_char;
    }

    wchar_t *stop = nullptr;
    errno = 0;  // so the value checked below is the one left by this conversion
    T parsed = raw_string_to_scalar_type<T>(s, &stop);
    const int parse_errno = errno;
    state->verify_numeric(s, stop, parse_errno);
    return parsed;
}
/// Output a single-character \ escape.
/// C is the character that followed the backslash. \c stops all further output; every other
/// recognized letter is translated to its control character; anything else is output unchanged.
void builtin_printf_state_t::print_esc_char(wchar_t c) {
    // \c produces no character of its own: it cancels the rest of the output.
    if (c == L'c') {
        this->early_exit = true;
        return;
    }

    wchar_t translated = c;  // characters without a special meaning pass through as-is
    switch (c) {
        case L'a': translated = L'\a'; break;    // alert
        case L'b': translated = L'\b'; break;    // backspace
        case L'e': translated = L'\x1B'; break;  // escape
        case L'f': translated = L'\f'; break;    // form feed
        case L'n': translated = L'\n'; break;    // new line
        case L'r': translated = L'\r'; break;    // carriage return
        case L't': translated = L'\t'; break;    // horizontal tab
        case L'v': translated = L'\v'; break;    // vertical tab
        default: break;
    }
    this->append_output(translated);
}
/// Print a \ escape sequence starting at ESCSTART.
/// Return the number of characters in the escape sequence besides the backslash.
/// If OCTAL_0 is true, octal escapes are of the form \0ooo, where o
/// is an octal digit; otherwise they are of the form \ooo.
long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) {
    // Walks over the characters that follow the backslash.
    const wchar_t *cursor = escstart + 1;

    if (*cursor == L'x') {
        // A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
        ++cursor;
        int value = 0;
        int digits = 0;
        while (digits < 2 && iswxdigit(*cursor)) {
            value = value * 16 + convert_digit(*cursor, 16);
            ++digits;
            ++cursor;
        }
        if (digits == 0) this->fatal_error(_(L"missing hexadecimal number in escape"));
        this->append_output(ENCODE_DIRECT_BASE + value % 256);
    } else if (is_octal_digit(*cursor)) {
        // Parse \0ooo (if octal_0 && *cursor == L'0') or \ooo (otherwise). Allow \ooo if octal_0
        // && *cursor != L'0'; this is an undocumented extension to POSIX that is compatible with
        // Bash 2.05b. Wrap mod 256, which matches historic behavior.
        if (octal_0 && *cursor == L'0') ++cursor;
        int value = 0;
        int digits = 0;
        while (digits < 3 && is_octal_digit(*cursor)) {
            value = value * 8 + convert_digit(*cursor, 8);
            ++digits;
            ++cursor;
        }
        this->append_output(ENCODE_DIRECT_BASE + value % 256);
    } else if (*cursor && std::wcschr(L"\"\\abcefnrtv", *cursor)) {
        print_esc_char(*cursor);
        ++cursor;
    } else if (*cursor == L'u' || *cursor == L'U') {
        const wchar_t esc_char = *cursor;
        ++cursor;
        // \u takes up to 4 hex digits, \U up to 8.
        const int max_digits = (esc_char == L'u') ? 4 : 8;
        uint32_t uni_value = 0;
        for (int digits = 0; digits < max_digits; ++digits) {
            if (!iswxdigit(*cursor)) {
                // Escape sequence must be done. Complain if we didn't get anything.
                if (digits == 0) {
                    this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
                }
                break;
            }
            uni_value = uni_value * 16 + convert_digit(*cursor, 16);
            ++cursor;
        }

        // PCA GNU printf respects the limitations described in ISO N717, about which universal
        // characters "shall not" be specified. I believe this limitation is for the benefit of
        // compilers; I see no reason to impose it in builtin_printf.
        //
        // If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code
        // points, so just use that. If not defined, use the %lc printf conversion; this probably
        // won't do anything good if your wide character set is not Unicode, but such platforms are
        // exceedingly rare.
        if (uni_value > 0x10FFFF) {
            this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, max_digits,
                              uni_value);
        } else {
#if defined(__STDC_ISO_10646__)
            this->append_output(uni_value);
#else
            this->append_format_output(L"%lc", uni_value);
#endif
        }
    } else {
        // Not a recognized escape: output the backslash and the character after it (if any)
        // literally.
        this->append_output(L'\\');
        if (*cursor) {
            this->append_output(*cursor);
            ++cursor;
        }
    }
    return cursor - escstart - 1;
}
/// Print string STR, evaluating \ escapes.
/// Escapes are handled by print_esc() with the \0ooo octal form enabled.
void builtin_printf_state_t::print_esc_string(const wchar_t *str) {
    const wchar_t *cursor = str;
    while (*cursor != L'\0') {
        if (*cursor == L'\\') {
            // print_esc() reports how many characters followed the backslash; skip over them.
            cursor += print_esc(cursor, true);
        } else {
            this->append_output(*cursor);
        }
        ++cursor;
    }
}
/// Evaluate a printf conversion specification. START is the start of the directive, LENGTH is its
/// length, and CONVERSION specifies the type of conversion. LENGTH does not include any length
/// modifier or the conversion specifier itself. FIELD_WIDTH and PRECISION are the field width and
/// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
/// ARGUMENT is the argument to be formatted.
void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion,
bool have_field_width, int field_width,
bool have_precision, int precision,
wchar_t const *argument) {
// Start with everything except the conversion specifier.
wcstring fmt(start, length);
2013-01-22 17:07:28 +00:00
// Create a copy of the % directive, with an intmax_t-wide width modifier substituted for any
// existing integer length modifier.
switch (conversion) {
case L'x':
case L'X':
case L'd':
case L'i':
case L'o':
case L'u': {
fmt.append(L"ll");
break;
}
case L'a':
case L'e':
case L'f':
case L'g':
case L'A':
case L'E':
case L'F':
case L'G': {
fmt.append(L"L");
break;
}
case L's':
case L'c': {
fmt.append(L"l");
break;
}
default: {
break;
}
}
2013-01-22 17:07:28 +00:00
// Append the conversion itself.
fmt.push_back(conversion);
switch (conversion) {
case L'd':
case L'i': {
auto arg = string_to_scalar_type<intmax_t>(argument, this);
if (!have_field_width) {
if (!have_precision)
this->append_format_output(fmt.c_str(), arg);
else
this->append_format_output(fmt.c_str(), precision, arg);
} else {
if (!have_precision)
this->append_format_output(fmt.c_str(), field_width, arg);
else
this->append_format_output(fmt.c_str(), field_width, precision, arg);
}
break;
}
case L'o':
case L'u':
case L'x':
case L'X': {
auto arg = string_to_scalar_type<uintmax_t>(argument, this);
if (!have_field_width) {
if (!have_precision)
this->append_format_output(fmt.c_str(), arg);
else
this->append_format_output(fmt.c_str(), precision, arg);
} else {
if (!have_precision)
this->append_format_output(fmt.c_str(), field_width, arg);
else
this->append_format_output(fmt.c_str(), field_width, precision, arg);
}
break;
}
case L'a':
case L'A':
case L'e':
case L'E':
case L'f':
case L'F':
case L'g':
case L'G': {
auto arg = string_to_scalar_type<long double>(argument, this);
if (!have_field_width) {
if (!have_precision) {
this->append_format_output(fmt.c_str(), arg);
} else {
this->append_format_output(fmt.c_str(), precision, arg);
}
} else {
if (!have_precision) {
this->append_format_output(fmt.c_str(), field_width, arg);
} else {
this->append_format_output(fmt.c_str(), field_width, precision, arg);
}
}
break;
}
case L'c': {
if (!have_field_width) {
this->append_format_output(fmt.c_str(), *argument);
} else {
this->append_format_output(fmt.c_str(), field_width, *argument);
}
break;
}
case L's': {
if (!have_field_width) {
if (!have_precision) {
this->append_format_output(fmt.c_str(), argument);
} else {
this->append_format_output(fmt.c_str(), precision, argument);
}
} else {
if (!have_precision) {
this->append_format_output(fmt.c_str(), field_width, argument);
} else {
this->append_format_output(fmt.c_str(), field_width, precision, argument);
}
}
break;
}
default: {
DIE("unexpected opt");
}
2013-01-22 17:07:28 +00:00
}
}
/// For each character in str, set the corresponding boolean in the array to the given flag.
/// Characters are used as unsigned indexes, so every char value maps to a slot inside OK.
static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str,
                                                    bool flag) {
    const char *cursor = str;
    while (*cursor != '\0') {
        const unsigned char slot = static_cast<unsigned char>(*cursor);
        ok[slot] = flag;
        ++cursor;
    }
}
/// Print the text in FORMAT, using ARGV (with ARGC elements) for arguments to any `%' directives.
/// Return the number of elements of ARGV used.
/// An invalid conversion specification is reported as a fatal error and makes this return 0.
int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, const wchar_t **argv) {
    const int initial_argc = argc;  // Kept to work out how many arguments were consumed.
    int field_width = 0;            // Arg to first '*'.
    int precision = 0;              // Arg to second '*'.
    bool ok[UCHAR_MAX + 1] = {};    // ok['x'] is true if %x is allowed.

    for (const wchar_t *f = format; *f != L'\0'; ++f) {
        // Backslash escape.
        if (*f == L'\\') {
            f += print_esc(f, false);
            continue;
        }
        // Ordinary character.
        if (*f != L'%') {
            this->append_output(*f);
            continue;
        }

        // From here on we are parsing a % directive.
        const wchar_t *const direc_start = f;
        ++f;
        size_t direc_length = 1;        // Length of the directive so far.
        bool have_field_width = false;  // True if field_width is valid.
        bool have_precision = false;    // True if precision is valid.

        if (*f == L'%') {
            this->append_output(L'%');
            continue;
        }
        if (*f == L'b') {
            // FIXME: Field width and precision are not supported for %b, even though POSIX
            // requires it.
            if (argc > 0) {
                print_esc_string(*argv);
                ++argv;
                --argc;
            }
            continue;
        }

        // Every conversion starts out allowed; flags and a precision rule some of them out.
        modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true);

        // Flags.
        for (;;) {
            const wchar_t flag = *f;
            if (flag == L'I' || flag == L'\'') {
                modify_allowed_format_specifiers(ok, "aAceEosxX", false);
            } else if (flag == L'-' || flag == L'+' || flag == L' ') {
                // These restrict nothing.
            } else if (flag == L'#') {
                modify_allowed_format_specifiers(ok, "cdisu", false);
            } else if (flag == L'0') {
                modify_allowed_format_specifiers(ok, "cs", false);
            } else {
                break;  // Not a flag.
            }
            ++f;
            ++direc_length;
        }

        // Field width: either '*' (taken from the next argument) or literal digits.
        if (*f == L'*') {
            ++f;
            ++direc_length;
            if (argc > 0) {
                const auto width = string_to_scalar_type<intmax_t>(*argv, this);
                const bool fits_in_int = (width >= INT_MIN && width <= INT_MAX);
                if (fits_in_int) {
                    field_width = static_cast<int>(width);
                } else {
                    this->fatal_error(_(L"invalid field width: %ls"), *argv);
                }
                ++argv;
                --argc;
            } else {
                field_width = 0;
            }
            have_field_width = true;
        } else {
            while (iswdigit(*f)) {
                ++f;
                ++direc_length;
            }
        }

        // Precision: '.' followed by either '*' or literal digits.
        if (*f == L'.') {
            ++f;
            ++direc_length;
            modify_allowed_format_specifiers(ok, "c", false);
            if (*f == L'*') {
                ++f;
                ++direc_length;
                if (argc > 0) {
                    const auto prec = string_to_scalar_type<intmax_t>(*argv, this);
                    if (prec < 0) {
                        // A negative precision is taken as if the precision were omitted,
                        // so -1 is safe here even if prec < INT_MIN.
                        precision = -1;
                    } else if (prec > INT_MAX) {
                        this->fatal_error(_(L"invalid precision: %ls"), *argv);
                    } else {
                        precision = static_cast<int>(prec);
                    }
                    ++argv;
                    --argc;
                } else {
                    precision = 0;
                }
                have_precision = true;
            } else {
                while (iswdigit(*f)) {
                    ++f;
                    ++direc_length;
                }
            }
        }

        // Skip any length modifiers. They are not counted in direc_length, so they are left out
        // of the directive handed to print_direc.
        while (*f != L'\0' && std::wcschr(L"lLhjtz", *f)) {
            ++f;
        }

        const wchar_t conversion = *f;
        if (conversion > 0xFF || !ok[conversion]) {
            this->fatal_error(_(L"%.*ls: invalid conversion specification"),
                              static_cast<int>(f + 1 - direc_start), direc_start);
            return 0;
        }

        // A directive with no argument left is formatted with an empty string.
        const wchar_t *argument = L"";
        if (argc > 0) {
            argument = *argv;
            ++argv;
            --argc;
        }
        print_direc(direc_start, direc_length, conversion, have_field_width, field_width,
                    have_precision, precision, argument);
    }
    return initial_argc - argc;
}
/// The printf builtin.
/// ARGV holds the command name, then the format, then the arguments to format. The format is
/// applied repeatedly until all arguments are consumed, a pass consumes no arguments, or output
/// is stopped. Returns STATUS_INVALID_ARGS if no format was given, otherwise the exit code
/// accumulated while printing.
maybe_t<int> builtin_printf(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
    UNUSED(parser);
    int argc = builtin_count_args(argv);

    // Skip the command name.
    argv++;
    argc--;
    if (argc < 1) {
        return STATUS_INVALID_ARGS;
    }

#if defined(HAVE_USELOCALE) || defined(__GLIBC__)
    // We use a locale-dependent LC_NUMERIC here,
    // unlike the rest of fish (which uses LC_NUMERIC=C).
    // Because we do output as well as wcstod (which would have wcstod_l),
    // we need to set the locale here.
    // (glibc has uselocale since 2.3, but our configure checks fail us)
    locale_t prev_locale = uselocale(fish_numeric_locale());
#else
    // NetBSD does not have uselocale,
    // so the best we can do is setlocale.
    // The string returned by setlocale() may be overwritten by a later setlocale() call, so take
    // a copy of the current locale name before switching.
    const char *current_locale = setlocale(LC_NUMERIC, nullptr);
    const bool have_prev_locale = (current_locale != nullptr);
    const std::string prev_locale = have_prev_locale ? current_locale : "";
    setlocale(LC_NUMERIC, "");
#endif

    builtin_printf_state_t state(streams);
    int args_used;
    const wchar_t *format = argv[0];

    // Skip the format; what remains are the arguments to be formatted.
    argc--;
    argv++;

    do {
        args_used = state.print_formatted(format, argc, argv);
        argc -= args_used;
        argv += args_used;

        // Write out whatever this pass produced.
        if (!state.buff.empty()) {
            streams.out.append(state.buff);
            state.buff.clear();
        }
    } while (args_used > 0 && argc > 0 && !state.early_exit);

#if defined(HAVE_USELOCALE) || defined(__GLIBC__)
    uselocale(prev_locale);
#else
    if (have_prev_locale) setlocale(LC_NUMERIC, prev_locale.c_str());
#endif
    return state.exit_code;
}