From fa090f2c9f5ce8f3a507343469a786f2c569d3ff Mon Sep 17 00:00:00 2001 From: Siteshwar Vashisht Date: Tue, 22 Jan 2013 18:07:28 +0100 Subject: [PATCH 1/2] Initial version of printf builtin --- Makefile.in | 6 +- builtin.cpp | 2 + builtin_printf.cpp | 634 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 640 insertions(+), 2 deletions(-) create mode 100644 builtin_printf.cpp diff --git a/Makefile.in b/Makefile.in index d0b58f5b0..cde39f26a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -106,7 +106,8 @@ parser_keywords.o wutil.o tokenizer.o # BUILTIN_FILES := builtin_set.cpp builtin_commandline.cpp \ - builtin_ulimit.cpp builtin_complete.cpp builtin_jobs.cpp + builtin_ulimit.cpp builtin_complete.cpp builtin_jobs.cpp \ + builtin_printf.cpp # @@ -871,7 +872,7 @@ builtin.o: wgetopt.h sanity.h tokenizer.h wildcard.h expand.h input_common.h builtin.o: input.h intern.h exec.h highlight.h screen.h color.h parse_util.h builtin.o: autoload.h lru.h parser_keywords.h path.h history.h builtin.o: builtin_set.cpp builtin_commandline.cpp builtin_complete.cpp -builtin.o: builtin_ulimit.cpp builtin_jobs.cpp +builtin.o: builtin_ulimit.cpp builtin_jobs.cpp builtin_printf.cpp builtin_commandline.o: config.h signal.h fallback.h util.h wutil.h common.h builtin_commandline.o: builtin.h io.h wgetopt.h reader.h complete.h proc.h builtin_commandline.o: parser.h event.h function.h tokenizer.h input_common.h @@ -888,6 +889,7 @@ builtin_test.o: config.h common.h util.h builtin.h io.h wutil.h proc.h builtin_test.o: signal.h builtin_ulimit.o: config.h fallback.h signal.h util.h builtin.h io.h common.h builtin_ulimit.o: wgetopt.h +builtin_printf.o: wgetopt.h color.o: color.h config.h common.h util.h fallback.h signal.h common.o: config.h fallback.h signal.h util.h wutil.h common.h expand.h common.o: proc.h io.h wildcard.h parser.h event.h function.h complete.h diff --git a/builtin.cpp b/builtin.cpp index 12c02d6b8..2cd85218e 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -390,6 +390,7 @@ 
static void builtin_missing_argument(parser_t &parser, const wchar_t *cmd, const #include "builtin_complete.cpp" #include "builtin_ulimit.cpp" #include "builtin_jobs.cpp" +#include "builtin_printf.cpp" /* builtin_test lives in builtin_test.cpp */ int builtin_test(parser_t &parser, wchar_t **argv); @@ -4024,6 +4025,7 @@ static const builtin_data_t builtin_datas[]= { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") }, { L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, + { L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"random", &builtin_random, N_(L"Generate random number") }, { L"read", &builtin_read, N_(L"Read a line of input into variables") }, diff --git a/builtin_printf.cpp b/builtin_printf.cpp new file mode 100644 index 000000000..b633edd16 --- /dev/null +++ b/builtin_printf.cpp @@ -0,0 +1,634 @@ +/* printf - format and print data + Copyright (C) 1990-2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Usage: printf format [argument...] + + A front end to the printf function that lets it be used from the shell. 
+ + Backslash escapes: + + \" = double quote + \\ = backslash + \a = alert (bell) + \b = backspace + \c = produce no further output + \f = form feed + \n = new line + \r = carriage return + \t = horizontal tab + \v = vertical tab + \ooo = octal number (ooo is 1 to 3 digits) + \xhh = hexadecimal number (hh is 1 to 2 digits) + \uhhhh = 16-bit Unicode character (hhhh is 4 digits) + \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits) + + Additional directive: + + %b = print an argument string, interpreting backslash escapes, + except that octal escapes are of the form \0 or \0ooo. + + The `format' argument is re-used as many times as necessary + to convert all of the given arguments. + + David MacKenzie */ + +#include +#include +#include + +#include "common.h" + +// This file has been imported from source code of printf command in GNU Coreutils version 6.9 + +/* The official name of this program (e.g., no `g' prefix). */ +#define PROGRAM_NAME "printf" + +#define AUTHORS "David MacKenzie" + +#define isodigit(c) ((c) >= L'0' && (c) <= L'7') +#define hextobin(c) ((c) >= L'a' && (c) <= L'f' ? (c) - L'a' + 10 : \ + (c) >= L'A' && (c) <= L'F' ? (c) - L'A' + 10 : (c) - L'0') +#define octtobin(c) ((c) - L'0') + +# define ISDIGIT(c) ((unsigned int) (c) - L'0' <= 9) + +# define PRIdMAX L"ld" + +/* True if the POSIXLY_CORRECT environment variable is set. */ +static bool posixly_correct; + +/* This message appears in N_() here rather than just in _() below because + the sole use would have been in a #define.
*/ +static wchar_t const *const cfcc_msg = + N_(L"warning: %s: character(s) following character constant have been ignored"); + +int strtoimax (wchar_t const *ptr, wchar_t **endptr, int base) +{ + return wcstol (ptr, endptr, base); +} + +int strtoumax (wchar_t const *ptr, wchar_t **endptr, int base) +{ + return wcstol (ptr, endptr, base); +} + +# define STRTOD wcstod + +double +C_STRTOD (wchar_t const *nptr, wchar_t **endptr) +{ + double r; + + const wcstring saved_locale = wsetlocale (LC_NUMERIC, NULL); + + if (!saved_locale.empty()) + { + setlocale (LC_NUMERIC, "C"); + } + + r = STRTOD (nptr, endptr); + + if (!saved_locale.empty()) + { + wsetlocale (LC_NUMERIC, saved_locale.c_str()); + } + + return r; +} + +static inline unsigned wchar_t to_uchar (wchar_t ch) +{ + return ch; +} + +static void verify_numeric (const wchar_t *s, const wchar_t *end) +{ + if (errno) + { + append_format(stderr_buffer, L"%ls", s); + } + else if (*end) + { + if (s == end) + append_format(stderr_buffer, _(L"%ls: expected a numeric value"), s); + else + append_format(stderr_buffer, _(L"%ls: value not completely converted"), s); + } +} + +#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \ +static TYPE \ +FUNC_NAME (wchar_t const *s) \ +{ \ + wchar_t *end; \ + TYPE val; \ + \ + if (*s == L'\"' || *s == L'\'') \ + { \ + unsigned wchar_t ch = *++s; \ + val = ch; \ + /* If POSIXLY_CORRECT is not set, then give a warning that there \ + are characters following the character constant and that GNU \ + printf is ignoring those characters. If POSIXLY_CORRECT *is* \ + set, then don't give the warning. 
*/ \ + if (*++s != 0 && !posixly_correct) \ + append_format(stderr_buffer, _(cfcc_msg), s); \ + } \ + else \ + { \ + errno = 0; \ + val = (LIB_FUNC_EXPR); \ + verify_numeric (s, end); \ + } \ + return val; \ +} \ + +STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0)) +STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0)) +STRTOX (long double, vstrtold, C_STRTOD(s, &end)) + +/* Output a single-character \ escape. */ + +static void +print_esc_char (wchar_t c) +{ + switch (c) + { + case L'a': /* Alert. */ + append_format(stdout_buffer, L"%lc", L'\a'); + break; + case L'b': /* Backspace. */ + append_format(stdout_buffer, L"%lc", L'\b'); + break; + case L'c': /* Cancel the rest of the output. */ + exit (EXIT_SUCCESS); + break; + case L'f': /* Form feed. */ + append_format(stdout_buffer, L"%lc", L'\f'); + break; + case L'n': /* New line. */ + append_format(stdout_buffer, L"%lc", L'\n'); + break; + case L'r': /* Carriage return. */ + append_format(stdout_buffer, L"%lc", L'\r'); + break; + case L't': /* Horizontal tab. */ + append_format(stdout_buffer, L"%lc", L'\t'); + break; + case L'v': /* Vertical tab. */ + append_format(stdout_buffer, L"%lc", L'\v'); + break; + default: + append_format(stdout_buffer, L"%lc", c); + break; + } +} + +/* Print a \ escape sequence starting at ESCSTART. + Return the number of characters in the escape sequence + besides the backslash. + If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o + is an octal digit; otherwise they are of the form \ooo. */ + +static int print_esc (const wchar_t *escstart, bool octal_0) +{ + const wchar_t *p = escstart + 1; + int esc_value = 0; /* Value of \nnn escape. */ + int esc_length; /* Length of \nnn escape. */ + + if (*p == L'x') + { + /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.
*/ + for (esc_length = 0, ++p; + esc_length < 2 && isxdigit (to_uchar (*p)); + ++esc_length, ++p) + esc_value = esc_value * 16 + hextobin (*p); + if (esc_length == 0) + append_format(stderr_buffer, _(L"missing hexadecimal number in escape")); + append_format (stdout_buffer, L"%lc", esc_value); + } + else if (isodigit (*p)) + { + /* Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). + Allow \ooo if octal_0 && *p != L'0'; this is an undocumented + extension to POSIX that is compatible with Bash 2.05b. */ + for (esc_length = 0, p += octal_0 && *p == L'0'; + esc_length < 3 && isodigit (*p); + ++esc_length, ++p) + esc_value = esc_value * 8 + octtobin (*p); + append_format(stdout_buffer, L"%c", esc_value); + } + else if (*p && wcschr (L"\"\\abcfnrtv", *p)) + print_esc_char (*p++); + else if (*p == L'u' || *p == L'U') + { + wchar_t esc_char = *p; + unsigned int uni_value; + + uni_value = 0; + for (esc_length = (esc_char == L'u' ? 4 : 8), ++p; + esc_length > 0; + --esc_length, ++p) + { + if (! isxdigit (to_uchar (*p))) + append_format(stderr_buffer, _(L"missing hexadecimal number in escape")); + uni_value = uni_value * 16 + hextobin (*p); + } + + /* A universal character name shall not specify a character short + identifier in the range 00000000 through 00000020, 0000007F through + 0000009F, or 0000D800 through 0000DFFF inclusive. A universal + character name shall not designate a character in the required + character set. */ + if ((uni_value <= 0x9f + && uni_value != 0x24 && uni_value != 0x40 && uni_value != 0x60) + || (uni_value >= 0xd800 && uni_value <= 0xdfff)) + append_format(stderr_buffer, _(L"invalid universal character name \\%c%0*x"), + esc_char, (esc_char == L'u' ? 4 : 8), uni_value); + + append_format(stdout_buffer, L"%lc", uni_value); + } + else + { + append_format(stdout_buffer, L"%lc", L'\\'); + if (*p) + { + append_format (stdout_buffer, L"%lc", *p); + p++; + } + } + return p - escstart - 1; +} + +/* Print string STR, evaluating \ escapes. 
*/ + +static void +print_esc_string (const wchar_t *str) +{ + for (; *str; str++) + if (*str == L'\\') + str += print_esc (str, true); + else + append_format (stdout_buffer, L"%lc", *str); +} + +/* Evaluate a printf conversion specification. START is the start of + the directive, LENGTH is its length, and CONVERSION specifies the + type of conversion. LENGTH does not include any length modifier or + the conversion specifier itself. FIELD_WIDTH and PRECISION are the + field width and precision for '*' values, if HAVE_FIELD_WIDTH and + HAVE_PRECISION are true, respectively. ARGUMENT is the argument to + be formatted. */ + +static void print_direc (const wchar_t *start, size_t length, wchar_t conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + wchar_t const *argument) +{ + wchar_t *p; /* Null-terminated copy of % directive. */ + wcstring fmt; + + /* Create a null-terminated copy of the % directive, with an + intmax_t-wide length modifier substituted for any existing + integer length modifier. */ + { + wchar_t *q; + wchar_t const *length_modifier; + size_t length_modifier_len; + + switch (conversion) + { + case L'd': case L'i': case L'o': case L'u': case L'x': case L'X': + length_modifier = PRIdMAX; + length_modifier_len = sizeof PRIdMAX - 2; + break; + + case L'a': case L'e': case L'f': case L'g': + case L'A': case L'E': case L'F': case L'G': + length_modifier = L"L"; + length_modifier_len = 1; + break; + + default: + length_modifier = start; /* Any valid pointer will do. 
*/ + length_modifier_len = 0; + break; + } + + p = static_cast(malloc (length + length_modifier_len + 2)); + q = static_cast(mempcpy (p, start, length)); + q = static_cast(mempcpy (q, length_modifier, length_modifier_len)); + *q++ = conversion; + *q = L'\0'; + } + + append_format(fmt, L"%%l%lc", conversion); + switch (conversion) + { + case L'd': + case L'i': + { + intmax_t arg = vstrtoimax (argument); + if (!have_field_width) + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), arg); + else + append_format(stdout_buffer, fmt.c_str(), precision, arg); + } + else + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), field_width, arg); + else + append_format(stdout_buffer, fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'o': + case L'u': + case L'x': + case L'X': + { + + uintmax_t arg = vstrtoumax (argument); + if (!have_field_width) + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), arg); + else + append_format(stdout_buffer, fmt.c_str(), precision, arg); + } + else + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), field_width, arg); + else + append_format(stdout_buffer, fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'a': + case L'A': + case L'e': + case L'E': + case L'f': + case L'F': + case L'g': + case L'G': + { +debug(0, "Field width %d, Precision %d", field_width, precision); + + long double arg = vstrtold (argument); + if (!have_field_width) + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), arg); + else + append_format(stdout_buffer, fmt.c_str(), precision, arg); + } + else + { + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), field_width, arg); + else + append_format(stdout_buffer, fmt.c_str(), field_width, precision, arg); + } + } + break; + + case L'c': + if (!have_field_width) + append_format(stdout_buffer, fmt.c_str(), *argument); + else + append_format(stdout_buffer, fmt.c_str(), field_width, 
*argument); + break; + + case L's': + +debug(0, "Field width %d, Precision %d", field_width, precision); + if (!have_field_width) + { + if (!have_precision){ + append_format(stdout_buffer, fmt.c_str(), argument);} + else + append_format(stdout_buffer, fmt.c_str(), precision, argument); + } + else + { + + if (!have_precision) + append_format(stdout_buffer, fmt.c_str(), field_width, argument); + else + append_format(stdout_buffer, fmt.c_str(), field_width, precision, argument); + } + break; + } + + free (p); +} + +/* Print the text in FORMAT, using ARGV (with ARGC elements) for + arguments to any `%' directives. + Return the number of elements of ARGV used. */ + + static int print_formatted (const wchar_t *format, int argc, wchar_t **argv) + { + int save_argc = argc; /* Preserve original value. */ + const wchar_t *f; /* Pointer into `format'. */ + const wchar_t *direc_start; /* Start of % directive. */ + size_t direc_length; /* Length of % directive. */ + bool have_field_width; /* True if FIELD_WIDTH is valid. */ + int field_width = 0; /* Arg to first '*'. */ + bool have_precision; /* True if PRECISION is valid. */ + int precision = 0; /* Arg to second '*'. */ + wchar_t ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */ + + for (f = format; *f != L'\0'; ++f) + { + switch (*f) + { + case L'%': + direc_start = f++; + direc_length = 1; + have_field_width = have_precision = false; + if (*f == L'%') + { + append_format(stdout_buffer, L"%lc", L'%'); + break; + } + if (*f == L'b') + { + /* FIXME: Field width and precision are not supported + for %b, even though POSIX requires it. 
*/ + if (argc > 0) + { + print_esc_string (*argv); + ++argv; + --argc; + } + break; + } + + memset (ok, 0, sizeof ok); + ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] = + ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] = + ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1; + + for (;; f++, direc_length++) + switch (*f) + { +#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__ + case L'I': +#endif + case L'\'': + ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = + ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; + break; + case '-': case '+': case ' ': + break; + case L'#': + ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; + break; + case '0': + ok['c'] = ok['s'] = 0; + break; + default: + goto no_more_flag_characters; + } + no_more_flag_characters:; + + if (*f == L'*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t width = vstrtoimax (*argv); + if (INT_MIN <= width && width <= INT_MAX) + field_width = width; + else + append_format(stderr_buffer, _(L"invalid field width: %ls"), + *argv); + ++argv; + --argc; + } + else + field_width = 0; + have_field_width = true; + } + else + while (iswdigit(*f)) + { + ++f; + ++direc_length; + } + if (*f == L'.') + { + ++f; + ++direc_length; + ok['c'] = 0; + if (*f == L'*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t prec = vstrtoimax (*argv); + if (prec < 0) + { + /* A negative precision is taken as if the + precision were omitted, so -1 is safe + here even if prec < INT_MIN. */ + precision = -1; + } + else if (INT_MAX < prec) + append_format(stderr_buffer, _(L"invalid precision: %ls"), + *argv); + else + precision = prec; + ++argv; + --argc; + } + else + precision = 0; + have_precision = true; + } + else + while (iswdigit(*f)) + { + ++f; + ++direc_length; + } + } + + while (*f == L'l' || *f == L'L' || *f == L'h' + || *f == L'j' || *f == L't' || *f == L'z') + ++f; + + { + unsigned wchar_t conversion = *f; + if (! 
ok[conversion]) + append_format(stderr_buffer, + _("%.*ls: invalid conversion specification"), + (int) (f + 1 - direc_start), direc_start); + } + + print_direc (direc_start, direc_length, *f, + have_field_width, field_width, + have_precision, precision, + (argc <= 0 ? L"" : (argc--, *argv++))); + break; + + case L'\\': + f += print_esc (f, false); + break; + + default: + append_format (stdout_buffer, L"%lc", *f); + } + } + + return save_argc - argc; +} + +static int builtin_printf(parser_t &parser, wchar_t **argv) +{ + wchar_t *format; + int args_used; + int argc=builtin_count_args(argv); + + if (argc <= 1) + { + append_format(stderr_buffer, _(L"missing operand")); + return EXIT_FAILURE; + } + + format = argv[1]; + argc -= 2; + argv += 2; + + do + { + args_used = print_formatted (format, argc, argv); + argc -= args_used; + argv += args_used; + } + while (args_used > 0 && argc > 0); +} From 6ff88a44f0c1cb2e16f948fbd7705d1a68e6ecac Mon Sep 17 00:00:00 2001 From: Siteshwar Vashisht Date: Sat, 2 Mar 2013 16:02:20 +0100 Subject: [PATCH 2/2] Fixed code to support field width and precision --- builtin_printf.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/builtin_printf.cpp b/builtin_printf.cpp index b633edd16..e5f0e4ae1 100644 --- a/builtin_printf.cpp +++ b/builtin_printf.cpp @@ -97,7 +97,7 @@ C_STRTOD (wchar_t const *nptr, wchar_t **endptr) if (!saved_locale.empty()) { - setlocale (LC_NUMERIC, "C"); + wsetlocale (LC_NUMERIC, L"C"); } r = STRTOD (nptr, endptr); @@ -322,21 +322,24 @@ static void print_direc (const wchar_t *start, size_t length, wchar_t conversion length_modifier = L"L"; length_modifier_len = 1; break; - + case L's': + length_modifier = L"l"; + length_modifier_len = 1; + break; default: length_modifier = start; /* Any valid pointer will do. 
*/ length_modifier_len = 0; break; } - p = static_cast(malloc (length + length_modifier_len + 2)); - q = static_cast(mempcpy (p, start, length)); - q = static_cast(mempcpy (q, length_modifier, length_modifier_len)); + p = new wchar_t[length + length_modifier_len + 2]; + q = static_cast(mempcpy (p, start, sizeof(wchar_t) * length)); + q = static_cast(mempcpy (q, length_modifier, sizeof(wchar_t) * length_modifier_len)); *q++ = conversion; *q = L'\0'; } - - append_format(fmt, L"%%l%lc", conversion); + + fmt = p; switch (conversion) { case L'd': @@ -393,7 +396,6 @@ static void print_direc (const wchar_t *start, size_t length, wchar_t conversion case L'g': case L'G': { -debug(0, "Field width %d, Precision %d", field_width, precision); long double arg = vstrtold (argument); if (!have_field_width) @@ -421,8 +423,6 @@ debug(0, "Field width %d, Precision %d", field_width, precision); break; case L's': - -debug(0, "Field width %d, Precision %d", field_width, precision); if (!have_field_width) { if (!have_precision){ @@ -529,16 +529,18 @@ debug(0, "Field width %d, Precision %d", field_width, precision); ++argv; --argc; } - else - field_width = 0; + else{ + field_width = 0; + } have_field_width = true; } - else + else { while (iswdigit(*f)) { ++f; ++direc_length; } + } if (*f == L'.') { ++f;