From 1d4a0fb091f649634d90149f2a4c1cb14a7ae23f Mon Sep 17 00:00:00 2001
From: Kurtis Rader <krader@skepticism.us>
Date: Tue, 13 Jun 2017 17:48:47 -0700
Subject: [PATCH] split builtin echo into its own module

---
 Makefile.in          |   2 +-
 src/builtin.cpp      | 270 +-----------------------------------------
 src/builtin_echo.cpp | 275 +++++++++++++++++++++++++++++++++++++++++++
 src/builtin_echo.h   |   9 ++
 4 files changed, 286 insertions(+), 270 deletions(-)
 create mode 100644 src/builtin_echo.cpp
 create mode 100644 src/builtin_echo.h

diff --git a/Makefile.in b/Makefile.in
index 0da541e59..ddf8ff1dd 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -101,7 +101,7 @@ HAVE_DOXYGEN=@HAVE_DOXYGEN@
 FISH_OBJS := obj/autoload.o obj/builtin.o obj/builtin_bind.o obj/builtin_block.o \
 	obj/builtin_commandline.o obj/builtin_emit.o obj/builtin_functions.o \
 	obj/builtin_history.o obj/builtin_status.o obj/builtin_read.o \
-	obj/builtin_random.o \
+	obj/builtin_random.o obj/builtin_echo.o \
 	obj/builtin_function.o \
 	obj/builtin_complete.o obj/builtin_jobs.o obj/builtin_printf.o \
 	obj/builtin_set.o obj/builtin_set_color.o obj/builtin_string.o \
diff --git a/src/builtin.cpp b/src/builtin.cpp
index 6e5e798c5..93ddbe6a3 100644
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -19,7 +19,6 @@
 
 #include <errno.h>
 #include <fcntl.h>
-#include <limits.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -38,8 +37,8 @@
 #include "builtin_block.h"
 #include "builtin_commandline.h"
 #include "builtin_complete.h"
+#include "builtin_echo.h"
 #include "builtin_emit.h"
-#include "builtin_function.h"
 #include "builtin_functions.h"
 #include "builtin_history.h"
 #include "builtin_jobs.h"
@@ -55,20 +54,16 @@
 #include "common.h"
 #include "complete.h"
 #include "env.h"
-#include "event.h"
 #include "exec.h"
 #include "fallback.h"  // IWYU pragma: keep
-#include "function.h"
 #include "intern.h"
 #include "io.h"
 #include "parse_constants.h"
 #include "parse_util.h"
 #include "parser.h"
-#include "parser_keywords.h"
 #include "path.h"
 #include "proc.h"
 #include "reader.h"
-#include "signal.h"
 #include "tokenizer.h"
 #include "wgetopt.h"
 #include "wutil.h"  // IWYU pragma: keep
@@ -389,269 +384,6 @@ static int builtin_generic(parser_t &parser, io_streams_t &streams, wchar_t **ar
     return STATUS_CMD_ERROR;
 }
 
-// Convert a octal or hex character to its binary value. Surprisingly a version
-// of this function using a lookup table is only ~1.5% faster than the `switch`
-// statement version below. Since that requires initializing a table statically
-// (which is problematic if we run on an EBCDIC system) we don't use that
-// solution. Also, we relax the style rule that `case` blocks should always be
-// enclosed in parentheses given the nature of this code.
-static unsigned int builtin_echo_digit(wchar_t wc, unsigned int base) {
-    assert(base == 8 || base == 16);  // base must be hex or octal
-    switch (wc) {
-        case L'0':
-            return 0;
-        case L'1':
-            return 1;
-        case L'2':
-            return 2;
-        case L'3':
-            return 3;
-        case L'4':
-            return 4;
-        case L'5':
-            return 5;
-        case L'6':
-            return 6;
-        case L'7':
-            return 7;
-        default: { break; }
-    }
-
-    if (base != 16) return UINT_MAX;
-
-    switch (wc) {
-        case L'8':
-            return 8;
-        case L'9':
-            return 9;
-        case L'a':
-        case L'A':
-            return 10;
-        case L'b':
-        case L'B':
-            return 11;
-        case L'c':
-        case L'C':
-            return 12;
-        case L'd':
-        case L'D':
-            return 13;
-        case L'e':
-        case L'E':
-            return 14;
-        case L'f':
-        case L'F':
-            return 15;
-        default: { break; }
-    }
-
-    return UINT_MAX;
-}
-
-/// Parse a numeric escape sequence in str, returning whether we succeeded. Also return the number
-/// of characters consumed and the resulting value. Supported escape sequences:
-///
-/// \0nnn: octal value, zero to three digits
-/// \nnn: octal value, one to three digits
-/// \xhh: hex value, one to two digits
-static bool builtin_echo_parse_numeric_sequence(const wchar_t *str, size_t *consumed,
-                                                unsigned char *out_val) {
-    bool success = false;
-    unsigned int start = 0;  // the first character of the numeric part of the sequence
-
-    unsigned int base = 0, max_digits = 0;
-    if (builtin_echo_digit(str[0], 8) != UINT_MAX) {
-        // Octal escape
-        base = 8;
-
-        // If the first digit is a 0, we allow four digits (including that zero); otherwise, we
-        // allow 3.
-        max_digits = (str[0] == L'0' ? 4 : 3);
-    } else if (str[0] == L'x') {
-        // Hex escape
-        base = 16;
-        max_digits = 2;
-
-        // Skip the x
-        start = 1;
-    }
-
-    if (base == 0) {
-        return success;
-    }
-
-    unsigned int idx;
-    unsigned char val = 0;  // resulting character
-    for (idx = start; idx < start + max_digits; idx++) {
-        unsigned int digit = builtin_echo_digit(str[idx], base);
-        if (digit == UINT_MAX) break;
-        val = val * base + digit;
-    }
-
-    // We succeeded if we consumed at least one digit.
-    if (idx > start) {
-        *consumed = idx;
-        *out_val = val;
-        success = true;
-    }
-    return success;
-}
-
-/// The echo builtin.
-///
-/// Bash only respects -n if it's the first argument. We'll do the same. We also support a new
-/// option -s to mean "no spaces"
-static int builtin_echo(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
-    UNUSED(parser);
-    // Skip first arg
-    if (!*argv++) return STATUS_INVALID_ARGS;
-
-    // Process options. Options must come at the beginning - the first non-option kicks us out.
-    bool print_newline = true, print_spaces = true, interpret_special_chars = false;
-    size_t option_idx = 0;
-    for (option_idx = 0; argv[option_idx] != NULL; option_idx++) {
-        const wchar_t *arg = argv[option_idx];
-        assert(arg != NULL);
-        bool arg_is_valid_option = false;
-        if (arg[0] == L'-') {
-            // We have a leading dash. Ensure that every subseqnent character is a valid option.
-            size_t i = 1;
-            while (arg[i] != L'\0' && wcschr(L"nesE", arg[i]) != NULL) {
-                i++;
-            }
-            // We must have at least two characters to be a valid option, and have consumed the
-            // whole string.
-            arg_is_valid_option = (i >= 2 && arg[i] == L'\0');
-        }
-
-        if (!arg_is_valid_option) {
-            // This argument is not an option, so there are no more options.
-            break;
-        }
-
-        // Ok, we are sure the argument is an option. Parse it.
-        assert(arg_is_valid_option);
-        for (size_t i = 1; arg[i] != L'\0'; i++) {
-            switch (arg[i]) {
-                case L'n': {
-                    print_newline = false;
-                    break;
-                }
-                case L'e': {
-                    interpret_special_chars = true;
-                    break;
-                }
-                case L's': {
-                    print_spaces = false;
-                    break;
-                }
-                case L'E': {
-                    interpret_special_chars = false;
-                    break;
-                }
-                default: {
-                    DIE("unexpected character in builtin_echo argument");
-                    break;
-                }
-            }
-        }
-    }
-
-    // The special character \c can be used to indicate no more output.
-    bool continue_output = true;
-
-    /* Skip over the options */
-    const wchar_t *const *args_to_echo = argv + option_idx;
-    for (size_t idx = 0; continue_output && args_to_echo[idx] != NULL; idx++) {
-        if (print_spaces && idx > 0) {
-            streams.out.push_back(' ');
-        }
-
-        const wchar_t *str = args_to_echo[idx];
-        for (size_t j = 0; continue_output && str[j]; j++) {
-            if (!interpret_special_chars || str[j] != L'\\') {
-                // Not an escape.
-                streams.out.push_back(str[j]);
-            } else {
-                // Most escapes consume one character in addition to the backslash; the numeric
-                // sequences may consume more, while an unrecognized escape sequence consumes none.
-                wchar_t wc;
-                size_t consumed = 1;
-                switch (str[j + 1]) {
-                    case L'a': {
-                        wc = L'\a';
-                        break;
-                    }
-                    case L'b': {
-                        wc = L'\b';
-                        break;
-                    }
-                    case L'e': {
-                        wc = L'\e';
-                        break;
-                    }
-                    case L'f': {
-                        wc = L'\f';
-                        break;
-                    }
-                    case L'n': {
-                        wc = L'\n';
-                        break;
-                    }
-                    case L'r': {
-                        wc = L'\r';
-                        break;
-                    }
-                    case L't': {
-                        wc = L'\t';
-                        break;
-                    }
-                    case L'v': {
-                        wc = L'\v';
-                        break;
-                    }
-                    case L'\\': {
-                        wc = L'\\';
-                        break;
-                    }
-                    case L'c': {
-                        wc = 0;
-                        continue_output = false;
-                        break;
-                    }
-                    default: {
-                        // Octal and hex escape sequences.
-                        unsigned char narrow_val = 0;
-                        if (builtin_echo_parse_numeric_sequence(str + j + 1, &consumed,
-                                                                &narrow_val)) {
-                            // Here consumed must have been set to something. The narrow_val is a
-                            // literal byte that we want to output (#1894).
-                            wc = ENCODE_DIRECT_BASE + narrow_val % 256;
-                        } else {
-                            // Not a recognized escape. We consume only the backslash.
-                            wc = L'\\';
-                            consumed = 0;
-                        }
-                        break;
-                    }
-                }
-
-                // Skip over characters that were part of this escape sequence (but not the
-                // backslash, which will be handled by the loop increment.
-                j += consumed;
-
-                if (continue_output) {
-                    streams.out.push_back(wc);
-                }
-            }
-        }
-    }
-    if (print_newline && continue_output) {
-        streams.out.push_back('\n');
-    }
-    return STATUS_CMD_OK;
-}
-
 /// The pwd builtin. We don't respect -P to resolve symbolic links because we
 /// try to always resolve them.
 static int builtin_pwd(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
diff --git a/src/builtin_echo.cpp b/src/builtin_echo.cpp
new file mode 100644
index 000000000..9b244c95a
--- /dev/null
+++ b/src/builtin_echo.cpp
@@ -0,0 +1,275 @@
+// Implementation of the echo builtin.
+#include "config.h"  // IWYU pragma: keep
+
+#include <limits.h>
+#include <stddef.h>
+
+#include "builtin.h"
+#include "builtin_echo.h"
+#include "common.h"
+#include "fallback.h"  // IWYU pragma: keep
+#include "io.h"
+#include "wgetopt.h"
+#include "wutil.h"  // IWYU pragma: keep
+
+struct cmd_opts {  // flag state for one echo invocation; the initializers are the defaults
+    bool print_help = false;  // NOTE(review): nothing in this file reads or sets it — confirm
+    bool print_newline = true;  // cleared by -n, which suppresses the trailing newline
+    bool print_spaces = true;  // cleared by -s, which stops arguments being joined by a space
+    bool interpret_special_chars = false;  // set by -e, cleared by -E; enables backslash escapes
+};  // NOTE(review): confirm no other source file defines a different struct cmd_opts (ODR)
+static const wchar_t *short_options = L"+Eens";  // '+' presumably stops at first non-option; confirm
+static const struct woption *long_options = NULL;  // no long options are defined
+
+// Parse echo's leading flags into *opts; *optind gets the index of the first argument to echo.
+// An argument with an unrecognized flag is echoed, its flags ignored. Returns STATUS_CMD_OK.
+static int parse_cmd_opts(struct cmd_opts *opts, int *optind, int argc, wchar_t **argv,
+                          parser_t &parser, io_streams_t &streams) {
+    UNUSED(parser);
+    UNUSED(streams);
+    int opt;
+    wgetopter_t w;
+    struct cmd_opts committed = *opts;  // flags as of the last argument consumed in full
+    int committed_optind = 1;           // index of the argument after that one
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, NULL)) != -1) {
+        switch (opt) {
+            case 'n': {
+                opts->print_newline = false;
+                break;
+            }
+            case 's': {
+                opts->print_spaces = false;
+                break;
+            }
+            case 'e':
+            case 'E': {
+                opts->interpret_special_chars = (opt == 'e');
+                break;
+            }
+            case '?': {  // e.g. `-nx`: discard the `n` and echo the whole argument verbatim
+                *opts = committed;
+                *optind = committed_optind;
+                return STATUS_CMD_OK;
+            }
+            default: { DIE("unexpected retval from wgetopt_long"); break; }
+        }
+        if (w.woptind != committed_optind) committed = *opts;  // an argument was fully consumed
+        committed_optind = w.woptind;
+    }
+    *optind = w.woptind;
+    return STATUS_CMD_OK;
+}
+
+// Convert an octal or hex digit character to its numeric value. `base` must be 8 or 16; UINT_MAX
+// is returned when `wc` is not a digit of that base. Surprisingly a version of this function
+// using a lookup table is only ~1.5% faster than the `switch` version below. Since that requires
+// initializing a table statically (which is problematic if we run on an EBCDIC system) we don't
+// use that solution. Also, we relax the style rule that `case` blocks should always be enclosed
+// in braces given the nature of this code.
+static unsigned int builtin_echo_digit(wchar_t wc, unsigned int base) {
+    assert(base == 8 || base == 16);  // base must be hex or octal
+    switch (wc) {  // digits that are valid in both bases
+        case L'0':
+            return 0;
+        case L'1':
+            return 1;
+        case L'2':
+            return 2;
+        case L'3':
+            return 3;
+        case L'4':
+            return 4;
+        case L'5':
+            return 5;
+        case L'6':
+            return 6;
+        case L'7':
+            return 7;
+        default: { break; }
+    }
+
+    if (base != 16) return UINT_MAX;  // anything else is not an octal digit
+
+    switch (wc) {  // digits that are valid only in hex; letters match in either case
+        case L'8':
+            return 8;
+        case L'9':
+            return 9;
+        case L'a':
+        case L'A':
+            return 10;
+        case L'b':
+        case L'B':
+            return 11;
+        case L'c':
+        case L'C':
+            return 12;
+        case L'd':
+        case L'D':
+            return 13;
+        case L'e':
+        case L'E':
+            return 14;
+        case L'f':
+        case L'F':
+            return 15;
+        default: { break; }
+    }
+
+    return UINT_MAX;  // not a hex digit either
+}
+
+/// Parse a numeric escape sequence at str (the text just after the backslash). On success return
+/// true, storing the number of characters used in *consumed and the byte value in *out_val; on
+/// failure return false and leave both untouched. Supported escape sequences:
+/// \0nnn: octal value, zero to three digits
+/// \nnn: octal value, one to three digits
+/// \xhh: hex value, one to two digits
+static bool builtin_echo_parse_numeric_sequence(const wchar_t *str, size_t *consumed,
+                                                unsigned char *out_val) {
+    bool success = false;
+    unsigned int start = 0;  // the first character of the numeric part of the sequence
+
+    unsigned int base = 0, max_digits = 0;
+    if (builtin_echo_digit(str[0], 8) != UINT_MAX) {
+        // Octal escape
+        base = 8;
+
+        // If the first digit is a 0, we allow four digits (including that zero); otherwise, we
+        // allow 3.
+        max_digits = (str[0] == L'0' ? 4 : 3);
+    } else if (str[0] == L'x') {
+        // Hex escape
+        base = 16;
+        max_digits = 2;
+
+        // Skip the x
+        start = 1;
+    }
+
+    if (base == 0) {  // neither an octal digit nor 'x': not a numeric escape
+        return success;
+    }
+
+    unsigned int idx;
+    unsigned char val = 0;  // resulting character
+    for (idx = start; idx < start + max_digits; idx++) {
+        unsigned int digit = builtin_echo_digit(str[idx], base);
+        if (digit == UINT_MAX) break;  // also stops at the terminating L'\0'
+        val = val * base + digit;  // narrowed to unsigned char, so e.g. \777 loses its high bits
+    }
+
+    // We succeeded if we consumed at least one digit.
+    if (idx > start) {
+        *consumed = idx;  // counts the 'x' of a hex escape as well
+        *out_val = val;
+        success = true;
+    }
+    return success;
+}
+
+/// The echo builtin.
+///
+/// Bash only respects options such as -n when they come before the text to echo. We'll do the
+/// same. We also support a new, fish specific, option -s to mean "no spaces". Returns the status
+/// from option parsing if that is not STATUS_CMD_OK, otherwise STATUS_CMD_OK.
+int builtin_echo(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
+    int argc = builtin_count_args(argv);
+    struct cmd_opts opts;
+    int optind;
+    int retval = parse_cmd_opts(&opts, &optind, argc, argv, parser, streams);
+    if (retval != STATUS_CMD_OK) return retval;
+
+    // The special character \c can be used to indicate no more output.
+    bool continue_output = true;
+
+    const wchar_t *const *args_to_echo = argv + optind;
+    for (size_t idx = 0; continue_output && args_to_echo[idx] != NULL; idx++) {
+        if (opts.print_spaces && idx > 0) {
+            streams.out.push_back(' ');
+        }
+
+        const wchar_t *str = args_to_echo[idx];
+        for (size_t j = 0; continue_output && str[j]; j++) {
+            if (!opts.interpret_special_chars || str[j] != L'\\') {
+                // Not an escape.
+                streams.out.push_back(str[j]);
+            } else {
+                // Most escapes consume one character in addition to the backslash; the numeric
+                // sequences may consume more, while an unrecognized escape sequence consumes none.
+                wchar_t wc;
+                size_t consumed = 1;
+                switch (str[j + 1]) {
+                    case L'a': {
+                        wc = L'\a';
+                        break;
+                    }
+                    case L'b': {
+                        wc = L'\b';
+                        break;
+                    }
+                    case L'e': {
+                        wc = L'\e';
+                        break;
+                    }
+                    case L'f': {
+                        wc = L'\f';
+                        break;
+                    }
+                    case L'n': {
+                        wc = L'\n';
+                        break;
+                    }
+                    case L'r': {
+                        wc = L'\r';
+                        break;
+                    }
+                    case L't': {
+                        wc = L'\t';
+                        break;
+                    }
+                    case L'v': {
+                        wc = L'\v';
+                        break;
+                    }
+                    case L'\\': {
+                        wc = L'\\';
+                        break;
+                    }
+                    case L'c': {
+                        wc = 0;
+                        continue_output = false;
+                        break;
+                    }
+                    default: {
+                        // Octal and hex escape sequences.
+                        unsigned char narrow_val = 0;
+                        if (builtin_echo_parse_numeric_sequence(str + j + 1, &consumed,
+                                                                &narrow_val)) {
+                            // Here consumed must have been set to something. The narrow_val is a
+                            // literal byte that we want to output (#1894).
+                            wc = ENCODE_DIRECT_BASE + narrow_val % 256;
+                        } else {
+                            // Not a recognized escape. We consume only the backslash.
+                            wc = L'\\';
+                            consumed = 0;
+                        }
+                        break;
+                    }
+                }
+
+                // Skip over characters that were part of this escape sequence (but not the
+                // backslash, which will be handled by the loop increment).
+                j += consumed;
+
+                if (continue_output) {
+                    streams.out.push_back(wc);
+                }
+            }
+        }
+    }
+    if (opts.print_newline && continue_output) {
+        streams.out.push_back('\n');
+    }
+    return STATUS_CMD_OK;
+}
diff --git a/src/builtin_echo.h b/src/builtin_echo.h
new file mode 100644
index 000000000..ecf133700
--- /dev/null
+++ b/src/builtin_echo.h
@@ -0,0 +1,9 @@
+// Prototype for the function that implements the echo builtin.
+#ifndef FISH_BUILTIN_ECHO_H
+#define FISH_BUILTIN_ECHO_H
+
+class parser_t;  // forward declarations; the prototype below only takes references
+struct io_streams_t;
+
+int builtin_echo(parser_t &parser, io_streams_t &streams, wchar_t **argv);  // argv ends with NULL
+#endif