[string] Make the argument parsing instance-based

This adds a new class arg_iterator_t which encapsulates decisions about
whether to read arguments from stdin or argv. It also migrates the
unread bytes buffer from a static variable to an instance variable.
This commit is contained in:
ridiculousfish 2017-12-21 12:42:57 -08:00
parent 94ff789c4b
commit 0c55b79cfc

View file

@ -62,50 +62,68 @@ static bool string_args_from_stdin(const io_streams_t &streams) {
return streams.stdin_is_directly_redirected; return streams.stdin_is_directly_redirected;
} }
static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) { static const wchar_t *string_get_arg_argv(int *argidx, const wchar_t *const *argv) {
// We might read more than a line - store the rest in a static buffer. return argv && argv[*argidx] ? argv[(*argidx)++] : NULL;
static std::string buffer; }
// A helper type for extracting arguments from either argv or stdin.
namespace {
class arg_iterator_t {
// The list of arguments passed to the string builtin.
const wchar_t *const *argv_;
// If using argv, index of the next argument to return.
int argidx_;
// If not using argv, a string to store bytes that have been read but not yet returned.
std::string buffer_;
// Backing storage for the next() string.
wcstring storage_;
const io_streams_t &streams_;
/// \return the next argument from stdin
const wchar_t *get_arg_stdin() {
assert(string_args_from_stdin(streams_) && "should not be reading from stdin");
// Read in chunks from fd until buffer has a line. // Read in chunks from fd until buffer has a line.
size_t pos; size_t pos;
while ((pos = buffer.find('\n')) == std::string::npos) { while ((pos = buffer_.find('\n')) == std::string::npos) {
char buf[STRING_CHUNK_SIZE]; char buf[STRING_CHUNK_SIZE];
int n = read_blocked(streams.stdin_fd, buf, STRING_CHUNK_SIZE); long n = read_blocked(streams_.stdin_fd, buf, STRING_CHUNK_SIZE);
if (n == 0) { if (n == 0) {
// If we still have buffer contents, flush them, // If we still have buffer contents, flush them,
// in case there was no trailing '\n'. // in case there was no trailing '\n'.
if (buffer.empty()) return NULL; if (buffer_.empty()) return NULL;
*storage = str2wcstring(buffer); storage_ = str2wcstring(buffer_);
buffer.clear(); buffer_.clear();
return storage->c_str(); return storage_.c_str();
} }
if (n == -1) { if (n == -1) {
// Some error happened. We can't do anything about it, // Some error happened. We can't do anything about it,
// so ignore it. // so ignore it.
// (read_blocked already retries for EAGAIN and EINTR) // (read_blocked already retries for EAGAIN and EINTR)
*storage = str2wcstring(buffer); storage_ = str2wcstring(buffer_);
buffer.clear(); buffer_.clear();
return NULL; return NULL;
} }
buffer.append(buf, n); buffer_.append(buf, n);
} }
// Split the buffer on the '\n' and return the first part. // Split the buffer on the '\n' and return the first part.
*storage = str2wcstring(buffer, pos); storage_ = str2wcstring(buffer_, pos);
buffer.erase(0, pos + 1); buffer_.erase(0, pos + 1);
return storage->c_str(); return storage_.c_str();
} }
static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv) { public:
return argv && argv[*argidx] ? argv[(*argidx)++] : NULL; arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams)
} : argv_(argv), argidx_(argidx), streams_(streams) {}
static const wchar_t *string_get_arg(int *argidx, wchar_t **argv, wcstring *storage, /// \return the next argument, or null if the argument list is exhausted.
const io_streams_t &streams) { const wchar_t *next() {
if (string_args_from_stdin(streams)) { if (string_args_from_stdin(streams_)) {
return string_get_arg_stdin(storage, streams); return get_arg_stdin();
} }
return string_get_arg_argv(argidx, argv); return string_get_arg_argv(&argidx_, argv_);
}
};
} }
// This is used by the string subcommands to communicate with the option parser which flags are // This is used by the string subcommands to communicate with the option parser which flags are
@ -458,12 +476,12 @@ static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wc
/// Escape a string so that it can be used in a fish script without further word splitting. /// Escape a string so that it can be used in a fish script without further word splitting.
static int string_escape_script(options_t &opts, int optind, wchar_t **argv, static int string_escape_script(options_t &opts, int optind, wchar_t **argv,
io_streams_t &streams) { io_streams_t &streams) {
wcstring storage;
int nesc = 0; int nesc = 0;
escape_flags_t flags = ESCAPE_ALL; escape_flags_t flags = ESCAPE_ALL;
if (opts.no_quoted) flags |= ESCAPE_NO_QUOTED; if (opts.no_quoted) flags |= ESCAPE_NO_QUOTED;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
streams.out.append(escape_string(arg, flags, STRING_STYLE_SCRIPT)); streams.out.append(escape_string(arg, flags, STRING_STYLE_SCRIPT));
streams.out.append(L'\n'); streams.out.append(L'\n');
nesc++; nesc++;
@ -475,11 +493,11 @@ static int string_escape_script(options_t &opts, int optind, wchar_t **argv,
/// Escape a string so that it can be used as a URL. /// Escape a string so that it can be used as a URL.
static int string_escape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) { static int string_escape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
UNUSED(opts); UNUSED(opts);
wcstring storage;
int nesc = 0; int nesc = 0;
escape_flags_t flags = 0; escape_flags_t flags = 0;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
streams.out.append(escape_string(arg, flags, STRING_STYLE_URL)); streams.out.append(escape_string(arg, flags, STRING_STYLE_URL));
streams.out.append(L'\n'); streams.out.append(L'\n');
nesc++; nesc++;
@ -491,11 +509,11 @@ static int string_escape_url(options_t &opts, int optind, wchar_t **argv, io_str
/// Escape a string so that it can be used as a fish var name. /// Escape a string so that it can be used as a fish var name.
static int string_escape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) { static int string_escape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
UNUSED(opts); UNUSED(opts);
wcstring storage;
int nesc = 0; int nesc = 0;
escape_flags_t flags = 0; escape_flags_t flags = 0;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
streams.out.append(escape_string(arg, flags, STRING_STYLE_VAR)); streams.out.append(escape_string(arg, flags, STRING_STYLE_VAR));
streams.out.append(L'\n'); streams.out.append(L'\n');
nesc++; nesc++;
@ -508,11 +526,11 @@ static int string_escape_var(options_t &opts, int optind, wchar_t **argv, io_str
static int string_unescape_script(options_t &opts, int optind, wchar_t **argv, static int string_unescape_script(options_t &opts, int optind, wchar_t **argv,
io_streams_t &streams) { io_streams_t &streams) {
UNUSED(opts); UNUSED(opts);
wcstring storage;
int nesc = 0; int nesc = 0;
unescape_flags_t flags = 0; unescape_flags_t flags = 0;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
wcstring result; wcstring result;
if (unescape_string(arg, &result, flags, STRING_STYLE_SCRIPT)) { if (unescape_string(arg, &result, flags, STRING_STYLE_SCRIPT)) {
streams.out.append(result); streams.out.append(result);
@ -527,11 +545,11 @@ static int string_unescape_script(options_t &opts, int optind, wchar_t **argv,
/// Unescape an encoded URL. /// Unescape an encoded URL.
static int string_unescape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) { static int string_unescape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
UNUSED(opts); UNUSED(opts);
wcstring storage;
int nesc = 0; int nesc = 0;
unescape_flags_t flags = 0; unescape_flags_t flags = 0;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
wcstring result; wcstring result;
if (unescape_string(arg, &result, flags, STRING_STYLE_URL)) { if (unescape_string(arg, &result, flags, STRING_STYLE_URL)) {
streams.out.append(result); streams.out.append(result);
@ -546,11 +564,11 @@ static int string_unescape_url(options_t &opts, int optind, wchar_t **argv, io_s
/// Unescape an encoded var name. /// Unescape an encoded var name.
static int string_unescape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) { static int string_unescape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
UNUSED(opts); UNUSED(opts);
wcstring storage;
int nesc = 0; int nesc = 0;
unescape_flags_t flags = 0; unescape_flags_t flags = 0;
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = aiter.next()) {
wcstring result; wcstring result;
if (unescape_string(arg, &result, flags, STRING_STYLE_VAR)) { if (unescape_string(arg, &result, flags, STRING_STYLE_VAR)) {
streams.out.append(result); streams.out.append(result);
@ -617,9 +635,8 @@ static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_
const wchar_t *sep = opts.arg1; const wchar_t *sep = opts.arg1;
int nargs = 0; int nargs = 0;
const wchar_t *arg; arg_iterator_t aiter(argv, optind, streams);
wcstring storage; while (const wchar_t *arg = aiter.next()) {
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) {
if (!opts.quiet) { if (!opts.quiet) {
if (nargs > 0) { if (nargs > 0) {
streams.out.append(sep); streams.out.append(sep);
@ -642,10 +659,9 @@ static int string_length(parser_t &parser, io_streams_t &streams, int argc, wcha
int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
if (retval != STATUS_CMD_OK) return retval; if (retval != STATUS_CMD_OK) return retval;
const wchar_t *arg;
int nnonempty = 0; int nnonempty = 0;
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) { while (const wchar_t *arg = aiter.next()) {
size_t n = wcslen(arg); size_t n = wcslen(arg);
if (n > 0) { if (n > 0) {
nnonempty++; nnonempty++;
@ -922,9 +938,8 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar
matcher = make_unique<wildcard_matcher_t>(cmd, pattern, opts, streams); matcher = make_unique<wildcard_matcher_t>(cmd, pattern, opts, streams);
} }
const wchar_t *arg; arg_iterator_t aiter(argv, optind, streams);
wcstring storage; while (const wchar_t *arg = aiter.next()) {
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) {
if (!matcher->report_matches(arg)) { if (!matcher->report_matches(arg)) {
return STATUS_INVALID_ARGS; return STATUS_INVALID_ARGS;
} }
@ -1100,8 +1115,8 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, wch
replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams); replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams);
} }
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { while (const wchar_t *arg = aiter.next()) {
if (!replacer->replace_matches(arg)) return STATUS_INVALID_ARGS; if (!replacer->replace_matches(arg)) return STATUS_INVALID_ARGS;
} }
@ -1123,9 +1138,8 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar
wcstring_list_t splits; wcstring_list_t splits;
size_t arg_count = 0; size_t arg_count = 0;
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
const wchar_t *arg; while (const wchar_t *arg = aiter.next()) {
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) {
const wchar_t *arg_end = arg + wcslen(arg); const wchar_t *arg_end = arg + wcslen(arg);
if (opts.right) { if (opts.right) {
typedef std::reverse_iterator<const wchar_t *> reverser; typedef std::reverse_iterator<const wchar_t *> reverser;
@ -1189,10 +1203,10 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, wcha
if (retval != STATUS_CMD_OK) return retval; if (retval != STATUS_CMD_OK) return retval;
const wchar_t *to_repeat; const wchar_t *to_repeat;
wcstring storage;
bool is_empty = true; bool is_empty = true;
if ((to_repeat = string_get_arg(&optind, argv, &storage, streams)) != NULL && *to_repeat) { arg_iterator_t aiter(argv, optind, streams);
if ((to_repeat = aiter.next()) != NULL && *to_repeat) {
const wcstring word(to_repeat); const wcstring word(to_repeat);
const bool limit_repeat = const bool limit_repeat =
(opts.max > 0 && word.length() * opts.count > (size_t)opts.max) || !opts.count; (opts.max > 0 && word.length() * opts.count > (size_t)opts.max) || !opts.count;
@ -1220,9 +1234,8 @@ static int string_sub(parser_t &parser, io_streams_t &streams, int argc, wchar_t
if (retval != STATUS_CMD_OK) return retval; if (retval != STATUS_CMD_OK) return retval;
int nsub = 0; int nsub = 0;
const wchar_t *arg; arg_iterator_t aiter(argv, optind, streams);
wcstring storage; while (const wchar_t *arg = aiter.next()) {
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != NULL) {
typedef wcstring::size_type size_type; typedef wcstring::size_type size_type;
size_type pos = 0; size_type pos = 0;
size_type count = wcstring::npos; size_type count = wcstring::npos;
@ -1268,12 +1281,11 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_
opts.left = opts.right = true; opts.left = opts.right = true;
} }
const wchar_t *arg;
size_t ntrim = 0; size_t ntrim = 0;
wcstring argstr; wcstring argstr;
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) { while (const wchar_t *arg = aiter.next()) {
argstr = arg; argstr = arg;
// Begin and end are respectively the first character to keep on the left, and first // Begin and end are respectively the first character to keep on the left, and first
// character to trim on the right. The length is thus end - start. // character to trim on the right. The length is thus end - start.
@ -1306,8 +1318,8 @@ static int string_lower(parser_t &parser, io_streams_t &streams, int argc, wchar
if (retval != STATUS_CMD_OK) return retval; if (retval != STATUS_CMD_OK) return retval;
int n_transformed = 0; int n_transformed = 0;
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { while (const wchar_t *arg = aiter.next()) {
wcstring transformed(arg); wcstring transformed(arg);
std::transform(transformed.begin(), transformed.end(), transformed.begin(), std::towlower); std::transform(transformed.begin(), transformed.end(), transformed.begin(), std::towlower);
if (wcscmp(transformed.c_str(), arg)) n_transformed++; if (wcscmp(transformed.c_str(), arg)) n_transformed++;
@ -1329,8 +1341,8 @@ static int string_upper(parser_t &parser, io_streams_t &streams, int argc, wchar
if (retval != STATUS_CMD_OK) return retval; if (retval != STATUS_CMD_OK) return retval;
int n_transformed = 0; int n_transformed = 0;
wcstring storage; arg_iterator_t aiter(argv, optind, streams);
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) { while (const wchar_t *arg = aiter.next()) {
wcstring transformed(arg); wcstring transformed(arg);
std::transform(transformed.begin(), transformed.end(), transformed.begin(), std::towupper); std::transform(transformed.begin(), transformed.end(), transformed.begin(), std::towupper);
if (wcscmp(transformed.c_str(), arg)) n_transformed++; if (wcscmp(transformed.c_str(), arg)) n_transformed++;