implement argparse --exclusive

Fixes #4190
This commit is contained in:
Kurtis Rader 2017-07-10 20:58:19 -07:00
parent 277999adef
commit e260c42a13
2 changed files with 235 additions and 108 deletions

View file

@ -117,6 +117,9 @@
9C7A557D1DCD71890049C25D /* fish_key_reader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */; };
9C7A557E1DCD71CD0049C25D /* print_help.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855613B3ACEE0099B651 /* print_help.cpp */; };
9C7A55811DCD739C0049C25D /* fish_key_reader in CopyFiles */ = {isa = PBXBuildFile; fileRef = 9C7A55721DCD71330049C25D /* fish_key_reader */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
CB0F034C1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; };
CB0F034D1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; };
CB0F034E1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; };
D001B5EE1F041CBD000838CC /* builtin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F592F1F041AE4003EE978 /* builtin.cpp */; };
D001B5F01F041CBD000838CC /* builtin_ulimit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F59311F041AE4003EE978 /* builtin_ulimit.cpp */; };
D001B5F21F041CBD000838CC /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F59331F041AE4003EE978 /* builtin_test.cpp */; };
@ -647,8 +650,10 @@
63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_find_bracket.c; sourceTree = "<group>"; };
9C7A55721DCD71330049C25D /* fish_key_reader */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_key_reader; sourceTree = BUILT_PRODUCTS_DIR; };
9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_key_reader.cpp; sourceTree = "<group>"; };
CBB772591F11F93F00780A21 /* builtin_argparse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_argparse.cpp; sourceTree = "<group>"; };
CBB7725A1F11F93F00780A21 /* builtin_argparse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin_argparse.h; sourceTree = "<group>"; };
CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_argparse.cpp; sourceTree = "<group>"; };
CB0F034B1F156FE3001827D3 /* builtin_argparse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin_argparse.h; sourceTree = "<group>"; };
CBB7725B1F14964100780A21 /* fish-shell */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "fish-shell"; sourceTree = "<group>"; };
CBB7725C1F1496AF00780A21 /* fish-shell */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "fish-shell"; sourceTree = "<group>"; };
D00769421990137800CA4627 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; };
D00F63F019137E9D00FCCDEC /* fish_version.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_version.cpp; sourceTree = "<group>"; };
D01A2D23169B730A00767098 /* man1 */ = {isa = PBXFileReference; lastKnownFileType = text; name = man1; path = pages_for_manpath/man1; sourceTree = BUILT_PRODUCTS_DIR; };
@ -964,6 +969,8 @@
D0A084F013B3AC130099B651 = {
isa = PBXGroup;
children = (
CBB7725C1F1496AF00780A21 /* fish-shell */,
CBB7725B1F14964100780A21 /* fish-shell */,
D0D02A91159845EF008E62BD /* Sources */,
D0D02AFC159871BF008E62BD /* Launcher */,
D0D02A8E15983D5F008E62BD /* Libraries */,
@ -986,8 +993,8 @@
D0D02A91159845EF008E62BD /* Sources */ = {
isa = PBXGroup;
children = (
CBB772591F11F93F00780A21 /* builtin_argparse.cpp */,
CBB7725A1F11F93F00780A21 /* builtin_argparse.h */,
CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */,
CB0F034B1F156FE3001827D3 /* builtin_argparse.h */,
9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */,
4E142D731B56B5D7008783C8 /* config.h */,
D0C6FCCB14CFA4B7004CE8AD /* autoload.h */,
@ -1684,6 +1691,7 @@
D007692D1990137800CA4627 /* proc.cpp in Sources */,
D007692E1990137800CA4627 /* reader.cpp in Sources */,
D007692F1990137800CA4627 /* sanity.cpp in Sources */,
CB0F034E1F156FE3001827D3 /* builtin_argparse.cpp in Sources */,
D05F597F1F041AE4003EE978 /* builtin_source.cpp in Sources */,
D05F59B51F041AE4003EE978 /* builtin_contains.cpp in Sources */,
D05F59911F041AE4003EE978 /* builtin_random.cpp in Sources */,
@ -1771,6 +1779,7 @@
D05F59AB1F041AE4003EE978 /* builtin_emit.cpp in Sources */,
D05F59961F041AE4003EE978 /* builtin_printf.cpp in Sources */,
D030FC001A4A38F300F7ADA0 /* function.cpp in Sources */,
CB0F034D1F156FE3001827D3 /* builtin_argparse.cpp in Sources */,
D05F59C61F041AE4003EE978 /* builtin_block.cpp in Sources */,
D05F59C91F041AE4003EE978 /* builtin_bind.cpp in Sources */,
D05F599C1F041AE4003EE978 /* builtin_history.cpp in Sources */,
@ -1880,6 +1889,7 @@
D0D02A7015983842008E62BD /* proc.cpp in Sources */,
D0D02A7115983848008E62BD /* reader.cpp in Sources */,
D0D02A721598384C008E62BD /* sanity.cpp in Sources */,
CB0F034C1F156FE3001827D3 /* builtin_argparse.cpp in Sources */,
D05F597D1F041AE4003EE978 /* builtin_source.cpp in Sources */,
D05F59B31F041AE4003EE978 /* builtin_contains.cpp in Sources */,
D05F598F1F041AE4003EE978 /* builtin_random.cpp in Sources */,

View file

@ -3,19 +3,22 @@
// See issue #4190 for the rationale behind the original behavior of this builtin.
#include "config.h" // IWYU pragma: keep
#if 0
#include <malloc/malloc.h>
#endif
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <wchar.h>
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "builtin.h"
#include "builtin_argparse.h"
@ -48,8 +51,11 @@ class argparse_cmd_opts_t {
bool print_help = false;
bool require_order = false;
wcstring name = L"argparse";
wcstring_list_t raw_exclusive_flags;
wcstring_list_t argv;
struct std::map<wchar_t, option_spec_t *> options;
std::map<wchar_t, option_spec_t *> options;
std::map<wcstring, wchar_t> long_to_short_flag;
std::vector<std::vector<wchar_t>> exclusive_flag_sets;
~argparse_cmd_opts_t() {
for (auto it : options) {
@ -58,12 +64,108 @@ class argparse_cmd_opts_t {
}
};
static const wchar_t *short_options = L"+:hn:r";
static const wchar_t *short_options = L"+:hn:rx:";
static const struct woption long_options[] = {{L"require-order", no_argument, NULL, 'r'},
{L"name", required_argument, NULL, 'n'},
{L"exclusive", required_argument, NULL, 'x'},
{L"help", no_argument, NULL, 'h'},
{NULL, 0, NULL, 0}};
// Check if any pair of mutually exclusive options was seen. Note that since every option must have
// a short name we only need to check those.
static int check_for_mutually_exclusive_flags(argparse_cmd_opts_t &opts, io_streams_t &streams) {
for (auto it : opts.options) {
auto opt_spec = it.second;
if (opt_spec->num_seen == 0) continue;
// We saw this option at least once. Check all the sets of mutually exclusive options to see
// if this option appears in any of them.
for (auto xarg_set : opts.exclusive_flag_sets) {
auto found = std::find(xarg_set.begin(), xarg_set.end(), opt_spec->short_flag);
if (found != xarg_set.end()) {
// Okay, this option is in a mutually exclusive set of options. Check if any of the
// other mutually exclusive options have been seen.
for (auto xflag : xarg_set) {
auto xopt_spec = opts.options[xflag];
// Ignore this flag in the list of mutually exclusive flags.
if (xopt_spec->short_flag == opt_spec->short_flag) continue;
// If it is a different flag check if it has been seen.
if (xopt_spec->num_seen) {
wcstring flag1;
if (opt_spec->short_flag_valid) flag1 = wcstring(1, opt_spec->short_flag);
if (!opt_spec->long_flag.empty()) {
if (opt_spec->short_flag_valid) flag1 += L"/";
flag1 += opt_spec->long_flag;
}
wcstring flag2;
if (xopt_spec->short_flag_valid) flag2 = wcstring(1, xopt_spec->short_flag);
if (!xopt_spec->long_flag.empty()) {
if (xopt_spec->short_flag_valid) flag2 += L"/";
flag2 += xopt_spec->long_flag;
}
streams.err.append_format(
_(L"%ls: Mutually exclusive flags '%ls' and `%ls` seen\n"),
opts.name.c_str(), flag1.c_str(), flag2.c_str());
return STATUS_CMD_ERROR;
}
}
}
}
}
return STATUS_CMD_OK;
}
// This is used as a specialization to allow us to force operator>> to split the input on something
// other than spaces.
class WordDelimitedByComma : public wcstring {};
// Cppcheck incorrectly complains this is unused. It is indirectly used by std:istream_iterator.
std::wistream &operator>>(std::wistream &is, WordDelimitedByComma &output) { // cppcheck-suppress
std::getline(is, output, L',');
return is;
}
// This should be called after all the option specs have been parsed. At that point we have enough
// information to parse the values associated with any `--exclusive` flags.
static int parse_exclusive_args(argparse_cmd_opts_t &opts, io_streams_t &streams) {
for (auto raw_xflags : opts.raw_exclusive_flags) {
// This is an advanced technique that leverages the C++ STL to split the string on commas.
std::wistringstream iss(raw_xflags);
wcstring_list_t xflags((std::istream_iterator<WordDelimitedByComma, wchar_t>(iss)),
std::istream_iterator<WordDelimitedByComma, wchar_t>());
if (xflags.size() < 2) {
streams.err.append_format(_(L"%ls: exclusive flag string '%ls' is not valid\n"),
opts.name.c_str(), raw_xflags.c_str());
return STATUS_CMD_ERROR;
}
std::vector<wchar_t> exclusive_set;
for (auto flag : xflags) {
if (flag.size() == 1 && opts.options.find(flag[0]) != opts.options.end()) {
// It's a short flag.
exclusive_set.push_back(flag[0]);
} else {
auto x = opts.long_to_short_flag.find(flag);
if (x != opts.long_to_short_flag.end()) {
// It's a long flag we store as its short flag equivalent.
exclusive_set.push_back(x->second);
} else {
streams.err.append_format(_(L"%ls: exclusive flag '%ls' is not valid\n"),
opts.name.c_str(), flag.c_str());
return STATUS_CMD_ERROR;
}
}
}
// Store the set of exclusive flags for use when parsing the supplied set of arguments.
opts.exclusive_flag_sets.push_back(exclusive_set);
}
return STATUS_CMD_OK;
}
static bool parse_flag_modifiers(argparse_cmd_opts_t &opts, option_spec_t *opt_spec,
const wcstring &option_spec, const wchar_t *s,
io_streams_t &streams) {
@ -122,9 +224,30 @@ static bool parse_option_spec(argparse_cmd_opts_t &opts, wcstring option_spec,
}
opt_spec->long_flag = wcstring(s, e - s);
opts.long_to_short_flag.emplace(opt_spec->long_flag, opt_spec->short_flag);
return parse_flag_modifiers(opts, opt_spec, option_spec, e, streams);
}
static int collect_option_specs(argparse_cmd_opts_t &opts, int *optind, int argc, wchar_t **argv,
io_streams_t &streams) {
wchar_t *cmd = argv[0];
while (true) {
if (wcscmp(L"--", argv[*optind]) == 0) {
++*optind;
return STATUS_CMD_OK;
}
if (!parse_option_spec(opts, argv[*optind], streams)) {
return STATUS_CMD_ERROR;
}
if (++*optind == argc) {
streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0);
return STATUS_INVALID_ARGS;
}
}
}
static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method)
int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) {
wchar_t *cmd = argv[0];
@ -140,6 +263,12 @@ static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(hig
opts.require_order = true;
break;
}
case 'x': {
// Just save the raw string here. Later, when we have all the short and long flag
// definitions we'll parse these strings into a more useful data structure.
opts.raw_exclusive_flags.push_back(w.woptarg);
break;
}
case 'h': {
opts.print_help = true;
break;
@ -160,7 +289,7 @@ static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(hig
}
*optind = w.woptind;
return STATUS_CMD_OK;
return collect_option_specs(opts, optind, argc, argv, streams);
}
static void populate_option_strings(
@ -188,45 +317,34 @@ static void populate_option_strings(
long_options.get()[i] = {NULL, 0, NULL, 0};
}
// This function mimics the `wgetopt_long()` usage found elsewhere in our other builtin commands.
// It's different in that the short and long option structures are constructed dynamically based on
// arguments provided to the `argparse` command.
static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t &args,
parser_t &parser, io_streams_t &streams) {
if (args.empty()) return STATUS_CMD_OK;
wcstring short_options = opts.require_order ? L"+:" : L":";
int nflags = opts.options.size();
// This assumes every option has a long flag. Which is the worst case and isn't worth optimizing
// since the number of options is always quite small. Thus the size of the array will never be
// much larger than the minimum size required for all the long options.
auto long_options = std::unique_ptr<woption, std::function<void(woption *)>>(
new woption[nflags + 1], [](woption *p) { delete[] p; });
populate_option_strings(opts, short_options, long_options);
const wchar_t *cmd = opts.name.c_str();
int argc = args.size();
// This is awful but we need to convert our wcstring_list_t to a <wchar_t **> that can be passed
// to w.wgetopt_long(). Furthermore, because we're dynamically allocating the array of pointers
// we need to ensure the memory for the data structure is freed when we leave this scope.
null_terminated_array_t<wchar_t> argv_container(args);
auto argv = (wchar_t **)argv_container.get();
// Add a count for how many times we saw each boolean flag but only if we saw the flag at least
// once.
static void update_bool_flag_counts(argparse_cmd_opts_t &opts) {
for (auto it : opts.options) {
auto opt_spec = it.second;
if (opt_spec->num_allowed != 0 || opt_spec->num_seen == 0) continue;
wchar_t count[20];
swprintf(count, sizeof count / sizeof count[0], L"%d", opt_spec->num_seen);
opt_spec->vals.push_back(wcstring(count));
}
}
static int argparse_parse_flags(argparse_cmd_opts_t &opts, const wchar_t *short_options,
const woption *long_options, const wchar_t *cmd, int argc,
wchar_t **argv, int *optind, parser_t &parser,
io_streams_t &streams) {
int opt;
wgetopter_t w;
auto long_opts = long_options.get();
while ((opt = w.wgetopt_long(argc, argv, short_options.c_str(), long_opts, NULL)) != -1) {
switch (opt) {
case ':': {
while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, NULL)) != -1) {
if (opt == ':') {
builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]);
return STATUS_INVALID_ARGS;
}
case '?': {
if (opt == '?') {
builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]);
return STATUS_INVALID_ARGS;
}
default: {
auto found = opts.options.find(opt);
assert(found != opts.options.end());
@ -235,12 +353,9 @@ static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t
if (opt_spec->num_allowed == 0) {
assert(!w.woptarg);
} else if (opt_spec->num_allowed == -1 || opt_spec->num_allowed == 1) {
// This is subtle. We're depending on `wgetopt_long()` to report that a
// mandatory value is missing if `opt_spec->num_allowed == 1` and thus return
// ':' so that we don't take this branch if the mandatory arg is missing.
// Otherwise we can treat the optional and mandatory arg cases the same. That
// is, store the arg as the only value for the flag. Even if we've seen earlier
// instances of the flag.
// We're depending on `wgetopt_long()` to report that a mandatory value is missing if
// `opt_spec->num_allowed == 1` and thus return ':' so that we don't take this branch if
// the mandatory arg is missing.
opt_spec->vals.clear();
if (w.woptarg) {
opt_spec->vals.push_back(w.woptarg);
@ -249,22 +364,46 @@ static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t
assert(w.woptarg);
opt_spec->vals.push_back(w.woptarg);
}
break;
}
}
}
// Add a count for how many times we saw each boolean flag but only if we saw the flag at least
// once.
for (auto it : opts.options) {
auto opt_spec = it.second;
if (opt_spec->num_allowed != 0 || opt_spec->num_seen == 0) continue;
wchar_t count[20];
swprintf(count, sizeof count / sizeof count[0], L"%d", opt_spec->num_seen);
opt_spec->vals.push_back(wcstring(count));
*optind = w.woptind;
return STATUS_CMD_OK;
}
for (int i = w.woptind; argv[i]; i++) opts.argv.push_back(argv[i]);
// This function mimics the `wgetopt_long()` usage found elsewhere in our other builtin commands.
// It's different in that the short and long option structures are constructed dynamically based on
// arguments provided to the `argparse` command.
static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t &args,
parser_t &parser, io_streams_t &streams) {
if (args.empty()) return STATUS_CMD_OK;
wcstring short_options = opts.require_order ? L"+:" : L":";
size_t nflags = opts.options.size();
// This assumes every option has a long flag. Which is the worst case and isn't worth optimizing
// since the number of options is always quite small.
auto long_options = std::unique_ptr<woption, std::function<void(woption *)>>(
new woption[nflags + 1], [](woption *p) { delete[] p; });
populate_option_strings(opts, short_options, long_options);
const wchar_t *cmd = opts.name.c_str();
int argc = static_cast<int>(args.size());
// This is awful but we need to convert our wcstring_list_t to a <wchar_t **> that can be passed
// to w.wgetopt_long(). Furthermore, because we're dynamically allocating the array of pointers
// we need to ensure the memory for the data structure is freed when we leave this scope.
null_terminated_array_t<wchar_t> argv_container(args);
auto argv = (wchar_t **)argv_container.get();
int optind;
int retval = argparse_parse_flags(opts, short_options.c_str(), long_options.get(), cmd, argc,
argv, &optind, parser, streams);
if (retval != STATUS_CMD_OK) return retval;
retval = check_for_mutually_exclusive_flags(opts, streams);
if (retval != STATUS_CMD_OK) return retval;
update_bool_flag_counts(opts);
for (int i = optind; argv[i]; i++) opts.argv.push_back(argv[i]);
return STATUS_CMD_OK;
}
@ -290,35 +429,18 @@ int builtin_argparse(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
}
if (optind == argc) {
streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0);
return STATUS_INVALID_ARGS;
// Apparently we weren't handed any arguments to be parsed according to the option specs we
// just collected. So there isn't anything for us to do.
return STATUS_CMD_OK;
}
while (true) {
if (wcscmp(L"--", argv[optind]) == 0) {
optind++;
break;
}
if (!parse_option_spec(opts, argv[optind], streams)) {
return STATUS_CMD_ERROR;
}
if (++optind == argc) {
streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0);
return STATUS_INVALID_ARGS;
}
}
#if 0
auto stats = mstats();
fwprintf(stderr, L"WTF %d bytes_total %lu chunks_used %lu bytes_used %lu chunks_free %lu bytes_free %lu\n", __LINE__, stats.bytes_total, stats.chunks_used, stats.bytes_used, stats.chunks_free);
#endif
wcstring_list_t args;
args.push_back(opts.name);
while (optind < argc) args.push_back(argv[optind++]);
retval = parse_exclusive_args(opts, streams);
if (retval != STATUS_CMD_OK) return retval;
retval = argparse_parse_args(opts, args, parser, streams);
if (retval != STATUS_CMD_OK) return retval;
@ -345,10 +467,5 @@ int builtin_argparse(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
auto val = list_to_array_val(opts.argv);
env_set(L"argv", *val == ENV_NULL ? NULL : val->c_str(), ENV_LOCAL);
#if 0
stats = mstats();
fwprintf(stderr, L"WTF %d bytes_total %lu chunks_used %lu bytes_used %lu chunks_free %lu bytes_free %lu\n", __LINE__, stats.bytes_total, stats.chunks_used, stats.bytes_used, stats.chunks_free);
#endif
return retval;
}