Implement an --invert/-v option for string match, like grep -v.

Only lines that do not match the pattern are shown.
This commit is contained in:
Aaron Gyes 2016-04-08 10:18:58 +08:00 committed by David Adam
parent 155befe90e
commit 790c7f80c7
4 changed files with 107 additions and 36 deletions

View file

@ -12,7 +12,7 @@ string trim [(-l | --left)] [(-r | --right)] [(-c | --chars CHARS)]
[(-q | --quiet)] [STRING...] [(-q | --quiet)] [STRING...]
string escape [(-n | --no-quoted)] [STRING...] string escape [(-n | --no-quoted)] [STRING...]
string match [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)] string match [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)]
[(-n | --index)] [(-q | --quiet)] PATTERN [STRING...] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...]
string replace [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)] string replace [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)]
[(-q | --quiet)] PATTERN REPLACEMENT [STRING...] [(-q | --quiet)] PATTERN REPLACEMENT [STRING...]
\endfish \endfish
@ -44,7 +44,7 @@ The following subcommands are available:
- `escape` escapes each STRING such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quoted` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise. - `escape` escapes each STRING such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quoted` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise.
- `match` tests each STRING against PATTERN and prints matching substrings. Only the first match for each STRING is reported unless `-a` or `--all` is given, in which case all matches are reported. Matching can be made case-insensitive with `-i` or `--ignore-case`. If `-n` or `--index` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. Exit status: 0 if at least one match was found, or 1 otherwise. - `match` tests each STRING against PATTERN and prints matching substrings. Only the first match for each STRING is reported unless `-a` or `--all` is given, in which case all matches are reported. Matching can be made case-insensitive with `-i` or `--ignore-case`. If `-n` or `--index` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. If `-v` or `--invert` is given, only the STRINGs that do not match the given glob pattern or regular expression are printed. Exit status: 0 if at least one match was found (with `-v`, if at least one STRING did not match), or 1 otherwise.
- `replace` is similar to `match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can contain C-style escape sequences like `\t` as well as references to capturing groups by number or name as `$n` or `${n}`. Exit status: 0 if at least one replacement was performed, or 1 otherwise. - `replace` is similar to `match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can contain C-style escape sequences like `\t` as well as references to capturing groups by number or name as `$n` or `${n}`. Exit status: 0 if at least one replacement was performed, or 1 otherwise.
@ -120,6 +120,14 @@ The following subcommands are available:
>_ echo 'ok?' | string match '*\\?' >_ echo 'ok?' | string match '*\\?'
>_ <outp>ok?</outp> >_ <outp>ok?</outp>
>_ string match -r -v "c.*[12]" {cat,dog}(seq 1 4)
<outp>dog1</outp>
<outp>dog2</outp>
<outp>cat3</outp>
<outp>dog3</outp>
<outp>cat4</outp>
<outp>dog4</outp>
\endfish \endfish
\subsection string-example-match-regex Match Regex Examples \subsection string-example-match-regex Match Regex Examples

View file

@ -304,9 +304,10 @@ struct match_options_t
bool all; bool all;
bool ignore_case; bool ignore_case;
bool index; bool index;
bool invert_match;
bool quiet; bool quiet;
match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { } match_options_t(): all(false), ignore_case(false), index(false), invert_match(false), quiet(false) { }
}; };
class string_matcher_t class string_matcher_t
@ -328,17 +329,15 @@ public:
class wildcard_matcher_t: public string_matcher_t class wildcard_matcher_t: public string_matcher_t
{ {
private:
wcstring wcpattern; wcstring wcpattern;
public: public:
wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts, io_streams_t &streams) wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts, io_streams_t &streams)
: string_matcher_t(opts, streams) : string_matcher_t(opts, streams), wcpattern(parse_util_unescape_wildcards(pattern))
{ {
wcpattern = parse_util_unescape_wildcards(pattern);
if (opts.ignore_case) if (opts.ignore_case)
{ {
for (int i = 0; i < wcpattern.length(); i++) for (size_t i = 0; i < wcpattern.length(); i++)
{ {
wcpattern[i] = towlower(wcpattern[i]); wcpattern[i] = towlower(wcpattern[i]);
} }
@ -352,10 +351,11 @@ public:
// Note: --all is a no-op for glob matching since the pattern is always // Note: --all is a no-op for glob matching since the pattern is always
// matched against the entire argument // matched against the entire argument
bool match; bool match;
if (opts.ignore_case) if (opts.ignore_case)
{ {
wcstring s = arg; wcstring s = arg;
for (int i = 0; i < s.length(); i++) for (size_t i = 0; i < s.length(); i++)
{ {
s[i] = towlower(s[i]); s[i] = towlower(s[i]);
} }
@ -365,13 +365,11 @@ public:
{ {
match = wildcard_match(arg, wcpattern, false); match = wildcard_match(arg, wcpattern, false);
} }
if (match) if (match ^ opts.invert_match)
{ {
total_matched++; total_matched++;
}
if (!opts.quiet) if (!opts.quiet)
{
if (match)
{ {
if (opts.index) if (opts.index)
{ {
@ -458,28 +456,38 @@ class pcre2_matcher_t: public string_matcher_t
// Return values: -1 = error, 0 = no match, 1 = match // Return values: -1 = error, 0 = no match, 1 = match
if (pcre2_rc == PCRE2_ERROR_NOMATCH) if (pcre2_rc == PCRE2_ERROR_NOMATCH)
{ {
return 0; if (opts.invert_match && !opts.quiet)
{
streams.out.append(arg);
streams.out.push_back(L'\n');
} }
if (pcre2_rc < 0)
return opts.invert_match ? 1 : 0;
}
else if (pcre2_rc < 0)
{ {
string_error(streams, _(L"%ls: Regular expression match error: %ls\n"), string_error(streams, _(L"%ls: Regular expression match error: %ls\n"),
argv0, pcre2_strerror(pcre2_rc).c_str()); argv0, pcre2_strerror(pcre2_rc).c_str());
return -1; return -1;
} }
if (pcre2_rc == 0) else if (pcre2_rc == 0)
{ {
// The output vector wasn't big enough. Should not happen. // The output vector wasn't big enough. Should not happen.
string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0); string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0);
return -1; return -1;
} }
else if (opts.invert_match)
return 0;
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
for (int j = 0; j < pcre2_rc; j++) for (int j = 0; j < pcre2_rc; j++)
{ {
PCRE2_SIZE begin = ovector[2*j]; PCRE2_SIZE begin = ovector[2*j];
PCRE2_SIZE end = ovector[2*j + 1]; PCRE2_SIZE end = ovector[2*j + 1];
if (!opts.quiet)
{ if (begin != PCRE2_UNSET && end != PCRE2_UNSET && !opts.quiet)
if (begin != PCRE2_UNSET && end != PCRE2_UNSET)
{ {
if (opts.index) if (opts.index)
{ {
@ -489,11 +497,12 @@ class pcre2_matcher_t: public string_matcher_t
{ {
streams.out.append(wcstring(&arg[begin], end - begin)); streams.out.append(wcstring(&arg[begin], end - begin));
} }
streams.out.append(L'\n'); streams.out.push_back(L'\n');
} }
} }
}
return 1; return opts.invert_match ? 0 : 1;
} }
public: public:
@ -525,7 +534,7 @@ public:
// pcre2 match error // pcre2 match error
return false; return false;
} }
if (rc == 0) else if (rc == 0)
{ {
// no match // no match
return true; return true;
@ -533,6 +542,11 @@ public:
matched++; matched++;
total_matched++; total_matched++;
if (opts.invert_match)
{
return true;
}
// Report any additional matches // Report any additional matches
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
while (opts.all || matched == 0) while (opts.all || matched == 0)
@ -573,12 +587,13 @@ public:
static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv)
{ {
const wchar_t *short_options = L"ainqr"; const wchar_t *short_options = L"ainvqr";
const struct woption long_options[] = const struct woption long_options[] =
{ {
{ L"all", no_argument, 0, 'a'}, { L"all", no_argument, 0, 'a'},
{ L"ignore-case", no_argument, 0, 'i'}, { L"ignore-case", no_argument, 0, 'i'},
{ L"index", no_argument, 0, 'n'}, { L"index", no_argument, 0, 'n'},
{ L"invert", no_argument, 0, 'v'},
{ L"quiet", no_argument, 0, 'q'}, { L"quiet", no_argument, 0, 'q'},
{ L"regex", no_argument, 0, 'r'}, { L"regex", no_argument, 0, 'r'},
{ 0, 0, 0, 0 } { 0, 0, 0, 0 }
@ -612,6 +627,10 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar
opts.index = true; opts.index = true;
break; break;
case 'v':
opts.invert_match = true;
break;
case 'q': case 'q':
opts.quiet = true; opts.quiet = true;
break; break;
@ -750,7 +769,7 @@ class regex_replacer_t: public string_replacer_t
compiled_regex_t regex; compiled_regex_t regex;
wcstring replacement; wcstring replacement;
wcstring interpret_escapes(const wchar_t *orig) static wcstring interpret_escapes(const wchar_t *orig)
{ {
wcstring result; wcstring result;
@ -782,6 +801,7 @@ public:
bool replace_matches(const wchar_t *arg) bool replace_matches(const wchar_t *arg)
{ {
// A return value of true means all is well (even if no replacements // A return value of true means all is well (even if no replacements
// were performed), false indicates an unrecoverable error. // were performed), false indicates an unrecoverable error.
if (regex.code == 0) if (regex.code == 0)
@ -1091,9 +1111,9 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar
// If we are from the right, split_about gave us reversed strings, in reversed order! // If we are from the right, split_about gave us reversed strings, in reversed order!
if (right) if (right)
{ {
for (size_t i=0; i < splits.size(); i++) for (size_t j = 0; j < splits.size(); j++)
{ {
std::reverse(splits[i].begin(), splits[i].end()); std::reverse(splits[j].begin(), splits[j].end());
} }
std::reverse(splits.begin(), splits.end()); std::reverse(splits.begin(), splits.end());
} }
@ -1339,6 +1359,7 @@ static const struct string_subcommand
const wchar_t *name; const wchar_t *name;
int (*handler)(parser_t &, io_streams_t &, int argc, wchar_t **argv); int (*handler)(parser_t &, io_streams_t &, int argc, wchar_t **argv);
} }
string_subcommands[] = string_subcommands[] =
{ {
{ L"escape", &string_escape }, { L"escape", &string_escape },

View file

@ -1,5 +1,26 @@
# tests for string builtin # tests for string builtin
# mostly taken from examples # mostly taken from examples
string match -r -v "c.*" dog can cat diz; and echo "exit 0"
string match -q -r -v "c.*" dog can cat diz; and echo "exit 0"
string match -v "c*" dog can cat diz; and echo "exit 0"
string match -q -v "c*" dog can cat diz; and echo "exit 0"
string match -v "d*" dog dan dat diz; or echo "exit 1"
string match -q -v "d*" dog dan dat diz; or echo "exit 1"
string match -r -v x y; and echo "exit 0"
string match -r -v x x; or echo "exit 1"
string match -q -r -v x y; and echo "exit 0"
string match -q -r -v x x; or echo "exit 1"
string length 'hello, world' string length 'hello, world'
string length -q ""; and echo not zero length string length -q ""; and echo not zero length
@ -63,3 +84,7 @@ string match -r '[' 'a[sd' 2>/dev/null; or echo "invalid expression error"
string invalidarg 2>/dev/null; or echo "invalid argument error" string invalidarg 2>/dev/null; or echo "invalid argument error"
string length 2>/dev/null; or echo "missing argument returns 0" string length 2>/dev/null; or echo "missing argument returns 0"
string match -r -v "[dcantg].*" dog can cat diz; or echo "no regexp invert match"
string match -v "???" dog can cat diz; or echo "no glob invert match"

View file

@ -1,3 +1,18 @@
dog
diz
exit 0
exit 0
dog
diz
exit 0
exit 0
exit 1
exit 1
y
exit 0
exit 1
exit 0
exit 1
12 12
ab ab
bc bc
@ -45,3 +60,5 @@ aabb
invalid expression error invalid expression error
invalid argument error invalid argument error
missing argument returns 0 missing argument returns 0
no regexp invert match
no glob invert match