diff --git a/CHANGELOG.md b/CHANGELOG.md index ba94c0a63..413674046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - Fish is now more forgiving of missing or invalid $TERM values (#3850). - The `string` command now supports a `repeat` subcommand with the obvious behavior (#3864). - The `string match` command now supports a `--filter` flag to emit the entire string partially matched by a pattern (#3957). +- The `string replace` command now supports a `--filter` flag to limit output to strings which underwent a replacement (#3348). - The `functions --details --verbose` output now includes the function description (#597). - Completions for `helm` added (#3829). - Empty components in $CDPATH, $MANPATH and $PATH are now converted to "." (#2106, #3914). diff --git a/doc_src/string.txt b/doc_src/string.txt index 3ae7744fa..b947277ed 100644 --- a/doc_src/string.txt +++ b/doc_src/string.txt @@ -13,7 +13,7 @@ string trim [(-l | --left)] [(-r | --right)] [(-c | --chars CHARS)] string escape [(-n | --no-quoted)] [STRING...] string match [(-a | --all)] [((-f | --filter)] [(-i | --ignore-case)] [(-r | --regex)] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...] -string replace [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)] +string replace [(-a | --all)] [(-f | --filter)] [(-i | --ignore-case)] [(-r | --regex)] [(-q | --quiet)] PATTERN REPLACEMENT [STRING...] string repeat [(-n | --count)] [(-m | --max)] [(-N | --no-newline)] [(-q | --quiet)] [STRING...] @@ -76,7 +76,13 @@ Exit status: 0 if at least one match was found, or 1 otherwise. \subsection string-replace "replace" subcommand -`string replace` is similar to `string match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched. 
If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can contain C-style escape sequences like `\t` as well as references to capturing groups by number or name as `$n` or `${n}`. Exit status: 0 if at least one replacement was performed, or 1 otherwise. +`string replace` is similar to `string match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched. + +If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can contain C-style escape sequences like `\t` as well as references to capturing groups by number or name as `$n` or `${n}`. + +If you specify the `-f` or `--filter` flag, the result is printed only for input strings in which at least one replacement was performed; input strings that PATTERN did not match are omitted from the output. This is useful where you would otherwise use this idiom: `a_cmd | string match pattern | string replace pattern new_pattern`. You can instead just write `a_cmd | string replace --filter pattern new_pattern`. + +Exit status: 0 if at least one replacement was performed, or 1 otherwise. 
\subsection string-repeat "repeat" subcommand diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index 288950488..e1c4fee57 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -307,11 +307,9 @@ class wildcard_matcher_t : public string_matcher_t { wcpattern[i] = towlower(wcpattern[i]); } } - if (opts.filter) { - if (!wcpattern.empty()) { - if (wcpattern.front() != ANY_STRING) wcpattern.insert(0, 1, ANY_STRING); - if (wcpattern.back() != ANY_STRING) wcpattern.push_back(ANY_STRING); - } + if (opts.filter && !wcpattern.empty()) { + if (wcpattern.front() != ANY_STRING) wcpattern.insert(0, 1, ANY_STRING); + if (wcpattern.back() != ANY_STRING) wcpattern.push_back(ANY_STRING); } } @@ -607,10 +605,11 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar struct replace_options_t { bool all; + bool filter; bool ignore_case; bool quiet; - replace_options_t() : all(false), ignore_case(false), quiet(false) {} + replace_options_t() : all(false), filter(false), ignore_case(false), quiet(false) {} }; class string_replacer_t { @@ -625,8 +624,8 @@ class string_replacer_t { : argv0(argv0_), opts(opts_), total_replaced(0), streams(streams_) {} virtual ~string_replacer_t() {} - virtual bool replace_matches(const wchar_t *arg) = 0; int replace_count() { return total_replaced; } + virtual bool replace_matches(const wchar_t *arg) = 0; }; class literal_replacer_t : public string_replacer_t { @@ -643,34 +642,7 @@ class literal_replacer_t : public string_replacer_t { patlen(wcslen(pattern)) {} virtual ~literal_replacer_t() {} - - bool replace_matches(const wchar_t *arg) { - wcstring result; - if (patlen == 0) { - result = arg; - } else { - int replaced = 0; - const wchar_t *cur = arg; - while (*cur != L'\0') { - if ((opts.all || replaced == 0) && - (opts.ignore_case ? 
wcsncasecmp(cur, pattern, patlen) - : wcsncmp(cur, pattern, patlen)) == 0) { - result += replacement; - cur += patlen; - replaced++; - total_replaced++; - } else { - result += *cur; - cur++; - } - } - } - if (!opts.quiet) { - streams.out.append(result); - streams.out.append(L'\n'); - } - return true; - } + bool replace_matches(const wchar_t *arg); }; class regex_replacer_t : public string_replacer_t { @@ -704,12 +676,42 @@ class regex_replacer_t : public string_replacer_t { /// A return value of true means all is well (even if no replacements were performed), false /// indicates an unrecoverable error. -bool regex_replacer_t::replace_matches(const wchar_t *arg) { - if (regex.code == 0) { - // pcre2_compile() failed - return false; +bool literal_replacer_t::replace_matches(const wchar_t *arg) { + wcstring result; + bool replacement_occurred = false; + + if (patlen == 0) { + replacement_occurred = true; + result = arg; + } else { + auto &cmp_func = opts.ignore_case ? wcsncasecmp : wcsncmp; + const wchar_t *cur = arg; + while (*cur != L'\0') { + if ((opts.all || !replacement_occurred) && cmp_func(cur, pattern, patlen) == 0) { + result += replacement; + cur += patlen; + replacement_occurred = true; + total_replaced++; + } else { + result += *cur; + cur++; + } + } } + if (!opts.quiet && (!opts.filter || replacement_occurred)) { + streams.out.append(result); + streams.out.append(L'\n'); + } + + return true; +} + +/// A return value of true means all is well (even if no replacements were performed), false +/// indicates an unrecoverable error. +bool regex_replacer_t::replace_matches(const wchar_t *arg) { + if (!regex.code) return false; // pcre2_compile() failed + uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED | (opts.all ? 
PCRE2_SUBSTITUTE_GLOBAL : 0); size_t arglen = wcslen(arg); @@ -744,7 +746,8 @@ bool regex_replacer_t::replace_matches(const wchar_t *arg) { pcre2_strerror(pcre2_rc).c_str()); rc = false; } else { - if (!opts.quiet) { + bool replacement_occurred = pcre2_rc > 0; + if (!opts.quiet && (!opts.filter || replacement_occurred)) { streams.out.append(output); streams.out.append(L'\n'); } @@ -756,30 +759,26 @@ bool regex_replacer_t::replace_matches(const wchar_t *arg) { } static int string_replace(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) { - const wchar_t *short_options = L"aiqr"; - const struct woption long_options[] = {{L"all", no_argument, 0, 'a'}, - {L"ignore-case", no_argument, 0, 'i'}, - {L"quiet", no_argument, 0, 'q'}, - {L"regex", no_argument, 0, 'r'}, - {0, 0, 0, 0}}; + const wchar_t *short_options = L"afiqr"; + const struct woption long_options[] = { + {L"all", no_argument, NULL, 'a'}, {L"filter", no_argument, NULL, 'f'}, + {L"ignore-case", no_argument, NULL, 'i'}, {L"quiet", no_argument, NULL, 'q'}, + {L"regex", no_argument, 0, 'r'}, {NULL, 0, NULL, 0}}; replace_options_t opts; bool regex = false; + int opt; wgetopter_t w; - for (;;) { - int opt = w.wgetopt_long(argc, argv, short_options, long_options, 0); - - if (opt == -1) { - break; - } + while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, NULL)) != -1) { switch (opt) { - case 0: { - break; - } case 'a': { opts.all = true; break; } + case 'f': { + opts.filter = true; + break; + } case 'i': { opts.ignore_case = true; break; @@ -797,7 +796,7 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, wch return BUILTIN_STRING_ERROR; } default: { - DIE("unexpected opt"); + DIE("unexpected retval from wgetopt_long"); break; } } diff --git a/tests/string.err b/tests/string.err index 65883c7bd..a27fc87f4 100644 --- a/tests/string.err +++ b/tests/string.err @@ -5,7 +5,7 @@ string match: ^ # string invalidarg string: Unknown subcommand 'invalidarg' 
-Standard input (line 167): +Standard input (line 183): string invalidarg; and echo "unexpected exit 0" >&2 ^ @@ -29,6 +29,6 @@ string repeat: Expected argument # string repeat -l fakearg 2>&1 string repeat: Unknown option '-l' -Standard input (line 268): +Standard input (line 284): string repeat -l fakearg ^ diff --git a/tests/string.in b/tests/string.in index 397872789..27396927f 100644 --- a/tests/string.in +++ b/tests/string.in @@ -158,6 +158,22 @@ echo echo '# string replace -r -a "(\w)" "\$1\$1" ab' string replace -r -a "(\w)" "\$1\$1" ab +echo +echo '# string replace --filter x X abc axc x def jkx' +string replace --filter x X abc axc x def jkx +or echo Unexpected exit status at line (status --current-line-number) + +string replace --filter y Y abc axc x def jkx +and echo Unexpected exit status at line (status --current-line-number) + +echo +echo '# string replace --regex -f "\d" X 1bc axc 2 d3f jk4 xyz' +string replace --regex -f "\d" X 1bc axc 2 d3f jk4 xyz +or echo Unexpected exit status at line (status --current-line-number) + +string replace --regex -f "Z" X 1bc axc 2 d3f jk4 xyz +and echo Unexpected exit status at line (status --current-line-number) + # test some failure cases echo '# string match -r "[" "a[sd"' >&2 string match -r "[" "a[sd"; and echo "unexpected exit 0" >&2 diff --git a/tests/string.out b/tests/string.out index f22c82546..b2f0472e0 100644 --- a/tests/string.out +++ b/tests/string.out @@ -133,6 +133,17 @@ here # string replace -r -a "(\w)" "\$1\$1" ab aabb +# string replace --filter x X abc axc x def jkx +aXc +X +jkX + +# string replace --regex -f "\d" X 1bc axc 2 d3f jk4 xyz +Xbc +X +dXf +jkX + # string length missing argument returns 1