From a0ec9772cd1a0a548a501a7633be05dab4e5ee46 Mon Sep 17 00:00:00 2001 From: Michael Steed Date: Thu, 10 Sep 2015 19:26:45 -0600 Subject: [PATCH] use fish's wildcard_match() for glob matching --- doc_src/string.txt | 9 --- src/builtin_string.cpp | 151 +++++++++++++---------------------------- src/fish_tests.cpp | 6 +- 3 files changed, 47 insertions(+), 119 deletions(-) diff --git a/doc_src/string.txt b/doc_src/string.txt index 0f6394e50..c77814737 100644 --- a/doc_src/string.txt +++ b/doc_src/string.txt @@ -130,15 +130,6 @@ string match -i 'a??B' Axxb # Output: # Axxb -string match -i '[aeiou]' A B C D E -# Output: -# A -# E - -string match '[^fb]*' foo bar baz qux -# Output: -# qux - echo 'ok?' | string match '*\\?' # Output: # ok? diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index c78fa99f5..b6af225a2 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -8,6 +8,8 @@ #endif #include "pcre2.h" +#include "wildcard.h" + #define MAX_REPLACE_SIZE size_t(1048576) // pcre2_substitute maximum output size in wchar_t enum @@ -291,14 +293,12 @@ struct match_options_t class string_matcher_t { protected: - const wchar_t *argv0; - const wchar_t *pattern; match_options_t opts; int total_matched; public: - string_matcher_t(const wchar_t *argv0_, const wchar_t *pattern_, const match_options_t &opts_) - : argv0(argv0_), pattern(pattern_), opts(opts_), total_matched(0) + string_matcher_t(const match_options_t &opts_) + : opts(opts_), total_matched(0) { } virtual ~string_matcher_t() { } @@ -308,112 +308,51 @@ public: class wildcard_matcher_t: public string_matcher_t { - bool arg_matches(const wchar_t *pat, const wchar_t *arg) - { - for (; *arg != L'\0'; arg++, pat++) - { - switch (*pat) - { - case L'?': - break; - - case L'*': - // skip redundant * - while (*pat == L'*') - { - pat++; - } - - // * at end matches whatever follows - if (*pat == L'\0') - { - return true; - } - - while (*arg != L'\0') - { - if (arg_matches(pat, arg++)) - { - return true; - } - } - return false; - - case L'[': - { - bool negate = false; - if (*++pat == L'^') - { - negate = true; - pat++; - } - - bool match = false; - wchar_t argch = opts.ignore_case ? towlower(*arg) : *arg; - wchar_t patch, patch2; - while ((patch = *pat++) != L']') - { - if (patch == L'\0') - { - return false; // no closing ] - } - if (*pat == L'-' && (patch2 = *(pat + 1)) != L'\0' && patch2 != L']') - { - if (opts.ignore_case ? towlower(patch) <= argch && argch <= towlower(patch2) - : patch <= argch && argch <= patch2) - { - match = true; - } - pat += 2; - } - else if (patch == argch) - { - match = true; - } - } - if (match == negate) - { - return false; - } - pat--; - break; - } - - case L'\\': - if (*(pat + 1) != L'\0') - { - pat++; - } - // fall through - - default: - if (opts.ignore_case ? towlower(*arg) != towlower(*pat) : *arg != *pat) - { - return false; - } - break; - } - } - // arg is exhausted - it's a match only if pattern is as well - while (*pat == L'*') - { - pat++; - } - return *pat == L'\0'; - } + wchar_t *wcpattern; public: - wildcard_matcher_t(const wchar_t *argv0_, const wchar_t *pattern_, const match_options_t &opts_) - : string_matcher_t(argv0_, pattern_, opts_) - { } + wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts) + : string_matcher_t(opts) + { + wcpattern = parse_util_unescape_wildcards(pattern); - virtual ~wildcard_matcher_t() { } + if (opts.ignore_case) + { + wchar_t *c = wcpattern; + while (*c != L'\0') + { + *c = towlower(*c); + c++; + } + } + } + + virtual ~wildcard_matcher_t() + { + if (wcpattern != 0) + { + free(wcpattern); + } + } bool report_matches(const wchar_t *arg) { // Note: --all is a no-op for glob matching since the pattern is always // matched against the entire argument - bool match = arg_matches(pattern, arg); + bool match; + if (opts.ignore_case) + { + wcstring s = arg; + for (int i = 0; i < s.length(); i++) + { + s[i] = towlower(s[i]); + } + match = wildcard_match(s.c_str(), wcpattern, false); + } + else + { + match = wildcard_match(arg, wcpattern, false); + } if (match) { total_matched++; @@ -501,6 +440,7 @@ struct compiled_regex_t class pcre2_matcher_t: public string_matcher_t { + const wchar_t *argv0; compiled_regex_t regex; int report_match(const wchar_t *arg, int pcre2_rc) @@ -549,8 +489,9 @@ class pcre2_matcher_t: public string_matcher_t } public: - pcre2_matcher_t(const wchar_t *argv0_, const wchar_t *pattern_, const match_options_t &opts_) - : string_matcher_t(argv0_, pattern_, opts_), + pcre2_matcher_t(const wchar_t *argv0_, const wchar_t *pattern, const match_options_t &opts) + : string_matcher_t(opts), + argv0(argv0_), regex(argv0_, pattern, opts.ignore_case) { } @@ -1294,7 +1235,7 @@ static int string_sub(parser_t &parser, int argc, wchar_t **argv) static int string_trim(parser_t &parser, int argc, wchar_t **argv) { - const wchar_t *short_options = L"c:lqr"; + const wchar_t *short_options = L":c:lqr"; const struct woption long_options[] = { { L"chars", required_argument, 0, 'c'}, diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 99d3e8cfc..5c3f6ca27 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4098,6 +4098,7 @@ static void test_string(void) { {L"string", L"match", L"a*b", L"axxb", 0}, 0, L"axxb\n" }, { {L"string", L"match", L"a??b", L"axxb", 0}, 0, L"axxb\n" }, { {L"string", L"match", L"-i", L"a??B", L"axxb", 0}, 0, L"axxb\n" }, + { {L"string", L"match", L"-i", L"a??b", L"Axxb", 0}, 0, L"Axxb\n" }, { {L"string", L"match", L"a*", L"axxb", 0}, 0, L"axxb\n" }, { {L"string", L"match", L"*a", L"xxa", 0}, 0, L"xxa\n" }, { {L"string", L"match", L"*a*", L"axa", 0}, 0, L"axa\n" }, @@ -4111,11 +4112,6 @@ static void test_string(void) { {L"string", L"match", L"*?", L"ab", 0}, 0, L"ab\n" }, { {L"string", L"match", L"?*", L"a", 0}, 0, L"a\n" }, { {L"string", L"match", L"?*", L"ab", 0}, 0, L"ab\n" }, - { {L"string", L"match", L"[A-F][^A-F]", L"FG", 0}, 0, L"FG\n" }, - { {L"string", L"match", L"[A][B]", L"AB", 0}, 0, L"AB\n" }, - { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0x6a", 0}, 0, L"0x6a\n" }, - { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xA6", 0}, 0, L"0xA6\n" }, - { {L"string", L"match", L"-i", L"0x[0-9a-f][0-9A-F]", L"0xAb", 0}, 0, L"0xAb\n" }, { {L"string", L"match", L"\\*", L"*", 0}, 0, L"*\n" }, { {L"string", L"match", L"a*\\", L"abc\\", 0}, 0, L"abc\\\n" }, { {L"string", L"match", L"a*\\?", L"abc?", 0}, 0, L"abc?\n" },