From d2daa921e9a45801ff418b0719d694e171d517db Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 29 Oct 2022 12:51:13 -0700 Subject: [PATCH] Introduce re::make_anchored This allows adjusting a pattern string so that it matches an entire string, by wrapping the regex in a group like ^(?:...)$ This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on PCRE2 prior to 2017, so we have to adjust the pattern instead. Also introduce an overload of match() which creates its own match_data_t. --- src/fish_tests.cpp | 15 +++++++++++++++ src/re.cpp | 15 +++++++++++++++ src/re.h | 8 ++++++++ 3 files changed, 38 insertions(+) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 28c38bd37..75dabd33a 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -6867,6 +6867,21 @@ static void test_re_basic() { } do_test(join_strings(matches, L',') == L"AA,CC,11"); do_test(join_strings(captures, L',') == L"A,C,1"); + + // Test make_anchored + re = regex_t::try_compile(make_anchored(L"ab(.+?)")); + do_test(re.has_value()); + do_test(!re->match(L"")); + do_test(!re->match(L"ab")); + do_test((re->match(L"abcd") == match_range_t{0, 4})); + do_test((re->match(L"abcdefghij") == match_range_t{0, 10})); + + re = regex_t::try_compile(make_anchored(L"(a+)|(b+)")); + do_test(re.has_value()); + do_test(!re->match(L"")); + do_test(!re->match(L"aabb")); + do_test((re->match(L"aaaa") == match_range_t{0, 4})); + do_test((re->match(L"bbbb") == match_range_t{0, 4})); } static void test_re_reset() { diff --git a/src/re.cpp b/src/re.cpp index 5b1424350..9dba0e18d 100644 --- a/src/re.cpp +++ b/src/re.cpp @@ -130,6 +130,11 @@ maybe_t regex_t::match(match_data_t &md, const wcstring &subject) return match_range_t{ovector[0], ovector[1]}; } +maybe_t regex_t::match(const wcstring &subject) const { + match_data_t md = this->prepare(); + return this->match(md, subject); +} + maybe_t regex_t::group(const match_data_t &md, size_t group_idx) const { if (group_idx >= md.max_capture || 
group_idx >= pcre2_get_ovector_count(get_md(md.data))) { return none(); @@ -288,3 +293,13 @@ regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) { } wcstring re_error_t::message() const { return message_for_code(this->code); } + +wcstring re::make_anchored(wcstring pattern) { + // PATTERN -> ^(?:PATTERN)$. + const wchar_t *prefix = L"^(?:"; + const wchar_t *suffix = L")$"; + pattern.reserve(pattern.size() + wcslen(prefix) + wcslen(suffix)); + pattern.insert(0, prefix); + pattern.append(suffix); + return pattern; +} diff --git a/src/re.h b/src/re.h index 719cc0ff5..134b01c5e 100644 --- a/src/re.h +++ b/src/re.h @@ -111,6 +111,9 @@ class regex_t : noncopyable_t { /// \return a range on a successful match, none on no match. maybe_t match(match_data_t &md, const wcstring &subject) const; + /// A convenience function which calls prepare() for you. + maybe_t match(const wcstring &subject) const; + /// \return the matched range for an indexed or named capture group. 0 means the entire match. maybe_t group(const match_data_t &md, size_t group_idx) const; maybe_t group(const match_data_t &md, const wcstring &name) const; @@ -145,5 +148,10 @@ class regex_t : noncopyable_t { adapters::bytecode_ptr_t code_; }; +/// Adjust a pattern so that it is anchored at both beginning and end. +/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2 +/// (e.g. 10.21, on Xenial). +wcstring make_anchored(wcstring pattern); + } // namespace re #endif