Introduce re::make_anchored

This allows adjusting a pattern string so that it matches an entire
string, by wrapping the regex in a group like ^(?:...)$

This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable
on PCRE2 prior to 2017, so we have to adjust the pattern instead.

Also introduce an overload of match() which creates its own
match_data_t.
This commit is contained in:
ridiculousfish 2022-10-29 12:51:13 -07:00
parent fe7d095647
commit d2daa921e9
3 changed files with 38 additions and 0 deletions

View file

@ -6867,6 +6867,21 @@ static void test_re_basic() {
} }
do_test(join_strings(matches, L',') == L"AA,CC,11"); do_test(join_strings(matches, L',') == L"AA,CC,11");
do_test(join_strings(captures, L',') == L"A,C,1"); do_test(join_strings(captures, L',') == L"A,C,1");
// Test make_anchored
re = regex_t::try_compile(make_anchored(L"ab(.+?)"));
do_test(re.has_value());
do_test(!re->match(L""));
do_test(!re->match(L"ab"));
do_test((re->match(L"abcd") == match_range_t{0, 4}));
do_test((re->match(L"abcdefghij") == match_range_t{0, 10}));
re = regex_t::try_compile(make_anchored(L"(a+)|(b+)"));
do_test(re.has_value());
do_test(!re->match(L""));
do_test(!re->match(L"aabb"));
do_test((re->match(L"aaaa") == match_range_t{0, 4}));
do_test((re->match(L"bbbb") == match_range_t{0, 4}));
} }
static void test_re_reset() { static void test_re_reset() {

View file

@ -130,6 +130,11 @@ maybe_t<match_range_t> regex_t::match(match_data_t &md, const wcstring &subject)
return match_range_t{ovector[0], ovector[1]}; return match_range_t{ovector[0], ovector[1]};
} }
maybe_t<match_range_t> regex_t::match(const wcstring &subject) const {
    // Convenience overload: build throwaway match data via prepare() and delegate to the
    // two-argument match(). The match data is local to this call, so only the overall
    // matched range is handed back to the caller.
    match_data_t scratch = prepare();
    return match(scratch, subject);
}
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const { maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) { if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
return none(); return none();
@ -288,3 +293,13 @@ regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) {
} }
wcstring re_error_t::message() const { return message_for_code(this->code); } wcstring re_error_t::message() const { return message_for_code(this->code); }
wcstring re::make_anchored(wcstring pattern) {
    // Rewrite PATTERN as ^(?:PATTERN)$. The non-capturing group keeps a top-level alternation
    // such as "a|b" anchored as a whole, without introducing a new capture group.
    const wchar_t open_anchor[] = L"^(?:";
    const wchar_t close_anchor[] = L")$";
    // The array extents count the terminating NUL, hence the -1.
    const size_t open_len = sizeof open_anchor / sizeof *open_anchor - 1;
    const size_t close_len = sizeof close_anchor / sizeof *close_anchor - 1;

    // Grow once up front so the two edits below do not each trigger a reallocation.
    pattern.reserve(pattern.size() + open_len + close_len);
    pattern.append(close_anchor, close_len);
    pattern.insert(0, open_anchor, open_len);
    return pattern;
}

View file

@ -111,6 +111,9 @@ class regex_t : noncopyable_t {
/// \return a range on a successful match, none on no match. /// \return a range on a successful match, none on no match.
maybe_t<match_range_t> match(match_data_t &md, const wcstring &subject) const; maybe_t<match_range_t> match(match_data_t &md, const wcstring &subject) const;
/// A convenience overload which calls prepare() for you, using a temporary match_data_t that is
/// discarded once the call returns.
/// \return a range on a successful match, none on no match.
maybe_t<match_range_t> match(const wcstring &subject) const;
/// \return the matched range for an indexed or named capture group. 0 means the entire match. /// \return the matched range for an indexed or named capture group. 0 means the entire match.
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const; maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const; maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
@ -145,5 +148,10 @@ class regex_t : noncopyable_t {
adapters::bytecode_ptr_t code_; adapters::bytecode_ptr_t code_;
}; };
/// Adjust a pattern so that it is anchored at both beginning and end, by wrapping it in a
/// non-capturing group: PATTERN becomes ^(?:PATTERN)$.
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
/// (e.g. 10.21, on Xenial).
/// \param pattern the pattern to anchor; taken by value and modified in place.
/// \return the anchored pattern string.
wcstring make_anchored(wcstring pattern);
} // namespace re } // namespace re
#endif #endif