mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-29 06:13:20 +00:00
Introduce re::make_anchored
This allows adjusting a pattern string so that it matches an entire string, by wrapping the regex in a non-capturing group like ^(?:...)$. This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on PCRE2 prior to 2017, so we have to adjust the pattern instead. Also introduce an overload of match() which creates its own match_data_t.
This commit is contained in:
parent
fe7d095647
commit
d2daa921e9
3 changed files with 38 additions and 0 deletions
|
@ -6867,6 +6867,21 @@ static void test_re_basic() {
|
||||||
}
|
}
|
||||||
do_test(join_strings(matches, L',') == L"AA,CC,11");
|
do_test(join_strings(matches, L',') == L"AA,CC,11");
|
||||||
do_test(join_strings(captures, L',') == L"A,C,1");
|
do_test(join_strings(captures, L',') == L"A,C,1");
|
||||||
|
|
||||||
|
// Test make_anchored
|
||||||
|
re = regex_t::try_compile(make_anchored(L"ab(.+?)"));
|
||||||
|
do_test(re.has_value());
|
||||||
|
do_test(!re->match(L""));
|
||||||
|
do_test(!re->match(L"ab"));
|
||||||
|
do_test((re->match(L"abcd") == match_range_t{0, 4}));
|
||||||
|
do_test((re->match(L"abcdefghij") == match_range_t{0, 10}));
|
||||||
|
|
||||||
|
re = regex_t::try_compile(make_anchored(L"(a+)|(b+)"));
|
||||||
|
do_test(re.has_value());
|
||||||
|
do_test(!re->match(L""));
|
||||||
|
do_test(!re->match(L"aabb"));
|
||||||
|
do_test((re->match(L"aaaa") == match_range_t{0, 4}));
|
||||||
|
do_test((re->match(L"bbbb") == match_range_t{0, 4}));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_re_reset() {
|
static void test_re_reset() {
|
||||||
|
|
15
src/re.cpp
15
src/re.cpp
|
@ -130,6 +130,11 @@ maybe_t<match_range_t> regex_t::match(match_data_t &md, const wcstring &subject)
|
||||||
return match_range_t{ovector[0], ovector[1]};
|
return match_range_t{ovector[0], ovector[1]};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
maybe_t<match_range_t> regex_t::match(const wcstring &subject) const {
|
||||||
|
match_data_t md = this->prepare();
|
||||||
|
return this->match(md, subject);
|
||||||
|
}
|
||||||
|
|
||||||
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
|
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
|
||||||
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
|
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
|
||||||
return none();
|
return none();
|
||||||
|
@ -288,3 +293,13 @@ regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) {
|
||||||
}
|
}
|
||||||
|
|
||||||
wcstring re_error_t::message() const { return message_for_code(this->code); }
|
wcstring re_error_t::message() const { return message_for_code(this->code); }
|
||||||
|
|
||||||
|
wcstring re::make_anchored(wcstring pattern) {
|
||||||
|
// PATTERN -> ^(?:PATTERN)$.
|
||||||
|
const wchar_t *prefix = L"^(?:";
|
||||||
|
const wchar_t *suffix = L")$";
|
||||||
|
pattern.reserve(pattern.size() + wcslen(prefix) + wcslen(suffix));
|
||||||
|
pattern.insert(0, prefix);
|
||||||
|
pattern.append(suffix);
|
||||||
|
return pattern;
|
||||||
|
}
|
||||||
|
|
8
src/re.h
8
src/re.h
|
@ -111,6 +111,9 @@ class regex_t : noncopyable_t {
|
||||||
/// \return a range on a successful match, none on no match.
|
/// \return a range on a successful match, none on no match.
|
||||||
maybe_t<match_range_t> match(match_data_t &md, const wcstring &subject) const;
|
maybe_t<match_range_t> match(match_data_t &md, const wcstring &subject) const;
|
||||||
|
|
||||||
|
/// A convenience function which calls prepare() for you.
|
||||||
|
maybe_t<match_range_t> match(const wcstring &subject) const;
|
||||||
|
|
||||||
/// \return the matched range for an indexed or named capture group. 0 means the entire match.
|
/// \return the matched range for an indexed or named capture group. 0 means the entire match.
|
||||||
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
|
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
|
||||||
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
|
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
|
||||||
|
@ -145,5 +148,10 @@ class regex_t : noncopyable_t {
|
||||||
adapters::bytecode_ptr_t code_;
|
adapters::bytecode_ptr_t code_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Adjust a pattern so that it is anchored at both beginning and end.
|
||||||
|
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
|
||||||
|
/// (e.g. 10.21, on Xenial).
|
||||||
|
wcstring make_anchored(wcstring pattern);
|
||||||
|
|
||||||
} // namespace re
|
} // namespace re
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue