Enable case-insensitive substring fuzzy matching

Adds a new match mode for `string_fuzzy_match_t` that matches against a
case-insensitive substring within a string, e.g. `LL` now (partially)
matches against `hello`. This is implemented as a separate mode, given a
lower priority than a same-case substring match (when present).

Note that `fuzzy_match_subsequence_insertions_only` has purposely not
been extended with a case-insensitive version, as that would a) be
unlikely to match often, and b) add a second inefficient fuzzy search
to something that's queried a lot. Perhaps `subsequence_insertions_only`
can simply be changed to be a case-insensitive comparison in the future?

Closes #1196. Affects #3978.
This commit is contained in:
Mahmoud Al-Qudsi 2018-10-16 21:45:04 -05:00
parent bb829075d8
commit dfe6bc531e
2 changed files with 10 additions and 0 deletions

View file

@ -1835,6 +1835,13 @@ string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
assert(match_against.size() >= string.size()); assert(match_against.size() >= string.size());
result.match_distance_first = match_against.size() - string.size(); result.match_distance_first = match_against.size() - string.size();
result.match_distance_second = location; // prefer earlier matches result.match_distance_second = location; // prefer earlier matches
} else if (limit_type >= fuzzy_match_substring &&
(location = ifind(match_against, string)) != wcstring::npos) {
// A case-insensitive version of the string is in the match against.
result.type = fuzzy_match_substring_case_insensitive;
assert(match_against.size() >= string.size());
result.match_distance_first = match_against.size() - string.size();
result.match_distance_second = location; // prefer earlier matches
} else if (limit_type >= fuzzy_match_subsequence_insertions_only && } else if (limit_type >= fuzzy_match_subsequence_insertions_only &&
subsequence_in_string(string, match_against)) { subsequence_in_string(string, match_against)) {
result.type = fuzzy_match_subsequence_insertions_only; result.type = fuzzy_match_subsequence_insertions_only;

View file

@ -399,6 +399,9 @@ enum fuzzy_match_type_t {
// We match a substring of the string: OOBA matches FOOBAR. // We match a substring of the string: OOBA matches FOOBAR.
fuzzy_match_substring, fuzzy_match_substring,
// We match a case-insensitive substring of the string: ooBA matches FOOBAR.
fuzzy_match_substring_case_insensitive,
// A subsequence match with insertions only: FBR matches FOOBAR. // A subsequence match with insertions only: FBR matches FOOBAR.
fuzzy_match_subsequence_insertions_only, fuzzy_match_subsequence_insertions_only,