mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-28 05:43:11 +00:00
Migrate string_fuzzy_match from common.h to wcstringutil.h
This is a more appropriate location for this functionality. Also take this opportunity to clean up subsequence_in_string.
This commit is contained in:
parent
639cd66ba1
commit
9144141ded
5 changed files with 185 additions and 187 deletions
|
@ -1634,101 +1634,6 @@ bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t e
|
|||
return success;
|
||||
}
|
||||
|
||||
/// Reports whether the characters of seq all occur in str in the same relative order, with any
/// number of other characters allowed in between. An empty seq matches every str.
static bool subsequence_in_string(const wcstring &seq, const wcstring &str) {
    // A sequence longer than the string can never fit inside it.
    if (str.size() < seq.size()) return false;

    // Walk the sequence, each time searching just past the previous hit. An empty seq skips the
    // loop entirely and is reported as a match.
    size_t search_from = 0;
    for (wchar_t wanted : seq) {
        const size_t found_at = str.find(wanted, search_from);
        if (found_at == wcstring::npos) return false;  // this character is missing
        search_from = found_at + 1;
    }

    // Every character of the sequence was located.
    return true;
}
|
||||
|
||||
string_fuzzy_match_t::string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first,
|
||||
size_t distance_second)
|
||||
: type(t), match_distance_first(distance_first), match_distance_second(distance_second) {}
|
||||
|
||||
/// Classify how `string` matches `match_against`. Match types are tried from strongest to weakest
/// and the first one that applies is returned; a type is only tried when it is at or below
/// limit_type in the fuzzy_match_type_t ordering. If nothing applies, the result has type
/// fuzzy_match_none with both distances 0.
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
                                               const wcstring &match_against,
                                               fuzzy_match_type_t limit_type) {
    // Distances are generally the amount of text not matched.
    string_fuzzy_match_t result(fuzzy_match_none, 0, 0);

    if (limit_type >= fuzzy_match_exact && string == match_against) {
        result.type = fuzzy_match_exact;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix && string_prefixes_string(string, match_against)) {
        result.type = fuzzy_match_prefix;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_case_insensitive &&
        wcscasecmp(string.c_str(), match_against.c_str()) == 0) {
        result.type = fuzzy_match_case_insensitive;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix_case_insensitive &&
        string_prefixes_string_case_insensitive(string, match_against)) {
        result.type = fuzzy_match_prefix_case_insensitive;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_substring) {
        const size_t where = match_against.find(string);
        if (where != wcstring::npos) {
            // String is contained within match against.
            result.type = fuzzy_match_substring;
            assert(match_against.size() >= string.size());
            result.match_distance_first = match_against.size() - string.size();
            result.match_distance_second = where;  // prefer earlier matches
            return result;
        }
    }

    if (limit_type >= fuzzy_match_substring_case_insensitive) {
        const size_t where = ifind(match_against, string, true);
        if (where != wcstring::npos) {
            // A case-insensitive version of the string is in the match against.
            result.type = fuzzy_match_substring_case_insensitive;
            assert(match_against.size() >= string.size());
            result.match_distance_first = match_against.size() - string.size();
            result.match_distance_second = where;  // prefer earlier matches
            return result;
        }
    }

    if (limit_type >= fuzzy_match_subsequence_insertions_only &&
        subsequence_in_string(string, match_against)) {
        result.type = fuzzy_match_subsequence_insertions_only;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        // It would be nice to prefer matches with greater matching runs here.
    }

    return result;
}
|
||||
|
||||
/// Three-way comparison helper: yields -1 when a orders before b, 0 when the two compare equal,
/// and 1 otherwise.
template <typename T>
static inline int compare_ints(T a, T b) {
    return (a < b) ? -1 : ((a == b) ? 0 : 1);
}
|
||||
|
||||
/// Compare types; if the types match, compare distances.
|
||||
int string_fuzzy_match_t::compare(const string_fuzzy_match_t &rhs) const {
|
||||
if (this->type != rhs.type) {
|
||||
return compare_ints(this->type, rhs.type);
|
||||
} else if (this->match_distance_first != rhs.match_distance_first) {
|
||||
return compare_ints(this->match_distance_first, rhs.match_distance_first);
|
||||
} else if (this->match_distance_second != rhs.match_distance_second) {
|
||||
return compare_ints(this->match_distance_second, rhs.match_distance_second);
|
||||
}
|
||||
return 0; // equal
|
||||
}
|
||||
|
||||
[[gnu::noinline]] void bugreport() {
|
||||
FLOG(error, _(L"This is a bug. Break on 'bugreport' to debug."));
|
||||
FLOG(error, _(L"If you can reproduce it, please report: "), PACKAGE_BUGREPORT, L'.');
|
||||
|
|
92
src/common.h
92
src/common.h
|
@ -288,98 +288,6 @@ wcstring str2wcstring(const std::string &in, size_t len);
|
|||
/// area.
|
||||
std::string wcs2string(const wcstring &input);
|
||||
|
||||
enum fuzzy_match_type_t {
    /// Exact match: FOOBAR matches FOOBAR.
    fuzzy_match_exact = 0,

    /// Prefix match: FO matches FOOBAR.
    fuzzy_match_prefix,

    /// Exact match ignoring case: foobar matches FOOBAR.
    fuzzy_match_case_insensitive,

    /// Prefix match ignoring case: foo matches FOOBAR.
    fuzzy_match_prefix_case_insensitive,

    /// Substring match: OOBA matches FOOBAR.
    fuzzy_match_substring,

    /// Substring match ignoring case: ooBA matches FOOBAR.
    fuzzy_match_substring_case_insensitive,

    /// Subsequence match with insertions only: FBR matches FOOBAR.
    fuzzy_match_subsequence_insertions_only,

    /// No match at all.
    fuzzy_match_none
};
|
||||
|
||||
/// Indicates where a match type requires replacing the entire token.
|
||||
static inline bool match_type_requires_full_replacement(fuzzy_match_type_t t) {
|
||||
switch (t) {
|
||||
case fuzzy_match_exact:
|
||||
case fuzzy_match_prefix: {
|
||||
return false;
|
||||
}
|
||||
case fuzzy_match_case_insensitive:
|
||||
case fuzzy_match_prefix_case_insensitive:
|
||||
case fuzzy_match_substring:
|
||||
case fuzzy_match_substring_case_insensitive:
|
||||
case fuzzy_match_subsequence_insertions_only:
|
||||
case fuzzy_match_none: {
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
DIE("Unreachable");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Indicates where a match shares a prefix with the string it matches.
|
||||
static inline bool match_type_shares_prefix(fuzzy_match_type_t t) {
|
||||
switch (t) {
|
||||
case fuzzy_match_exact:
|
||||
case fuzzy_match_prefix:
|
||||
case fuzzy_match_case_insensitive:
|
||||
case fuzzy_match_prefix_case_insensitive: {
|
||||
return true;
|
||||
}
|
||||
case fuzzy_match_substring:
|
||||
case fuzzy_match_substring_case_insensitive:
|
||||
case fuzzy_match_subsequence_insertions_only:
|
||||
case fuzzy_match_none: {
|
||||
return false;
|
||||
}
|
||||
default: {
|
||||
DIE("Unreachabe");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Test if string is a fuzzy match to another.
|
||||
struct string_fuzzy_match_t {
|
||||
enum fuzzy_match_type_t type;
|
||||
|
||||
// Strength of the match. The value depends on the type. Lower is stronger.
|
||||
size_t match_distance_first;
|
||||
size_t match_distance_second;
|
||||
|
||||
// Constructor.
|
||||
explicit string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first = 0,
|
||||
size_t distance_second = 0);
|
||||
|
||||
// Return -1, 0, 1 if this match is (respectively) better than, equal to, or worse than rhs.
|
||||
int compare(const string_fuzzy_match_t &rhs) const;
|
||||
};
|
||||
|
||||
/// Compute a fuzzy match for a string. If limit_type is not fuzzy_match_none, limit the type to
|
||||
/// matches at or below that type.
|
||||
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
|
||||
const wcstring &match_against,
|
||||
fuzzy_match_type_t limit_type = fuzzy_match_none);
|
||||
|
||||
// Check if we are running in the test mode, where we should suppress error output
|
||||
#define TESTS_PROGRAM_NAME L"(ignore)"
|
||||
bool should_suppress_stderr_for_tests();
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include "common.h"
|
||||
#include "enum_set.h"
|
||||
#include "wcstringutil.h"
|
||||
|
||||
struct completion_mode_t {
|
||||
/// If set, skip file completions.
|
||||
|
|
|
@ -130,6 +130,97 @@ bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
|
|||
proposed_suffix.c_str(), suffix_size) == 0;
|
||||
}
|
||||
|
||||
/// Returns true if needle, represented as a subsequence, is contained within haystack.
/// Note subsequence is not substring: "foo" is a subsequence of "follow" for example.
/// An empty needle is a subsequence of every haystack.
static bool subsequence_in_string(const wcstring &needle, const wcstring &haystack) {
    // Impossible if needle is larger than haystack. (This guard used to compare haystack with
    // itself, so it could never fire.)
    if (needle.size() > haystack.size()) {
        return false;
    }

    // Empty strings are considered to be subsequences of everything.
    if (needle.empty()) {
        return true;
    }

    // Scan the haystack once, advancing through the needle each time its next character is seen.
    auto ni = needle.begin();
    for (auto hi = haystack.begin(); ni != needle.end() && hi != haystack.end(); ++hi) {
        if (*ni == *hi) {
            ++ni;
        }
    }

    // We succeeded if we exhausted our sequence.
    return ni == needle.end();
}
|
||||
|
||||
string_fuzzy_match_t::string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first,
|
||||
size_t distance_second)
|
||||
: type(t), match_distance_first(distance_first), match_distance_second(distance_second) {}
|
||||
|
||||
/// Classify how `string` matches `match_against`. Match types are tried from strongest to weakest
/// and the first one that applies is returned; a type is only tried when it is at or below
/// limit_type in the fuzzy_match_type_t ordering. If nothing applies, the result has type
/// fuzzy_match_none with both distances 0.
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
                                               const wcstring &match_against,
                                               fuzzy_match_type_t limit_type) {
    // Distances are generally the amount of text not matched.
    string_fuzzy_match_t result(fuzzy_match_none, 0, 0);

    if (limit_type >= fuzzy_match_exact && string == match_against) {
        result.type = fuzzy_match_exact;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix && string_prefixes_string(string, match_against)) {
        result.type = fuzzy_match_prefix;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_case_insensitive &&
        wcscasecmp(string.c_str(), match_against.c_str()) == 0) {
        result.type = fuzzy_match_case_insensitive;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix_case_insensitive &&
        string_prefixes_string_case_insensitive(string, match_against)) {
        result.type = fuzzy_match_prefix_case_insensitive;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_substring) {
        const size_t where = match_against.find(string);
        if (where != wcstring::npos) {
            // String is contained within match against.
            result.type = fuzzy_match_substring;
            assert(match_against.size() >= string.size());
            result.match_distance_first = match_against.size() - string.size();
            result.match_distance_second = where;  // prefer earlier matches
            return result;
        }
    }

    if (limit_type >= fuzzy_match_substring_case_insensitive) {
        const size_t where = ifind(match_against, string, true);
        if (where != wcstring::npos) {
            // A case-insensitive version of the string is in the match against.
            result.type = fuzzy_match_substring_case_insensitive;
            assert(match_against.size() >= string.size());
            result.match_distance_first = match_against.size() - string.size();
            result.match_distance_second = where;  // prefer earlier matches
            return result;
        }
    }

    if (limit_type >= fuzzy_match_subsequence_insertions_only &&
        subsequence_in_string(string, match_against)) {
        result.type = fuzzy_match_subsequence_insertions_only;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        // It would be nice to prefer matches with greater matching runs here.
    }

    return result;
}
|
||||
|
||||
/// Three-way comparison helper: yields -1 when a orders before b, 0 when the two compare equal,
/// and 1 otherwise.
template <typename T>
static inline int compare_ints(T a, T b) {
    return (a < b) ? -1 : ((a == b) ? 0 : 1);
}
|
||||
|
||||
/// Compare types; if the types match, compare distances.
|
||||
int string_fuzzy_match_t::compare(const string_fuzzy_match_t &rhs) const {
|
||||
if (this->type != rhs.type) {
|
||||
return compare_ints(this->type, rhs.type);
|
||||
} else if (this->match_distance_first != rhs.match_distance_first) {
|
||||
return compare_ints(this->match_distance_first, rhs.match_distance_first);
|
||||
} else if (this->match_distance_second != rhs.match_distance_second) {
|
||||
return compare_ints(this->match_distance_second, rhs.match_distance_second);
|
||||
}
|
||||
return 0; // equal
|
||||
}
|
||||
|
||||
template <bool Fuzzy, typename T>
|
||||
size_t ifind_impl(const T &haystack, const T &needle) {
|
||||
using char_t = typename T::value_type;
|
||||
|
|
|
@ -35,6 +35,99 @@ bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,
|
|||
size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy = false);
|
||||
size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy = false);
|
||||
|
||||
/// The ways in which one string may fuzzily match another, ordered from strongest to weakest.
enum fuzzy_match_type_t {
    /// Exact match: FOOBAR matches FOOBAR.
    fuzzy_match_exact = 0,

    /// Prefix match: FO matches FOOBAR.
    fuzzy_match_prefix,

    /// Exact match ignoring case: foobar matches FOOBAR.
    fuzzy_match_case_insensitive,

    /// Prefix match ignoring case: foo matches FOOBAR.
    fuzzy_match_prefix_case_insensitive,

    /// Substring match: OOBA matches FOOBAR.
    fuzzy_match_substring,

    /// Substring match ignoring case: ooBA matches FOOBAR.
    fuzzy_match_substring_case_insensitive,

    /// Subsequence match with insertions only: FBR matches FOOBAR.
    fuzzy_match_subsequence_insertions_only,

    /// No match at all.
    fuzzy_match_none
};
|
||||
|
||||
/// Indicates where a match type requires replacing the entire token.
|
||||
static inline bool match_type_requires_full_replacement(fuzzy_match_type_t t) {
|
||||
switch (t) {
|
||||
case fuzzy_match_exact:
|
||||
case fuzzy_match_prefix: {
|
||||
return false;
|
||||
}
|
||||
case fuzzy_match_case_insensitive:
|
||||
case fuzzy_match_prefix_case_insensitive:
|
||||
case fuzzy_match_substring:
|
||||
case fuzzy_match_substring_case_insensitive:
|
||||
case fuzzy_match_subsequence_insertions_only:
|
||||
case fuzzy_match_none: {
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
DIE("Unreachable");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Indicates where a match shares a prefix with the string it matches.
|
||||
static inline bool match_type_shares_prefix(fuzzy_match_type_t t) {
|
||||
switch (t) {
|
||||
case fuzzy_match_exact:
|
||||
case fuzzy_match_prefix:
|
||||
case fuzzy_match_case_insensitive:
|
||||
case fuzzy_match_prefix_case_insensitive: {
|
||||
return true;
|
||||
}
|
||||
case fuzzy_match_substring:
|
||||
case fuzzy_match_substring_case_insensitive:
|
||||
case fuzzy_match_subsequence_insertions_only:
|
||||
case fuzzy_match_none: {
|
||||
return false;
|
||||
}
|
||||
default: {
|
||||
DIE("Unreachable");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Test if string is a fuzzy match to another.
|
||||
struct string_fuzzy_match_t {
|
||||
enum fuzzy_match_type_t type;
|
||||
|
||||
// Strength of the match. The value depends on the type. Lower is stronger.
|
||||
size_t match_distance_first;
|
||||
size_t match_distance_second;
|
||||
|
||||
// Constructor.
|
||||
explicit string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first = 0,
|
||||
size_t distance_second = 0);
|
||||
|
||||
// Return -1, 0, 1 if this match is (respectively) better than, equal to, or worse than rhs.
|
||||
int compare(const string_fuzzy_match_t &rhs) const;
|
||||
};
|
||||
|
||||
/// Compute a fuzzy match for a string. If limit_type is not fuzzy_match_none, limit the type to
|
||||
/// matches at or below that type.
|
||||
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
|
||||
const wcstring &match_against,
|
||||
fuzzy_match_type_t limit_type = fuzzy_match_none);
|
||||
|
||||
/// Split a string by a separator character.
|
||||
wcstring_list_t split_string(const wcstring &val, wchar_t sep);
|
||||
|
||||
|
|
Loading…
Reference in a new issue