Implement Linear glob match #4094

This commit is contained in:
hazem samir 2017-12-06 08:15:43 +02:00 committed by ridiculousfish
parent ae700c8707
commit c45d4abaef

View file

@ -93,54 +93,63 @@ bool wildcard_has(const wcstring &str, bool internal) {
/// \param wc The wildcard.
/// \param is_first Whether files beginning with dots should not be matched against wildcards.
static enum fuzzy_match_type_t wildcard_match_internal(const wchar_t *str, const wchar_t *wc,
bool leading_dots_fail_to_match,
bool is_first) {
if (*str == 0 && *wc == 0) {
return fuzzy_match_exact; // we're done
}
bool leading_dots_fail_to_match) {
// Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
// literal matches.
if (leading_dots_fail_to_match && is_first && (!wcscmp(str, L".") || !wcscmp(str, L".."))) {
if (leading_dots_fail_to_match && (!wcscmp(str, L".") || !wcscmp(str, L".."))) {
// The string is '.' or '..'. Return true if the wildcard exactly matches.
return wcscmp(str, wc) ? fuzzy_match_none : fuzzy_match_exact;
}
if (*wc == ANY_STRING || *wc == ANY_STRING_RECURSIVE) {
// Ignore hidden file
if (leading_dots_fail_to_match && is_first && *str == L'.') {
return fuzzy_match_none;
}
// Near Linear implementation as proposed here https://research.swtch.com/glob.
const wchar_t *wc_x = wc;
const wchar_t *str_x = str;
const wchar_t *restart_wc_x = wc;
const wchar_t *restart_str_x = str;
bool restart_is_out_of_str = false;
for (; *wc_x != 0 || *str_x != 0;) {
bool is_first = (str_x == str);
if (*wc_x != 0) {
if (*wc_x == ANY_STRING || *wc_x == ANY_STRING_RECURSIVE) {
// Ignore hidden file
if (leading_dots_fail_to_match && is_first && *str == L'.') {
return fuzzy_match_none;
}
// Common case of * at the end. In that case we can early out since we know it will match.
if (wc[1] == L'\0') {
return fuzzy_match_exact;
}
// Try all submatches.
do {
enum fuzzy_match_type_t subresult =
wildcard_match_internal(str, wc + 1, leading_dots_fail_to_match, false);
if (subresult != fuzzy_match_none) {
return subresult;
// Common case of * at the end. In that case we can early out since we know it will match.
if (wc_x[1] == L'\0') {
return fuzzy_match_exact;
}
// Try to match at str_x.
// If that doesn't work out, restart at str_x+1 next.
restart_wc_x = wc_x;
restart_str_x = str_x + 1;
restart_is_out_of_str = (*str_x == 0);
wc_x++;
continue;
} else if (*wc_x == ANY_CHAR && *str_x != 0) {
if (is_first && *str_x == L'.') {
return fuzzy_match_none;
}
wc_x++;
str_x++;
continue;
} else if (*str_x != 0 && *str_x == *wc_x) { // ordinary character
wc_x++;
str_x++;
continue;
}
} while (*str++ != 0);
return fuzzy_match_none;
} else if (*str == 0) {
// End of string, but not end of wildcard, and the next wildcard element is not a '*', so
// this is not a match.
return fuzzy_match_none;
} else if (*wc == ANY_CHAR) {
if (is_first && *str == L'.') {
return fuzzy_match_none;
}
return wildcard_match_internal(str + 1, wc + 1, leading_dots_fail_to_match, false);
} else if (*wc == *str) {
return wildcard_match_internal(str + 1, wc + 1, leading_dots_fail_to_match, false);
// Mismatch. Maybe restart.
if (restart_str_x != str && !restart_is_out_of_str) {
wc_x = restart_wc_x;
str_x = restart_str_x;
continue;
}
return fuzzy_match_none;
}
return fuzzy_match_none;
// Matched all of pattern to all of name. Success.
return fuzzy_match_exact;
}
// This does something horrible refactored from an even more horrible function.
@ -312,7 +321,7 @@ bool wildcard_complete(const wcstring &str, const wchar_t *wc, const wchar_t *de
bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match) {
enum fuzzy_match_type_t match = wildcard_match_internal(
str.c_str(), wc.c_str(), leading_dots_fail_to_match, true /* first */);
str.c_str(), wc.c_str(), leading_dots_fail_to_match);
return match != fuzzy_match_none;
}