Merge pull request #4668 from corneliusroemer/fix-4660

fix(suggestions): Replace wrong Jaro-Winkler
This commit is contained in:
Ed Page 2023-01-23 15:55:59 -06:00 committed by GitHub
commit 401d86fb9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,10 +4,9 @@ use std::cmp::Ordering;
// Internal // Internal
use crate::builder::Command; use crate::builder::Command;
/// Produces multiple strings from a given list of possible values which are similar /// Find strings from an iterable of `possible_values` similar to a given value `v`
/// to the passed in value `v` within a certain confidence by least confidence. /// Returns a Vec of all possible values that exceed a similarity threshold
/// Thus in a list of possible values like ["foo", "bar"], the value "fop" will yield /// sorted by ascending similarity, most similar comes last
/// `Some("foo")`, whereas "blark" would yield `None`.
#[cfg(feature = "suggestions")] #[cfg(feature = "suggestions")]
pub(crate) fn did_you_mean<T, I>(v: &str, possible_values: I) -> Vec<String> pub(crate) fn did_you_mean<T, I>(v: &str, possible_values: I) -> Vec<String>
where where
@ -16,8 +15,11 @@ where
{ {
let mut candidates: Vec<(f64, String)> = possible_values let mut candidates: Vec<(f64, String)> = possible_values
.into_iter() .into_iter()
.map(|pv| (strsim::jaro_winkler(v, pv.as_ref()), pv.as_ref().to_owned())) // GH #4660: using `jaro` because `jaro_winkler` implementation in `strsim-rs` is wrong
.filter(|(confidence, _)| *confidence > 0.8) // causing strings with common prefix >=10 to be considered perfectly similar
.map(|pv| (strsim::jaro(v, pv.as_ref()), pv.as_ref().to_owned()))
// Threshold of 0.7 (down from 0.8) so that `bar` -> `baz` (Jaro similarity ~0.78) is still suggested
.filter(|(confidence, _)| *confidence > 0.7)
.collect(); .collect();
candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal)); candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
candidates.into_iter().map(|(_, pv)| pv).collect() candidates.into_iter().map(|(_, pv)| pv).collect()
@ -112,6 +114,15 @@ mod test {
); );
} }
// Regression test for GH #4660: both candidates share the 10-character prefix
// "alignmentS" with the misspelled input, yet they must not be ranked as
// equally similar. Results are ordered by ascending similarity, so the
// closest match is expected last.
#[test]
fn best_fit_long_common_prefix_issue_4660() {
    let candidates = ["alignmentScore", "alignmentStart"];
    let suggestions = did_you_mean("alignmentScorr", candidates.iter());
    let expected = vec!["alignmentStart", "alignmentScore"];
    assert_eq!(suggestions, expected);
}
#[test] #[test]
fn flag_missing_letter() { fn flag_missing_letter() {
let p_vals = ["test", "possible", "values"]; let p_vals = ["test", "possible", "values"];