using System; using System.Text; namespace OpenTap.Package { internal class FuzzyMatcher { public class Match { public Match(string candidate, int score) { Candidate = candidate; Score = score; } public string Candidate { get; set; } public int Score { get; set; } public override string ToString() { return $"{Candidate}: {Score}"; } } public string Input { get; } public FuzzyMatcher(string input, int maxThreshhold) { Input = preprocess(input); _maxThreshhold = maxThreshhold; } /// /// Remove all punctuation and whitespace from the input, and lowercase it /// /// /// private string preprocess(string str) { var sb = new StringBuilder(str.Length); foreach (var ch in str) { if (char.IsLetterOrDigit(ch)) sb.Append(ch); } return sb.ToString().ToLower(); } // Only consider candidates that are within an edit distance of 3 from the input // E.g. the input string can be transformed to the candidate with 3 changes (adding, removing, or replacing a character) private int _maxThreshhold; private int LevensteinDistance(string candidateString, string inputString, int x, int y, int score) { if (score > _maxThreshhold) return score; if (x == candidateString.Length) return score + inputString.Length - y; if (y == inputString.Length) return score + candidateString.Length - x; if (candidateString[x] == inputString[y]) return LevensteinDistance(candidateString, inputString, x + 1, y + 1, score); return Math.Min( LevensteinDistance(candidateString, inputString, x, y + 1, score + 1), // insert Math.Min( LevensteinDistance(candidateString, inputString, x + 1, y, score + 1), // remove LevensteinDistance(candidateString, inputString, x + 1, y + 1, score + 1))); // replace } /// /// Calculate how similar a candidate is to the input /// /// /// public Match Score(string candidate) { var score = 0; if (string.IsNullOrWhiteSpace(candidate)) return new Match(candidate, 0); var prep = preprocess(candidate); if (prep.Contains(Input)) { if (prep == Input) return new Match(candidate, 0); // If the input is a prefix of the package name, it is considered a very good candidate if (prep.StartsWith(Input) || prep.EndsWith(Input)) return new Match(candidate, 1); // If the input is a substring of the package name, it is a pretty good candiate return new Match(candidate, 2); } // If the candidate is a substring of the input, it is still worth considering if there are no better options. else if (Input.Contains(prep)) { return new Match(candidate, 3); } // Otherwise, the suitability is determined by the edit distance // E.g. how many modifications do we need to make to the candidate to turn it into the input score = LevensteinDistance(prep, Input, 0, 0, 0); return new Match(candidate, score); } } }