using System;
using System.Text;
namespace OpenTap.Package
{
internal class FuzzyMatcher
{
public class Match
{
public Match(string candidate, int score)
{
Candidate = candidate;
Score = score;
}
public string Candidate { get; set; }
public int Score { get; set; }
public override string ToString()
{
return $"{Candidate}: {Score}";
}
}
public string Input { get; }
public FuzzyMatcher(string input, int maxThreshhold)
{
Input = preprocess(input);
_maxThreshhold = maxThreshhold;
}
///
/// Remove all punctuation and whitespace from the input, and lowercase it
///
///
///
private string preprocess(string str)
{
var sb = new StringBuilder(str.Length);
foreach (var ch in str)
{
if (char.IsLetterOrDigit(ch))
sb.Append(ch);
}
return sb.ToString().ToLower();
}
// Only consider candidates that are within an edit distance of 3 from the input
// E.g. the input string can be transformed to the candidate with 3 changes (adding, removing, or replacing a character)
private int _maxThreshhold;
private int LevensteinDistance(string candidateString, string inputString, int x, int y, int score)
{
if (score > _maxThreshhold) return score;
if (x == candidateString.Length) return score + inputString.Length - y;
if (y == inputString.Length) return score + candidateString.Length - x;
if (candidateString[x] == inputString[y]) return LevensteinDistance(candidateString, inputString, x + 1, y + 1, score);
return Math.Min(
LevensteinDistance(candidateString, inputString, x, y + 1, score + 1), // insert
Math.Min(
LevensteinDistance(candidateString, inputString, x + 1, y, score + 1), // remove
LevensteinDistance(candidateString, inputString, x + 1, y + 1, score + 1))); // replace
}
///
/// Calculate how similar a candidate is to the input
///
///
///
public Match Score(string candidate)
{
var score = 0;
if (string.IsNullOrWhiteSpace(candidate)) return new Match(candidate, 0);
var prep = preprocess(candidate);
if (prep.Contains(Input))
{
if (prep == Input)
return new Match(candidate, 0);
// If the input is a prefix of the package name, it is considered a very good candidate
if (prep.StartsWith(Input) || prep.EndsWith(Input))
return new Match(candidate, 1);
// If the input is a substring of the package name, it is a pretty good candiate
return new Match(candidate, 2);
}
// If the candidate is a substring of the input, it is still worth considering if there are no better options.
else if (Input.Contains(prep))
{
return new Match(candidate, 3);
}
// Otherwise, the suitability is determined by the edit distance
// E.g. how many modifications do we need to make to the candidate to turn it into the input
score = LevensteinDistance(prep, Input, 0, 0, 0);
return new Match(candidate, score);
}
}
}