Additional ToAlpha work for edge cases.

This commit is contained in:
Steven Hildreth 2019-07-02 10:25:21 -05:00
parent 7295095460
commit 6bb4e8671c
4 changed files with 102 additions and 23 deletions

View file

@ -80,7 +80,7 @@ namespace Roadie.Library.Tests
// using (var context = new RoadieDbContext(optionsBuilder.Options))
// {
// var now = DateTime.UtcNow;
// foreach(var release in context.Releases)
// foreach (var release in context.Releases)
// {
// var releaseModel = release.Adapt<Roadie.Library.Models.Releases.Release>();
// var specialReleaseTitle = release.Title.ToAlphanumericName();

View file

@ -72,12 +72,27 @@ namespace Roadie.Library.Tests
[InlineData("Ringo Starr And His All-Starr Band", "ringostarrandhisallstarrband")]
[InlineData("Leslie & Tom", "leslieandtom")]
[InlineData(" Leslie & Tom", "leslieandtom")]
[InlineData("C o l i n H a y", "colinhay")]
[InlineData("ColinHay", "colinhay")]
[InlineData("Colin Hay!", "colinhay")]
[InlineData("colinhay", "colinhay")]
[InlineData("COLINHAY", "colinhay")]
[InlineData("C.O!L&quot;I$N⌐HƒAY;", "colinhay")]
[InlineData(" Leslie &amp; Tom", "leslieandtom")]
[InlineData("<b>Leslie &amp; &#32;&#32; Tom</b>", "leslieandtom")]
[InlineData("Leslie;/&/;Tom", "leslieandtom")]
[InlineData("Leslie And Tom", "leslieandtom")]
[InlineData("L≈esl|ie ƒand T╗om╣;", "leslieandtom")]
[InlineData("Leslie Tom", "leslietom")]
[InlineData("Hüsker Dü", "huskerdu")]
[InlineData("Motörhead", "motorhead")] //
[InlineData("Motörhead", "motorhead")]
[InlineData("Alright, Still", "alrightstill")]
[InlineData("Something, SOMETHING & somEthing!", "somethingsomethingandsomething")]
[InlineData("Something, SOMETHING & somEthing!", "somethingsomethingandsomething")]
[InlineData("comfort y mãºsica para volar", "comfortymasicaparavolar")]
[InlineData("canciã³n animal", "canciananimal")]
[InlineData("Xylø", "xyloe")]
[InlineData("Метель", "metel")]
[InlineData("Svartidauði", "svartidaudhi")]
public void ToAlphanumericNameShouldStripAndMatch(string input, string shouldBe)
{
var t = input.ToAlphanumericName();

View file

@ -1,9 +1,11 @@
using Roadie.Library.Configuration;
using HtmlAgilityPack;
using Roadie.Library.Configuration;
using Roadie.Library.Utility;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
@ -11,6 +13,29 @@ namespace Roadie.Library.Extensions
{
public static class StringExt
{
/// <summary>
/// Lookup table from a single accented or special Latin character to its plain ASCII
/// replacement string. Consulted character-by-character by <c>RemoveUnicodeAccents</c>.
/// Replacements may be longer than one character (e.g. 'Ä' becomes "Ae", 'ß' becomes "ss").
/// </summary>
private static readonly Dictionary<char, string> UnicodeAccents = new Dictionary<char, string>() {
// Upper-case letters.
{'À', "A"}, {'Á', "A"}, {'Â', "A"}, {'Ã', "A"}, {'Ä', "Ae"}, {'Å', "A"}, {'Æ', "Ae"},
{'Ç', "C"},
{'È', "E"}, {'É', "E"}, {'Ê', "E"}, {'Ë', "E"},
{'Ì', "I"}, {'Í', "I"}, {'Î', "I"}, {'Ï', "I"},
// Eth and thorn are spelled out as two-letter digraphs.
{'Ð', "Dh"}, {'Þ', "Th"},
{'Ñ', "N"},
{'Ò', "O"}, {'Ó', "O"}, {'Ô', "O"}, {'Õ', "O"}, {'Ö', "Oe"}, {'Ø', "Oe"},
{'Ù', "U"}, {'Ú', "U"}, {'Û', "U"}, {'Ü', "Ue"},
{'Ý', "Y"},
// Sharp s has no upper-case entry in this table; it always maps to lower-case "ss".
{'ß', "ss"},
// Lower-case letters, mirroring the upper-case rows above.
{'à', "a"}, {'á', "a"}, {'â', "a"}, {'ã', "a"}, {'ä', "ae"}, {'å', "a"}, {'æ', "ae"},
{'ç', "c"},
{'è', "e"}, {'é', "e"}, {'ê', "e"}, {'ë', "e"},
{'ì', "i"}, {'í', "i"}, {'î', "i"}, {'ï', "i"},
{'ð', "dh"}, {'þ', "th"},
{'ñ', "n"},
{'ò', "o"}, {'ó', "o"}, {'ô', "o"}, {'õ', "o"}, {'ö', "oe"}, {'ø', "oe"},
{'ù', "u"}, {'ú', "u"}, {'û', "u"}, {'ü', "ue"},
// 'ÿ' appears only in lower case in this table.
{'ý', "y"}, {'ÿ', "y"}
};
public static string AddToDelimitedList(this string input, IEnumerable<string> values, char delimiter = '|')
{
if (string.IsNullOrEmpty(input) && (values == null || !values.Any()))
@ -201,6 +226,20 @@ namespace Roadie.Library.Extensions
return input;
}
/// <summary>
/// Replaces every character that has an entry in the <c>UnicodeAccents</c> table with its
/// ASCII replacement string; all other characters are copied through unchanged.
/// </summary>
/// <param name="text">The text to convert. May be null or empty.</param>
/// <returns>
/// The converted text, or <paramref name="text"/> itself when it is null or empty.
/// </returns>
public static string RemoveUnicodeAccents(this string text)
{
    // Null/empty passthrough, matching the guard used by ToAlphanumericName, instead of
    // letting a null receiver surface as an ArgumentNullException from LINQ.
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Replacements are one or two characters, so the input length is a good starting capacity.
    var result = new StringBuilder(text.Length);
    foreach (char c in text)
    {
        string replacement;
        if (UnicodeAccents.TryGetValue(c, out replacement))
        {
            result.Append(replacement);
        }
        else
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
public static String RemoveDiacritics(this string s)
{
String normalizedString = s.Normalize(NormalizationForm.FormD);
@ -216,16 +255,41 @@ namespace Roadie.Library.Extensions
return stringBuilder.ToString();
}
/// <summary>
/// Russian Cyrillic letter to Latin transliteration table used by <c>Translit</c>.
/// The hard sign maps to a double quote and the soft sign to an apostrophe.
/// </summary>
private static readonly Dictionary<char, string> CyrillicToLatin = new Dictionary<char, string>
{
    {'А', "A"}, {'Б', "B"}, {'В', "V"}, {'Г', "G"}, {'Д', "D"}, {'Е', "E"}, {'Ё', "Yo"},
    {'Ж', "Zh"}, {'З', "Z"}, {'И', "I"}, {'Й', "Y"}, {'К', "K"}, {'Л', "L"}, {'М', "M"},
    {'Н', "N"}, {'О', "O"}, {'П', "P"}, {'Р', "R"}, {'С', "S"}, {'Т', "T"}, {'У', "U"},
    {'Ф', "F"}, {'Х', "Kh"}, {'Ц', "Ts"}, {'Ч', "Ch"}, {'Ш', "Sh"}, {'Щ', "Shch"},
    {'Ъ', "\""}, {'Ы', "Y"}, {'Ь', "'"}, {'Э', "E"}, {'Ю', "Yu"}, {'Я', "Ya"},
    {'а', "a"}, {'б', "b"}, {'в', "v"}, {'г', "g"}, {'д', "d"}, {'е', "e"}, {'ё', "yo"},
    {'ж', "zh"}, {'з', "z"}, {'и', "i"}, {'й', "y"}, {'к', "k"}, {'л', "l"}, {'м', "m"},
    {'н', "n"}, {'о', "o"}, {'п', "p"}, {'р', "r"}, {'с', "s"}, {'т', "t"}, {'у', "u"},
    {'ф', "f"}, {'х', "kh"}, {'ц', "ts"}, {'ч', "ch"}, {'ш', "sh"}, {'щ', "shch"},
    {'ъ', "\""}, {'ы', "y"}, {'ь', "'"}, {'э', "e"}, {'ю', "yu"}, {'я', "ya"}
};

/// <summary>
/// Transliterates Russian Cyrillic letters in <paramref name="str"/> to Latin letters.
/// Characters without a table entry are copied through unchanged.
/// </summary>
/// <param name="str">The text to transliterate. May be null or empty.</param>
/// <returns>
/// The transliterated text, or <paramref name="str"/> itself when it is null or empty.
/// </returns>
public static string Translit(this string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // One pass over the input. This yields the same text as replacing each letter in turn,
    // because no replacement string contains a Cyrillic letter that a later step could match.
    var result = new StringBuilder(str.Length);
    foreach (char c in str)
    {
        string latin;
        if (CyrillicToLatin.TryGetValue(c, out latin))
        {
            result.Append(latin);
        }
        else
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
/// <summary>
/// Reduces a name to a lower-case key containing only ASCII letters and digits.
/// HTML entities are decoded, markup is removed, "&amp;" is spelled out as "and", and
/// accented and Cyrillic letters are converted to Latin equivalents.
/// </summary>
/// <param name="input">The name to normalise. May be null or empty.</param>
/// <returns>
/// The normalised key, or <paramref name="input"/> itself when it is null or empty.
/// </returns>
public static string ToAlphanumericName(this string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    // Decode entities BEFORE spelling out the ampersand; otherwise "&amp;" would turn into
    // "andamp;" and never be recognised as an entity.
    input = WebUtility.HtmlDecode(input);

    // Invariant lower-casing keeps the key stable regardless of the current culture
    // (e.g. the Turkish dotless i).
    input = input.ScrubHtml().ToLowerInvariant().Trim().Replace("&", "and");

    // Drop whitespace, punctuation and symbols; letters and digits of any script are kept
    // so the steps below can convert them.
    char[] lettersAndDigits = Array.FindAll<char>(input.ToCharArray(), c => char.IsLetterOrDigit(c));
    input = new string(lettersAndDigits).RemoveDiacritics().RemoveUnicodeAccents().Translit();

    // Whatever is still outside ASCII letters/digits (unmapped scripts, transliteration
    // quote marks) is removed.
    return Regex.Replace(input, @"[^A-Za-z0-9]+", "");
}
/// <summary>
/// Removes HTML tags and <c>&amp;nbsp;</c> entities from <paramref name="value"/>, trims the
/// result, and collapses each run of two or more whitespace characters into a single space.
/// </summary>
/// <param name="value">The text to clean.</param>
/// <returns>The text without tags, with whitespace runs collapsed.</returns>
public static string ScrubHtml(this string value)
{
    // Anything that looks like <...> and every literal "&nbsp;" is dropped outright.
    string withoutMarkup = Regex.Replace(value, @"<[^>]+>|&nbsp;", string.Empty);
    string trimmed = withoutMarkup.Trim();

    // Removing tags can leave gaps; squeeze runs of whitespace down to one space.
    return Regex.Replace(trimmed, @"\s{2,}", " ");
}
public static string ToContentDispositionFriendly(this string input)

View file

@ -648,27 +648,29 @@ namespace Roadie.Api.Services
data.Artist artist = null;
data.Release release = null;
var searchName = csvRelease.Artist.NormalizeName();
var specialSearchName = csvRelease.Artist.ToAlphanumericName();
var artistSearchName = csvRelease.Artist.NormalizeName();
var artistSpecialSearchName = csvRelease.Artist.ToAlphanumericName();
var releaseSearchName = csvRelease.Release.NormalizeName().ToLower();
var releaseSpecialSearchName = csvRelease.Release.ToAlphanumericName();
var artistResults = (from a in DbContext.Artists
where a.Name.Contains(searchName) ||
a.SortName.Contains(searchName) ||
a.AlternateNames.Contains(searchName) ||
a.AlternateNames.Contains(specialSearchName)
where a.Name.Contains(artistSearchName) ||
a.SortName.Contains(artistSearchName) ||
a.AlternateNames.Contains(artistSearchName) ||
a.AlternateNames.Contains(artistSpecialSearchName)
select a).ToArray();
if (!artistResults.Any())
{
await LogAndPublish(
$"Unable To Find Artist [{csvRelease.Artist}], SearchName [{searchName}]",
$"Unable To Find Artist [{csvRelease.Artist}], SearchName [{artistSpecialSearchName}]",
LogLevel.Warning);
csvRelease.Status = Statuses.Missing;
DbContext.CollectionMissings.Add(new data.CollectionMissing
{
CollectionId = collection.Id,
Position = csvRelease.Position,
Artist = searchName,
Release = csvRelease.Release.NormalizeName()
Artist = artistSpecialSearchName,
Release = releaseSpecialSearchName
});
continue;
}
@ -676,13 +678,11 @@ namespace Roadie.Api.Services
foreach (var artistResult in artistResults)
{
artist = artistResult;
searchName = csvRelease.Release.NormalizeName().ToLower();
specialSearchName = csvRelease.Release.ToAlphanumericName();
release = (from r in DbContext.Releases
where r.ArtistId == artist.Id
where r.Title.Contains(searchName) ||
r.AlternateNames.Contains(searchName) ||
r.AlternateNames.Contains(specialSearchName)
where r.Title.Contains(releaseSearchName) ||
r.AlternateNames.Contains(releaseSearchName) ||
r.AlternateNames.Contains(releaseSpecialSearchName)
select r
).FirstOrDefault();
if (release != null) break;
@ -691,7 +691,7 @@ namespace Roadie.Api.Services
if (release == null)
{
await LogAndPublish(
$"Unable To Find Release [{csvRelease.Release}] for Artist [{csvRelease.Artist}], SearchName [{searchName}]",
$"Unable To Find Release [{csvRelease.Release}] for Artist [{csvRelease.Artist}], SearchName [{artistSearchName}]",
LogLevel.Warning);
csvRelease.Status = Statuses.Missing;
DbContext.CollectionMissings.Add(new data.CollectionMissing
@ -699,8 +699,8 @@ namespace Roadie.Api.Services
CollectionId = collection.Id,
IsArtistFound = true,
Position = csvRelease.Position,
Artist = csvRelease.Artist,
Release = searchName
Artist = artistSpecialSearchName,
Release = releaseSpecialSearchName
});
continue;
}