mirror of
https://github.com/sphildreth/roadie
synced 2024-11-10 06:44:12 +00:00
Additional ToAlpha work for edge cases.
This commit is contained in:
parent
7295095460
commit
6bb4e8671c
4 changed files with 102 additions and 23 deletions
|
@ -80,7 +80,7 @@ namespace Roadie.Library.Tests
|
|||
// using (var context = new RoadieDbContext(optionsBuilder.Options))
|
||||
// {
|
||||
// var now = DateTime.UtcNow;
|
||||
// foreach(var release in context.Releases)
|
||||
// foreach (var release in context.Releases)
|
||||
// {
|
||||
// var releaseModel = release.Adapt<Roadie.Library.Models.Releases.Release>();
|
||||
// var specialReleaseTitle = release.Title.ToAlphanumericName();
|
||||
|
|
|
@ -72,12 +72,27 @@ namespace Roadie.Library.Tests
|
|||
[InlineData("Ringo Starr And His All-Starr Band", "ringostarrandhisallstarrband")]
|
||||
[InlineData("Leslie & Tom", "leslieandtom")]
|
||||
[InlineData(" Leslie & Tom", "leslieandtom")]
|
||||
[InlineData("C o l i n H a y", "colinhay")]
|
||||
[InlineData("ColinHay", "colinhay")]
|
||||
[InlineData("Colin Hay!", "colinhay")]
|
||||
[InlineData("colinhay", "colinhay")]
|
||||
[InlineData("COLINHAY", "colinhay")]
|
||||
[InlineData("C.O!L"I$N⌐HƒAY;", "colinhay")]
|
||||
[InlineData(" Leslie & Tom", "leslieandtom")]
|
||||
[InlineData("<b>Leslie &    Tom</b>", "leslieandtom")]
|
||||
[InlineData("Leslie;/&/;Tom", "leslieandtom")]
|
||||
[InlineData("Leslie And Tom", "leslieandtom")]
|
||||
[InlineData("L≈esl|ie ƒand T╗om╣;", "leslieandtom")]
|
||||
[InlineData("Leslie Tom", "leslietom")]
|
||||
[InlineData("Hüsker Dü", "huskerdu")]
|
||||
[InlineData("Motörhead", "motorhead")] //
|
||||
[InlineData("Motörhead", "motorhead")]
|
||||
[InlineData("Alright, Still", "alrightstill")]
|
||||
[InlineData("Something, SOMETHING & somEthing!", "somethingsomethingandsomething")]
|
||||
[InlineData("Something, SOMETHING & somEthing!", "somethingsomethingandsomething")]
|
||||
[InlineData("comfort y mãºsica para volar", "comfortymasicaparavolar")]
|
||||
[InlineData("canciã³n animal", "canciananimal")]
|
||||
[InlineData("Xylø", "xyloe")]
|
||||
[InlineData("Метель", "metel")]
|
||||
[InlineData("Svartidauði", "svartidaudhi")]
|
||||
public void ToAlphanumericNameShouldStripAndMatch(string input, string shouldBe)
|
||||
{
|
||||
var t = input.ToAlphanumericName();
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
using Roadie.Library.Configuration;
|
||||
using HtmlAgilityPack;
|
||||
using Roadie.Library.Configuration;
|
||||
using Roadie.Library.Utility;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
|
@ -11,6 +13,29 @@ namespace Roadie.Library.Extensions
|
|||
{
|
||||
public static class StringExt
|
||||
{
|
||||
/// <summary>
/// Lookup table from a single accented or special Latin character to its plain
/// ASCII replacement string; consulted per character by <c>RemoveUnicodeAccents</c>.
/// </summary>
/// <remarks>
/// Most entries map to one base letter, but several expand to two letters
/// (for example 'Ä' to "Ae", 'Ø' to "Oe", 'Ð' to "Dh", 'Þ' to "Th", 'ß' to "ss"),
/// so the replaced text can be longer than the input.
/// </remarks>
private static readonly Dictionary<char, string> UnicodeAccents = new Dictionary<char, string>() {
|
||||
{'À', "A"}, {'Á', "A"}, {'Â', "A"}, {'Ã', "A"}, {'Ä', "Ae"}, {'Å', "A"}, {'Æ', "Ae"},
|
||||
{'Ç', "C"},
|
||||
{'È', "E"}, {'É', "E"}, {'Ê', "E"}, {'Ë', "E"},
|
||||
{'Ì', "I"}, {'Í', "I"}, {'Î', "I"}, {'Ï', "I"},
|
||||
{'Ð', "Dh"}, {'Þ', "Th"},
|
||||
{'Ñ', "N"},
|
||||
{'Ò', "O"}, {'Ó', "O"}, {'Ô', "O"}, {'Õ', "O"}, {'Ö', "Oe"}, {'Ø', "Oe"},
|
||||
{'Ù', "U"}, {'Ú', "U"}, {'Û', "U"}, {'Ü', "Ue"},
|
||||
{'Ý', "Y"},
|
||||
{'ß', "ss"},
|
||||
// Lower-case counterparts of the entries above (plus 'ÿ').
{'à', "a"}, {'á', "a"}, {'â', "a"}, {'ã', "a"}, {'ä', "ae"}, {'å', "a"}, {'æ', "ae"},
|
||||
{'ç', "c"},
|
||||
{'è', "e"}, {'é', "e"}, {'ê', "e"}, {'ë', "e"},
|
||||
{'ì', "i"}, {'í', "i"}, {'î', "i"}, {'ï', "i"},
|
||||
{'ð', "dh"}, {'þ', "th"},
|
||||
{'ñ', "n"},
|
||||
{'ò', "o"}, {'ó', "o"}, {'ô', "o"}, {'õ', "o"}, {'ö', "oe"}, {'ø', "oe"},
|
||||
{'ù', "u"}, {'ú', "u"}, {'û', "u"}, {'ü', "ue"},
|
||||
{'ý', "y"}, {'ÿ', "y"}
|
||||
};
|
||||
|
||||
|
||||
public static string AddToDelimitedList(this string input, IEnumerable<string> values, char delimiter = '|')
|
||||
{
|
||||
if (string.IsNullOrEmpty(input) && (values == null || !values.Any()))
|
||||
|
@ -201,6 +226,20 @@ namespace Roadie.Library.Extensions
|
|||
return input;
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces every character that has an entry in <c>UnicodeAccents</c> with its
/// mapped replacement string and copies all other characters through unchanged.
/// </summary>
/// <param name="text">Text to convert; may be null or empty.</param>
/// <returns>
/// The converted text. A null or empty <paramref name="text"/> is returned as-is
/// instead of throwing.
/// </returns>
public static string RemoveUnicodeAccents(this string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Replacements are at least one character long, so the input length is a
    // sensible starting capacity.
    var result = new StringBuilder(text.Length);
    foreach (var c in text)
    {
        string replacement;
        if (UnicodeAccents.TryGetValue(c, out replacement))
        {
            result.Append(replacement);
        }
        else
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
|
||||
|
||||
public static String RemoveDiacritics(this string s)
|
||||
{
|
||||
String normalizedString = s.Normalize(NormalizationForm.FormD);
|
||||
|
@ -216,16 +255,41 @@ namespace Roadie.Library.Extensions
|
|||
return stringBuilder.ToString();
|
||||
}
|
||||
|
||||
/// <summary>
/// Russian Cyrillic letters (upper and lower case) paired with their Latin
/// transliterations. The hard sign maps to a double quote and the soft sign
/// to an apostrophe.
/// </summary>
private static readonly Dictionary<char, string> CyrillicToLatin = new Dictionary<char, string>
{
    {'А', "A"}, {'Б', "B"}, {'В', "V"}, {'Г', "G"}, {'Д', "D"}, {'Е', "E"}, {'Ё', "Yo"},
    {'Ж', "Zh"}, {'З', "Z"}, {'И', "I"}, {'Й', "Y"}, {'К', "K"}, {'Л', "L"}, {'М', "M"},
    {'Н', "N"}, {'О', "O"}, {'П', "P"}, {'Р', "R"}, {'С', "S"}, {'Т', "T"}, {'У', "U"},
    {'Ф', "F"}, {'Х', "Kh"}, {'Ц', "Ts"}, {'Ч', "Ch"}, {'Ш', "Sh"}, {'Щ', "Shch"},
    {'Ъ', "\""}, {'Ы', "Y"}, {'Ь', "'"}, {'Э', "E"}, {'Ю', "Yu"}, {'Я', "Ya"},
    {'а', "a"}, {'б', "b"}, {'в', "v"}, {'г', "g"}, {'д', "d"}, {'е', "e"}, {'ё', "yo"},
    {'ж', "zh"}, {'з', "z"}, {'и', "i"}, {'й', "y"}, {'к', "k"}, {'л', "l"}, {'м', "m"},
    {'н', "n"}, {'о', "o"}, {'п', "p"}, {'р', "r"}, {'с', "s"}, {'т', "t"}, {'у', "u"},
    {'ф', "f"}, {'х', "kh"}, {'ц', "ts"}, {'ч', "ch"}, {'ш', "sh"}, {'щ', "shch"},
    {'ъ', "\""}, {'ы', "y"}, {'ь', "'"}, {'э', "e"}, {'ю', "yu"}, {'я', "ya"}
};

/// <summary>
/// Transliterates Russian Cyrillic letters in <paramref name="str"/> to Latin
/// letters; every other character is copied through unchanged.
/// </summary>
/// <param name="str">Text to transliterate; may be null or empty.</param>
/// <returns>
/// The transliterated text. A null or empty <paramref name="str"/> is returned
/// as-is instead of throwing.
/// </returns>
public static string Translit(this string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // One pass over the input instead of one full-string Replace per letter.
    var result = new StringBuilder(str.Length);
    foreach (var c in str)
    {
        string latin;
        if (CyrillicToLatin.TryGetValue(c, out latin))
        {
            result.Append(latin);
        }
        else
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
|
||||
|
||||
/// <summary>
/// Reduces a display name to a lower-case ASCII key containing only the
/// characters a-z and 0-9.
/// </summary>
/// <remarks>
/// Steps, in order: HTML-decode, strip markup via <c>ScrubHtml</c>, lower-case,
/// trim, spell "&amp;" as "and", keep only letters and digits, fold accents and
/// Cyrillic to Latin, and finally drop anything still outside A-Z, a-z, 0-9.
/// </remarks>
/// <param name="input">Name to normalise; may be null or empty.</param>
/// <returns>
/// The normalised key, or <paramref name="input"/> itself when it is null or empty.
/// </returns>
public static string ToAlphanumericName(this string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    // Decode entities first: replacing '&' before decoding would turn "&amp;"
    // into "andamp;" and leak entity names into the result.
    var decoded = WebUtility.HtmlDecode(input);

    // Invariant lower-casing keeps the key independent of the current culture
    // (e.g. Turkish dotless i).
    var normalized = decoded.ScrubHtml().ToLowerInvariant().Trim().Replace("&", "and");

    // Keep letters and digits of any script; non-Latin letters are converted below.
    var lettersAndDigits = Array.FindAll<char>(normalized.ToCharArray(), c => char.IsLetterOrDigit(c));

    var latin = new string(lettersAndDigits).RemoveDiacritics().RemoveUnicodeAccents().Translit();

    // Transliteration can emit punctuation (quotes for hard/soft signs) and some
    // letters have no mapping; remove whatever is still not ASCII alphanumeric.
    return Regex.Replace(latin, @"[^A-Za-z0-9]+", "");
}
|
||||
|
||||
|
||||
/// <summary>
/// Removes tag-like <c>&lt;...&gt;</c> runs (and the second alternative of the
/// first pattern) from <paramref name="value"/>, trims the result, and then
/// collapses every run of two or more whitespace characters into one space.
/// </summary>
/// <param name="value">Text that may contain markup.</param>
/// <returns>The scrubbed text.</returns>
public static string ScrubHtml(this string value)
{
    // NOTE(review): the alternative after '|' shows up here as a single blank
    // character; it may originally have been an HTML entity - confirm against the repository.
    var withoutMarkup = Regex.Replace(value, @"<[^>]+>| ", string.Empty).Trim();
    return Regex.Replace(withoutMarkup, @"\s{2,}", " ");
}
|
||||
|
||||
public static string ToContentDispositionFriendly(this string input)
|
||||
|
|
|
@ -648,27 +648,29 @@ namespace Roadie.Api.Services
|
|||
data.Artist artist = null;
|
||||
data.Release release = null;
|
||||
|
||||
var searchName = csvRelease.Artist.NormalizeName();
|
||||
var specialSearchName = csvRelease.Artist.ToAlphanumericName();
|
||||
var artistSearchName = csvRelease.Artist.NormalizeName();
|
||||
var artistSpecialSearchName = csvRelease.Artist.ToAlphanumericName();
|
||||
var releaseSearchName = csvRelease.Release.NormalizeName().ToLower();
|
||||
var releaseSpecialSearchName = csvRelease.Release.ToAlphanumericName();
|
||||
|
||||
var artistResults = (from a in DbContext.Artists
|
||||
where a.Name.Contains(searchName) ||
|
||||
a.SortName.Contains(searchName) ||
|
||||
a.AlternateNames.Contains(searchName) ||
|
||||
a.AlternateNames.Contains(specialSearchName)
|
||||
where a.Name.Contains(artistSearchName) ||
|
||||
a.SortName.Contains(artistSearchName) ||
|
||||
a.AlternateNames.Contains(artistSearchName) ||
|
||||
a.AlternateNames.Contains(artistSpecialSearchName)
|
||||
select a).ToArray();
|
||||
if (!artistResults.Any())
|
||||
{
|
||||
await LogAndPublish(
|
||||
$"Unable To Find Artist [{csvRelease.Artist}], SearchName [{searchName}]",
|
||||
$"Unable To Find Artist [{csvRelease.Artist}], SearchName [{artistSpecialSearchName}]",
|
||||
LogLevel.Warning);
|
||||
csvRelease.Status = Statuses.Missing;
|
||||
DbContext.CollectionMissings.Add(new data.CollectionMissing
|
||||
{
|
||||
CollectionId = collection.Id,
|
||||
Position = csvRelease.Position,
|
||||
Artist = searchName,
|
||||
Release = csvRelease.Release.NormalizeName()
|
||||
Artist = artistSpecialSearchName,
|
||||
Release = releaseSpecialSearchName
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
@ -676,13 +678,11 @@ namespace Roadie.Api.Services
|
|||
foreach (var artistResult in artistResults)
|
||||
{
|
||||
artist = artistResult;
|
||||
searchName = csvRelease.Release.NormalizeName().ToLower();
|
||||
specialSearchName = csvRelease.Release.ToAlphanumericName();
|
||||
release = (from r in DbContext.Releases
|
||||
where r.ArtistId == artist.Id
|
||||
where r.Title.Contains(searchName) ||
|
||||
r.AlternateNames.Contains(searchName) ||
|
||||
r.AlternateNames.Contains(specialSearchName)
|
||||
where r.Title.Contains(releaseSearchName) ||
|
||||
r.AlternateNames.Contains(releaseSearchName) ||
|
||||
r.AlternateNames.Contains(releaseSpecialSearchName)
|
||||
select r
|
||||
).FirstOrDefault();
|
||||
if (release != null) break;
|
||||
|
@ -691,7 +691,7 @@ namespace Roadie.Api.Services
|
|||
if (release == null)
|
||||
{
|
||||
await LogAndPublish(
|
||||
$"Unable To Find Release [{csvRelease.Release}] for Artist [{csvRelease.Artist}], SearchName [{searchName}]",
|
||||
$"Unable To Find Release [{csvRelease.Release}] for Artist [{csvRelease.Artist}], SearchName [{artistSearchName}]",
|
||||
LogLevel.Warning);
|
||||
csvRelease.Status = Statuses.Missing;
|
||||
DbContext.CollectionMissings.Add(new data.CollectionMissing
|
||||
|
@ -699,8 +699,8 @@ namespace Roadie.Api.Services
|
|||
CollectionId = collection.Id,
|
||||
IsArtistFound = true,
|
||||
Position = csvRelease.Position,
|
||||
Artist = csvRelease.Artist,
|
||||
Release = searchName
|
||||
Artist = artistSpecialSearchName,
|
||||
Release = releaseSpecialSearchName
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue