Cache all created regexes

Regex objects aren't cheap, and the RegexCache has DefaultMaxCacheSize = 15.

We're checking 4,000 regexes for each unique string, so just keep the created regexes around instead of cycling new ones through the cache.

+4MB passive consumption, but each IsFiltered call no longer generates >4MB of discarded objects.

My unit tests run >25% faster now... nice?!
This commit is contained in:
Kurt 2021-06-03 17:24:45 -07:00
parent 8ec25da194
commit 927ccee613

View file

@ -9,9 +9,21 @@ namespace PKHeX.Core
public static class WordFilter
{
/// <summary>
/// Source pattern regexes to check with
/// Regex patterns to check against
/// </summary>
private static readonly string[] Patterns = Util.GetStringList("badwords");
/// <remarks>No need to keep the original pattern strings around; the <see cref="Regex"/> object retrieves this via <see cref="Regex.ToString()"/></remarks>
private static readonly Regex[] Regexes = LoadPatterns(Util.GetStringList("badwords"));
// If you're running this as a server and don't mind a few extra seconds of startup, add RegexOptions.Compiled for slightly faster matching.
private const RegexOptions Options = RegexOptions.CultureInvariant;
/// <summary>
/// Creates one <see cref="Regex"/> per input pattern string, using the shared <see cref="Options"/>.
/// </summary>
/// <param name="patterns">Pattern strings to turn into <see cref="Regex"/> objects.</param>
/// <returns>An array with the same length and ordering as <paramref name="patterns"/>.</returns>
private static Regex[] LoadPatterns(IReadOnlyList<string> patterns)
{
    var compiled = new Regex[patterns.Count];
    int index = 0;
    // Walk the patterns in order so compiled[i] corresponds to patterns[i].
    foreach (var pattern in patterns)
    {
        compiled[index] = new Regex(pattern, Options);
        index++;
    }
    return compiled;
}
/// <summary>
/// Due to some messages repeating (Trainer names), keep a list of repeated values for faster lookup.
@ -43,13 +55,13 @@ namespace PKHeX.Core
}
// not in dictionary, check patterns
foreach (var pattern in Patterns)
foreach (var regex in Regexes)
{
if (!Regex.IsMatch(msg, pattern))
if (!regex.IsMatch(msg))
continue;
// match found, cache result
regMatch = pattern;
regMatch = regex.ToString(); // fetches from regex field
lock (dictLock)
Lookup[msg] = regMatch;
return true;