using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace PKHeX.Core
{
    /// <summary>
    /// Logic for converting a <see cref="string"/> between the various generation specific encoding formats.
    /// </summary>
    public static class StringConverter
    {
        private const char TerminatorNull = (char)0;
        private const char TerminatorFFFF = (char)0xFFFF;

        /// <summary>
        /// Converts bytes to a string according to the input parameters.
        /// </summary>
        /// <remarks>
        /// Big Endian data is handled by the Generation 3 reader when <paramref name="generation"/> is 3,
        /// and by the Generation 4 reader for every other value; <paramref name="jp"/> is not used on that path.
        /// For Little Endian data, any generation outside 1-6 is decoded with <see cref="GetString7"/>.
        /// </remarks>
        /// <param name="data">Encoded data</param>
        /// <param name="generation">Generation string format</param>
        /// <param name="jp">Encoding is Japanese</param>
        /// <param name="isBigEndian">Encoding is Big Endian</param>
        /// <param name="count">Length of data to read.</param>
        /// <param name="offset">Offset to read from</param>
        /// <returns>Decoded string.</returns>
        public static string GetString(byte[] data, int generation, bool jp, bool isBigEndian, int count, int offset = 0)
        {
            if (isBigEndian)
            {
                return generation == 3
                    ? StringConverter3.GetBEString3(data, offset, count)
                    : StringConverter4.GetBEString4(data, offset, count);
            }

            return generation switch
            {
                1 or 2 => StringConverter12.GetString1(data, offset, count, jp),
                3 => StringConverter3.GetString3(data, offset, count, jp),
                4 => StringConverter4.GetString4(data, offset, count),
                5 => GetString5(data, offset, count),
                6 => GetString6(data, offset, count),
                _ => GetString7(data, offset, count), // Generation 7 and anything unrecognized share one decoder.
            };
        }

        /// <summary>
        /// Gets the bytes for a Generation specific string according to the input parameters.
        /// </summary>
        /// <remarks>
        /// Big Endian data is handled by the Generation 3 writer when <paramref name="generation"/> is 3,
        /// and by the Generation 4 writer for every other value.
        /// For Little Endian data, generation 7 uses <see cref="SetString7"/> and any generation outside 1-7 uses <see cref="SetString7b"/>.
        /// NOTE(review): both of those calls leave their <c>chinese</c> argument at its default (false), so
        /// <paramref name="language"/> currently has no effect through this entry point - confirm this is intended.
        /// </remarks>
        /// <param name="value">Decoded string.</param>
        /// <param name="generation">Generation string format</param>
        /// <param name="jp">Encoding is Japanese</param>
        /// <param name="isBigEndian">Encoding is Big Endian</param>
        /// <param name="maxLength">Maximum length of the input <see cref="value"/></param>
        /// <param name="language">Language specific conversion (Chinese)</param>
        /// <param name="padTo">Pad the input <see cref="value"/> to given length</param>
        /// <param name="padWith">Pad the input <see cref="value"/> with this character value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString(string value, int generation, bool jp, bool isBigEndian, int maxLength, int language = 0, int padTo = 0, ushort padWith = 0)
        {
            if (isBigEndian)
            {
                return generation == 3
                    ? StringConverter3.SetBEString3(value, maxLength, padTo, padWith)
                    : StringConverter4.SetBEString4(value, maxLength, padTo, padWith);
            }

            return generation switch
            {
                1 or 2 => StringConverter12.SetString1(value, maxLength, jp, padTo, padWith),
                3 => StringConverter3.SetString3(value, maxLength, jp, padTo, padWith),
                4 => StringConverter4.SetString4(value, maxLength, padTo, padWith),
                5 => SetString5(value, maxLength, padTo, padWith),
                6 => SetString6(value, maxLength, padTo, padWith),
                7 => SetString7(value, maxLength, language, padTo, padWith),
                _ => SetString7b(value, maxLength, language, padTo, padWith),
            };
        }

        /// <summary>Converts Generation 5 encoded data to decoded string.</summary>
        /// <remarks>The data is read as UTF-16 (<see cref="Encoding.Unicode"/>) and cut at the first 0xFFFF terminator.</remarks>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read (in bytes).</param>
        /// <returns>Decoded string.</returns>
        public static string GetString5(byte[] data, int offset, int count)
        {
            return DecodeUtf16(data, offset, count, TerminatorFFFF).ToString();
        }

        /// <summary>Gets the bytes for a Generation 5 string.</summary>
        /// <remarks>The result is UTF-16 encoded and terminated with 0xFFFF.</remarks>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length of the input <see cref="value"/> (characters kept before the terminator)</param>
        /// <param name="padTo">Pad the input <see cref="value"/> to given length (total characters, terminator included)</param>
        /// <param name="padWith">Pad the input <see cref="value"/> with this character value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString5(string value, int maxLength, int padTo = 0, ushort padWith = 0)
        {
            // The capacity argument is only a sizing hint for the builder.
            var sb = new StringBuilder(value, Math.Max(maxLength, padTo));
            TruncateToLength(sb, maxLength);

            // Replace Special Characters and add Terminator
            UnSanitizeString(sb, 5);
            return AppendTerminatorAndEncode(sb, TerminatorFFFF, padTo, padWith);
        }

        /// <summary>Converts Generation 6 encoded data to decoded string.</summary>
        /// <remarks>The data is read as UTF-16 (<see cref="Encoding.Unicode"/>) and cut at the first null terminator.</remarks>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read (in bytes).</param>
        /// <returns>Decoded string.</returns>
        public static string GetString6(byte[] data, int offset, int count)
        {
            return DecodeUtf16(data, offset, count, TerminatorNull).ToString();
        }

        /// <summary>Gets the bytes for a Generation 6 string.</summary>
        /// <remarks>The result is UTF-16 encoded and null terminated.</remarks>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length of the input <see cref="value"/> (characters kept before the terminator)</param>
        /// <param name="padTo">Pad the input <see cref="value"/> to given length (total characters, terminator included)</param>
        /// <param name="padWith">Pad the input <see cref="value"/> with this character value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString6(string value, int maxLength, int padTo = 0, ushort padWith = 0)
        {
            var sb = new StringBuilder(value);
            TruncateToLength(sb, maxLength);

            // Replace Special Characters and add Terminator
            UnSanitizeString(sb, 6);
            return AppendTerminatorAndEncode(sb, TerminatorNull, padTo, padWith);
        }

        /// <summary>Converts Generation 7 encoded data to decoded string.</summary>
        /// <remarks>
        /// The data is read as UTF-16 (<see cref="Encoding.Unicode"/>), cut at the first null terminator,
        /// and any in-game Chinese glyph code is mapped back to its Unicode character.
        /// </remarks>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read (in bytes).</param>
        /// <returns>Decoded string.</returns>
        public static string GetString7(byte[] data, int offset, int count)
        {
            var sb = DecodeUtf16(data, offset, count, TerminatorNull);
            RemapChineseGlyphsBin2String(sb);
            return sb.ToString();
        }

        /// <summary>Gets the bytes for a Generation 7 string.</summary>
        /// <remarks>The result is UTF-16 encoded and null terminated.</remarks>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length of the input <see cref="value"/> (characters kept before the terminator)</param>
        /// <param name="language">Language specific conversion (Chinese); only read when <paramref name="chinese"/> is true</param>
        /// <param name="padTo">Pad the input <see cref="value"/> to given length (total characters, terminator included)</param>
        /// <param name="padWith">Pad the input <see cref="value"/> with this character value</param>
        /// <param name="chinese">Chinese string remapping should be attempted</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString7(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
        {
            var sb = new StringBuilder(value);
            TruncateToLength(sb, maxLength);
            if (chinese)
                ConvertString2BinG7_zh(sb, language);

            // Replace Special Characters and add Terminator
            UnSanitizeString(sb, 7);
            return AppendTerminatorAndEncode(sb, TerminatorNull, padTo, padWith);
        }

        /// <summary>Gets the bytes for a Generation 7 string.</summary>
        /// <remarks>
        /// Identical to <see cref="SetString7"/> except that special characters go through
        /// <see cref="UnSanitizeString7b"/>, which leaves the string untouched.
        /// </remarks>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length of the input <see cref="value"/> (characters kept before the terminator)</param>
        /// <param name="language">Language specific conversion (Chinese); only read when <paramref name="chinese"/> is true</param>
        /// <param name="padTo">Pad the input <see cref="value"/> to given length (total characters, terminator included)</param>
        /// <param name="padWith">Pad the input <see cref="value"/> with this character value</param>
        /// <param name="chinese">Chinese string remapping should be attempted</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString7b(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
        {
            var sb = new StringBuilder(value);
            TruncateToLength(sb, maxLength);
            if (chinese)
                ConvertString2BinG7_zh(sb, language);

            // Replace Special Characters and add Terminator
            UnSanitizeString7b(sb);
            return AppendTerminatorAndEncode(sb, TerminatorNull, padTo, padWith);
        }

        /// <summary>Decodes UTF-16 bytes, cuts the text at the first terminator and remaps custom glyphs to Unicode.</summary>
        private static StringBuilder DecodeUtf16(byte[] data, int offset, int count, char terminator)
        {
            var raw = Encoding.Unicode.GetString(data, offset, count);
            var sb = new StringBuilder(raw);
            Util.TrimFromFirst(sb, terminator);
            SanitizeString(sb);
            return sb;
        }

        /// <summary>Drops every character past <paramref name="maxLength"/>; shorter input is left unchanged.</summary>
        private static void TruncateToLength(StringBuilder sb, int maxLength)
        {
            var excess = sb.Length - maxLength;
            if (excess > 0)
                sb.Remove(maxLength, excess);
        }

        /// <summary>Appends the terminator, pads up to <paramref name="padTo"/> characters in total, and encodes the result as UTF-16.</summary>
        private static byte[] AppendTerminatorAndEncode(StringBuilder sb, char terminator, int padTo, ushort padWith)
        {
            sb.Append(terminator);

            // The terminator counts towards the padded length.
            var remaining = padTo - sb.Length;
            if (remaining > 0)
                sb.Append((char)padWith, remaining);
            return Encoding.Unicode.GetBytes(sb.ToString());
        }

        /// <summary>
        /// Converts a Unicode string to Generation 7 in-game Chinese string.
        /// </summary>
        /// <param name="sb">Unicode string; converted in place to the in-game Chinese string.</param>
        /// <param name="lang">Detection of language for Traditional Chinese check</param>
        private static void ConvertString2BinG7_zh(StringBuilder sb, int lang)
        {
            // A string cannot contain a mix of CHS and CHT characters.
            var input = sb.ToString();

            // Traditional when some character exists only in the CHT table, or when the language is 10
            // and no character is exclusive to either table.
            bool traditional = input.Any(chr => G7_CHT.ContainsKey(chr) && !G7_CHS.ContainsKey(chr))
                || (lang == 10 && !input.Any(chr => G7_CHT.ContainsKey(chr) ^ G7_CHS.ContainsKey(chr))); // CHS and CHT have the same display name
            var table = traditional ? G7_CHT : G7_CHS;

            for (int i = 0; i < sb.Length; i++)
            {
                var chr = sb[i];
                if (table.TryGetValue(chr, out var index))
                    sb[i] = (char)(index + Gen7_ZH_Ofs);
            }
        }

        /// <summary>
        /// Converts a Generation 7 in-game Chinese string to Unicode string.
        /// </summary>
        /// <param name="input">In-game Chinese string; converted in place to the Unicode string.</param>
        private static void RemapChineseGlyphsBin2String(StringBuilder input)
        {
            for (int i = 0; i < input.Length; i++)
            {
                char val = input[i];

                // Only codes inside [Gen7_ZH_Ofs, Gen7_ZH_Ofs + table length) are in-game glyphs.
                if (val < Gen7_ZH_Ofs || val >= Gen7_ZH_Ofs + Gen7_ZH.Length)
                    continue;
                input[i] = Gen7_ZH[val - Gen7_ZH_Ofs];
            }
        }

        #region Gen 7 Chinese Character Tables
        // Declaration order matters here: the two dictionaries are built from Gen7_ZH by static initializers.
        private static readonly char[] Gen7_ZH = Util.GetStringList("Char", "zh")[0].ToCharArray();

        private const ushort Gen7_ZH_Ofs = 0xE800;
        private const ushort SM_ZHCharTable_Size = 0x30F;
        private const ushort USUM_CHS_Size = 0x4;

        /// <summary>
        /// Indicates whether a <see cref="Gen7_ZH"/> table index is treated as a CHS entry: the index is below
        /// <see cref="SM_ZHCharTable_Size"/>, or within the <see cref="USUM_CHS_Size"/> entries starting at twice that size.
        /// </summary>
        private static bool GetisG7CHSChar(int idx) => idx is < SM_ZHCharTable_Size or >= SM_ZHCharTable_Size * 2 and < (SM_ZHCharTable_Size * 2) + USUM_CHS_Size;

        /// <summary>Maps each CHS character to its index within <see cref="Gen7_ZH"/>.</summary>
        private static readonly Dictionary<char, int> G7_CHS = Gen7_ZH
            .Select((value, index) => new { value, index })
            .Where(pair => GetisG7CHSChar(pair.index))
            .ToDictionary(pair => pair.value, pair => pair.index);

        /// <summary>Maps each character that is not a CHS entry to its index within <see cref="Gen7_ZH"/>.</summary>
        private static readonly Dictionary<char, int> G7_CHT = Gen7_ZH
            .Select((value, index) => new { value, index })
            .Where(pair => !GetisG7CHSChar(pair.index))
            .ToDictionary(pair => pair.value, pair => pair.index);
        #endregion

        /// <summary>
        /// Remaps the custom gender glyphs to their Unicode symbols, in place.
        /// </summary>
        /// <param name="s">Input string to sanitize.</param>
        internal static void SanitizeString(StringBuilder s)
        {
            if (s.Length == 0)
                return;

            // remap custom glyphs to unicode
            s.Replace('\uE08F', '♀'); // ♀ (gen6+)
            s.Replace('\uE08E', '♂'); // ♂ (gen6+)
            s.Replace('\u246E', '♀'); // ♀ (gen5)
            s.Replace('\u246D', '♂'); // ♂ (gen5)
        }

        /// <summary>
        /// Generation 7b counterpart of <see cref="UnSanitizeString"/>; performs no replacement.
        /// </summary>
        /// <param name="str">Input string to set.</param>
        private static void UnSanitizeString7b(StringBuilder str)
        {
            // gender chars always full width
        }

        /// <summary>
        /// Converts full width to half width when appropriate
        /// </summary>
        /// <remarks>
        /// For generation 5 and earlier the gender symbols are always replaced with the gen5 glyphs.
        /// For later generations they are replaced only when every other character has a top nibble of 0 or 0xE.
        /// </remarks>
        /// <param name="str">Input string to set.</param>
        /// <param name="generation">Generation specific context</param>
        internal static void UnSanitizeString(StringBuilder str, int generation)
        {
            if (generation <= 5)
            {
                str.Replace('\u2640', '\u246E'); // ♀
                str.Replace('\u2642', '\u246D'); // ♂
                return;
            }

            // The gender symbols themselves must not influence the width decision.
            var context = str.ToString().Except(FullToHalf);
            bool fullwidth = context.Select(c => c >> 12) // select the group the char belongs to
                .Any(c => c is not (0 or 0xE) /* Latin, Special Symbols */);
            if (fullwidth) // jp/ko/zh strings
                return; // keep as full width

            // Convert back to half width glyphs
            str.Replace('\u2640', '\uE08F'); // ♀
            str.Replace('\u2642', '\uE08E'); // ♂
        }

        private static readonly char[] FullToHalf = {'\u2640', '\u2642'}; // ♀♂

        /// <summary>
        /// Indicates whether any character lies in the range U+4E00 to U+9FFF (inclusive).
        /// </summary>
        /// <param name="str">Characters to inspect.</param>
        /// <returns>True if at least one character is within that range.</returns>
        public static bool HasEastAsianScriptCharacters(IEnumerable<char> str) => str.Any(c => c is >= '\u4E00' and <= '\u9FFF');
    }
}