using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace PKHeX.Core
{
    /// <summary>
    /// Logic for converting a <see cref="string"/> between the various generation specific encoding formats.
    /// </summary>
    public static class StringConverter
    {
        /// <summary>
        /// Converts bytes to a string according to the input parameters.
        /// </summary>
        /// <param name="data">Encoded data</param>
        /// <param name="generation">Generation string format</param>
        /// <param name="jp">Encoding is Japanese</param>
        /// <param name="isBigEndian">Encoding is Big Endian</param>
        /// <param name="count">Length of data to read.</param>
        /// <param name="offset">Offset to read from</param>
        /// <returns>Decoded string.</returns>
        public static string GetString(byte[] data, int generation, bool jp, bool isBigEndian, int count, int offset = 0)
        {
            // Big Endian data is only decoded as Generation 3 or Generation 4; anything that is not 3 uses the Generation 4 decoder.
            if (isBigEndian)
            {
                return generation == 3
                    ? StringConverter3.GetBEString3(data, offset, count)
                    : StringConverter4.GetBEString4(data, offset, count);
            }

            switch (generation)
            {
                case 1:
                case 2:
                    return StringConverter12.GetString1(data, offset, count, jp);
                case 3:
                    return StringConverter3.GetString3(data, offset, count, jp);
                case 4:
                    return StringConverter4.GetString4(data, offset, count);
                case 5:
                    return GetString5(data, offset, count);
                case 6:
                    return GetString6(data, offset, count);
                default:
                    return GetString7(data, offset, count);
            }
        }

        /// <summary>
        /// Gets the bytes for a Generation specific string according to the input parameters.
        /// </summary>
        /// <param name="value">Decoded string.</param>
        /// <param name="generation">Generation string format</param>
        /// <param name="jp">Encoding is Japanese</param>
        /// <param name="isBigEndian">Encoding is Big Endian</param>
        /// <param name="maxLength">Maximum length</param>
        /// <param name="language">Language value forwarded to <see cref="SetString7"/>; not used by the other formats.</param>
        /// <param name="padTo">Pad to given length</param>
        /// <param name="padWith">Pad with value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString(string value, int generation, bool jp, bool isBigEndian, int maxLength, int language = 0, int padTo = 0, ushort padWith = 0)
        {
            // Big Endian data is only encoded as Generation 3 or Generation 4; anything that is not 3 uses the Generation 4 encoder.
            if (isBigEndian)
            {
                return generation == 3
                    ? StringConverter3.SetBEString3(value, maxLength, padTo, padWith)
                    : StringConverter4.SetBEString4(value, maxLength, padTo, padWith);
            }

            switch (generation)
            {
                case 1:
                case 2:
                    return StringConverter12.SetString1(value, maxLength, jp, padTo, padWith);
                case 3:
                    return StringConverter3.SetString3(value, maxLength, jp, padTo, padWith);
                case 4:
                    return StringConverter4.SetString4(value, maxLength, padTo, padWith);
                case 5:
                    return SetString5(value, maxLength, padTo, padWith);
                case 6:
                    return SetString6(value, maxLength, padTo, padWith);
                default:
                    return SetString7(value, maxLength, language, padTo, padWith);
            }
        }

        /// <summary>Converts Generation 5 encoded data to decoded string.</summary>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read.</param>
        /// <returns>Decoded string.</returns>
        public static string GetString5(byte[] data, int offset, int count)
        {
            var raw = Encoding.Unicode.GetString(data, offset, count);
            return SanitizeString(Util.TrimFromFFFF(raw));
        }

        /// <summary>Gets the bytes for a Generation 5 string.</summary>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length</param>
        /// <param name="padTo">Pad to given length</param>
        /// <param name="padWith">Pad with value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString5(string value, int maxLength, int padTo = 0, ushort padWith = 0)
        {
            value = CapLength(value, maxLength); // Hard cap
            string temp = UnSanitizeString(value, 5)
                .PadRight(value.Length + 1, (char)0xFFFF) // Terminator (0xFFFF)
                .PadRight(padTo, (char)padWith); // Padding
            return Encoding.Unicode.GetBytes(temp);
        }

        /// <summary>Converts Generation 6 encoded data to decoded string.</summary>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read.</param>
        /// <returns>Decoded string.</returns>
        public static string GetString6(byte[] data, int offset, int count)
        {
            var raw = Encoding.Unicode.GetString(data, offset, count);
            return SanitizeString(Util.TrimFromZero(raw));
        }

        /// <summary>Gets the bytes for a Generation 6 string.</summary>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length</param>
        /// <param name="padTo">Pad to given length</param>
        /// <param name="padWith">Pad with value</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString6(string value, int maxLength, int padTo = 0, ushort padWith = 0)
        {
            value = CapLength(value, maxLength); // Hard cap
            string temp = UnSanitizeString(value, 6)
                .PadRight(value.Length + 1, '\0') // Null Terminator
                .PadRight(padTo, (char)padWith); // Padding
            return Encoding.Unicode.GetBytes(temp);
        }

        /// <summary>Converts Generation 7 encoded data to decoded string.</summary>
        /// <param name="data">Encoded data</param>
        /// <param name="offset">Offset to read from</param>
        /// <param name="count">Length of data to read.</param>
        /// <returns>Decoded string.</returns>
        public static string GetString7(byte[] data, int offset, int count)
        {
            var raw = Encoding.Unicode.GetString(data, offset, count);
            // In-game Chinese characters are remapped back to Unicode after the usual sanitizing.
            return ConvertBin2StringG7_zh(SanitizeString(Util.TrimFromZero(raw)));
        }

        /// <summary>Gets the bytes for a Generation 7 string.</summary>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length</param>
        /// <param name="language">Language specific conversion (Chinese)</param>
        /// <param name="padTo">Pad to given length</param>
        /// <param name="padWith">Pad with value</param>
        /// <param name="chinese">Chinese string remapping should be attempted</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString7(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
        {
            if (chinese)
            {
                value = ConvertString2BinG7_zh(value, language);
            }

            // Hard cap at the caller's limit (previously a fixed 12, which ignored maxLength and could throw for short inputs).
            value = CapLength(value, maxLength);
            string temp = UnSanitizeString(value, 7)
                .PadRight(value.Length + 1, '\0') // Null Terminator
                .PadRight(padTo, (char)padWith); // Padding
            return Encoding.Unicode.GetBytes(temp);
        }

        /// <summary>
        /// Gets the bytes for a Generation 7 string, using <see cref="UnSanitizeString7b"/> instead of <see cref="UnSanitizeString"/>.
        /// </summary>
        /// <param name="value">Decoded string.</param>
        /// <param name="maxLength">Maximum length</param>
        /// <param name="language">Language specific conversion (Chinese)</param>
        /// <param name="padTo">Pad to given length</param>
        /// <param name="padWith">Pad with value</param>
        /// <param name="chinese">Chinese string remapping should be attempted</param>
        /// <returns>Encoded data.</returns>
        public static byte[] SetString7b(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
        {
            if (chinese)
            {
                value = ConvertString2BinG7_zh(value, language);
            }

            // Hard cap at the caller's limit (previously a fixed 12, which ignored maxLength and could throw for short inputs).
            value = CapLength(value, maxLength);
            string temp = UnSanitizeString7b(value)
                .PadRight(value.Length + 1, '\0') // Null Terminator
                .PadRight(padTo, (char)padWith); // Padding
            return Encoding.Unicode.GetBytes(temp);
        }

        /// <summary>
        /// Truncates the string to at most <paramref name="maxLength"/> characters.
        /// </summary>
        /// <param name="value">String to cap.</param>
        /// <param name="maxLength">Maximum length</param>
        /// <returns>The input when it already fits, otherwise its first <paramref name="maxLength"/> characters.</returns>
        private static string CapLength(string value, int maxLength)
        {
            return value.Length > maxLength ? value.Substring(0, maxLength) : value;
        }

        /// <summary>
        /// Converts a Unicode string to Generation 7 in-game Chinese string.
        /// </summary>
        /// <param name="input">Unicode string.</param>
        /// <param name="lang">Detection of language for Traditional Chinese check</param>
        /// <returns>In-game Chinese string.</returns>
        private static string ConvertString2BinG7_zh(string input, int lang)
        {
            var str = new StringBuilder();

            // A string cannot contain a mix of CHS and CHT characters.
            // Traditional is chosen when a character exists only in the CHT table, or when lang is 10
            // and no character is exclusive to either table.
            bool traditional = input.Any(chr => G7_CHT.ContainsKey(chr) && !G7_CHS.ContainsKey(chr))
                || (lang == 10 && !input.Any(chr => G7_CHT.ContainsKey(chr) ^ G7_CHS.ContainsKey(chr))); // CHS and CHT have the same display name
            var table = traditional ? G7_CHT : G7_CHS;

            // Characters found in the chosen table are shifted into the in-game range; others pass through unchanged.
            foreach (char chr in input)
            {
                str.Append(table.TryGetValue(chr, out int index) ? (char)(index + Gen7_ZH_Ofs) : chr);
            }

            return str.ToString();
        }

        /// <summary>
        /// Converts a Generation 7 in-game Chinese string to Unicode string.
        /// </summary>
        /// <param name="input">In-game Chinese string.</param>
        /// <returns>Unicode string.</returns>
        private static string ConvertBin2StringG7_zh(string input)
        {
            var str = new StringBuilder();
            foreach (var val in input)
            {
                str.Append((char)GetGen7ChineseChar(val));
            }

            return str.ToString();
        }

        /// <summary>
        /// Shifts a character from the Chinese character tables
        /// </summary>
        /// <param name="val">Input value to shift</param>
        /// <returns>Shifted character</returns>
        private static ushort GetGen7ChineseChar(ushort val)
        {
            if (Gen7_ZH_Ofs <= val && val < Gen7_ZH_Ofs + Gen7_ZH.Length)
            {
                return Gen7_ZH[val - Gen7_ZH_Ofs];
            }

            return val; // regular character
        }

        #region Gen 7 Chinese Character Tables

        // Character table loaded from the "Char" string list for "zh"; the index into it is the in-game value minus Gen7_ZH_Ofs.
        private static readonly char[] Gen7_ZH = Util.GetStringList("Char", "zh")[0].ToCharArray();

        // First in-game value that maps into Gen7_ZH.
        private const ushort Gen7_ZH_Ofs = 0xE800;
        private const ushort SM_ZHCharTable_Size = 0x30F;
        private const ushort USUM_CHS_Size = 0x4;

        // Table indexes treated as CHS: the first SM_ZHCharTable_Size entries, plus USUM_CHS_Size entries starting at twice that size.
        private static bool GetisG7CHSChar(int idx) => idx < SM_ZHCharTable_Size || (SM_ZHCharTable_Size * 2 <= idx && idx < (SM_ZHCharTable_Size * 2) + USUM_CHS_Size);

        // Unicode character -> table index, for the CHS portion of the table.
        private static readonly Dictionary<char, int> G7_CHS = Gen7_ZH
            .Select((value, index) => new { value, index })
            .Where(pair => GetisG7CHSChar(pair.index))
            .ToDictionary(pair => pair.value, pair => pair.index);

        // Unicode character -> table index, for every entry that is not CHS.
        private static readonly Dictionary<char, int> G7_CHT = Gen7_ZH
            .Select((value, index) => new { value, index })
            .Where(pair => !GetisG7CHSChar(pair.index))
            .ToDictionary(pair => pair.value, pair => pair.index);

        #endregion

        /// <summary>
        /// Replaces the typographic apostrophe with a plain apostrophe and remaps the custom gender glyphs to their Unicode symbols.
        /// </summary>
        /// <param name="str">Input string to sanitize.</param>
        /// <returns>Sanitized string; an empty input is returned as-is.</returns>
        internal static string SanitizeString(string str)
        {
            if (str.Length == 0)
            {
                return str;
            }

            var s = str.Replace('’', '\''); // Farfetch'd

            // remap custom glyphs to unicode
            s = s.Replace('\uE08F', '♀'); // ♀ (gen6+)
            s = s.Replace('\uE08E', '♂'); // ♂ (gen6+)
            s = s.Replace('\u246E', '♀'); // ♀ (gen5)
            return s.Replace('\u246D', '♂'); // ♂ (gen5)
        }

        /// <summary>
        /// Replaces the plain apostrophe with the typographic apostrophe; gender symbols are left untouched.
        /// </summary>
        /// <param name="str">Input string to set.</param>
        /// <returns>String ready for encoding.</returns>
        private static string UnSanitizeString7b(string str)
        {
            // gender chars always full width
            return str.Replace('\'', '’'); // Farfetch'd
        }

        /// <summary>
        /// Reverses <see cref="SanitizeString"/> for the given generation: restores the typographic apostrophe (generation 6+)
        /// and converts the gender symbols to the generation's custom glyphs when appropriate.
        /// </summary>
        /// <param name="str">Input string to set.</param>
        /// <param name="generation">Generation specific context</param>
        /// <returns>String ready for encoding.</returns>
        internal static string UnSanitizeString(string str, int generation)
        {
            var s = str;
            if (generation >= 6)
            {
                s = str.Replace('\'', '’'); // Farfetch'd
            }

            if (generation <= 5)
            {
                s = s.Replace('\u2640', '\u246E'); // ♀
                return s.Replace('\u2642', '\u246D'); // ♂
            }

            // Ignoring the gender symbols themselves, look at the top nibble of every character
            // to select the group the char belongs to.
            bool fullwidth = str.Except(FullToHalf)
                .Any(c => (c >> 12) != 0 /* Latin */ && (c >> 12) != 0xE /* Special Symbols */);

            if (fullwidth) // jp/ko/zh strings
            {
                return s; // keep as full width
            }

            // Convert back to half width glyphs
            s = s.Replace('\u2640', '\uE08F'); // ♀
            return s.Replace('\u2642', '\uE08E'); // ♂
        }

        // Gender symbols excluded from the full width check in UnSanitizeString.
        private static readonly char[] FullToHalf = {'\u2640', '\u2642'}; // ♀♂

        /// <summary>
        /// Checks whether any character falls within the range 0x4E00-0x9FFF (inclusive).
        /// </summary>
        /// <param name="str">Characters to check.</param>
        /// <returns>True if at least one character is in the range.</returns>
        public static bool HasEastAsianScriptCharacters(IEnumerable<char> str) => str.Any(c => 0x4E00 <= c && c <= 0x9FFF);
    }
}