using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace PKHeX.Core
{
///
/// Logic for converting a between the various generation specific encoding formats.
///
public static class StringConverter
{
///
/// Converts bytes to a string according to the input parameters.
///
/// Encoded data
/// Generation string format
/// Encoding is Japanese
/// Encoding is Big Endian
/// Length of data to read.
/// Offset to read from
/// Decoded string.
public static string GetString(byte[] data, int generation, bool jp, bool isBigEndian, int count, int offset = 0)
{
if (isBigEndian)
return generation == 3 ? StringConverter3.GetBEString3(data, offset, count) : StringConverter4.GetBEString4(data, offset, count);
return generation switch
{
1 or 2 => StringConverter12.GetString1(data, offset, count, jp),
3 => StringConverter3.GetString3(data, offset, count, jp),
4 => StringConverter4.GetString4(data, offset, count),
5 => GetString5(data, offset, count),
6 => GetString6(data, offset, count),
7 => GetString7(data, offset, count),
_ => GetString7(data, offset, count),
};
}
///
/// Gets the bytes for a Generation specific string according to the input parameters.
///
/// Decoded string.
/// Generation string format
/// Encoding is Japanese
/// Encoding is Big Endian
/// Maximum length of the input
/// Language specific conversion (Chinese)
/// Pad the input to given length
/// Pad the input with this character value
/// Encoded data.
public static byte[] SetString(string value, int generation, bool jp, bool isBigEndian, int maxLength, int language = 0, int padTo = 0, ushort padWith = 0)
{
if (isBigEndian)
return generation == 3 ? StringConverter3.SetBEString3(value, maxLength, padTo, padWith) : StringConverter4.SetBEString4(value, maxLength, padTo, padWith);
return generation switch
{
1 or 2 => StringConverter12.SetString1(value, maxLength, jp, padTo, padWith),
3 => StringConverter3.SetString3(value, maxLength, jp, padTo, padWith),
4 => StringConverter4.SetString4(value, maxLength, padTo, padWith),
5 => SetString5(value, maxLength, padTo, padWith),
6 => SetString6(value, maxLength, padTo, padWith),
7 => SetString7(value, maxLength, language, padTo, padWith),
_ => SetString7b(value, maxLength, language, padTo, padWith),
};
}
/// Converts Generation 5 encoded data to decoded string.
/// Encoded data
/// Offset to read from
/// Length of data to read.
/// Decoded string.
public static string GetString5(byte[] data, int offset, int count)
{
var raw = Encoding.Unicode.GetString(data, offset, count);
var sb = new StringBuilder(raw);
Util.TrimFromFFFF(sb);
SanitizeString(sb);
return sb.ToString();
}
/// Gets the bytes for a Generation 5 string.
/// Decoded string.
/// Maximum length of the input
/// Pad the input to given length
/// Pad the input with this character value
/// Encoded data.
public static byte[] SetString5(string value, int maxLength, int padTo = 0, ushort padWith = 0)
{
var sb = new StringBuilder(value, Math.Max(maxLength, padTo));
var delta = sb.Length - maxLength;
if (delta > 0)
sb.Remove(maxLength, delta);
// Replace Special Characters and add Terminator
UnSanitizeString(sb, 5);
sb.Append((char)0xFFFF);
var d2 = padTo - sb.Length;
if (d2 > 0)
sb.Append((char)padWith, d2);
return Encoding.Unicode.GetBytes(sb.ToString());
}
/// Converts Generation 6 encoded data to decoded string.
/// Encoded data
/// Offset to read from
/// Length of data to read.
/// Decoded string.
public static string GetString6(byte[] data, int offset, int count)
{
var raw = Encoding.Unicode.GetString(data, offset, count);
var sb = new StringBuilder(raw);
Util.TrimFromZero(sb);
SanitizeString(sb);
return sb.ToString();
}
/// Gets the bytes for a Generation 6 string.
/// Decoded string.
/// Maximum length of the input
/// Pad the input to given length
/// Pad the input with this character value
/// Encoded data.
public static byte[] SetString6(string value, int maxLength, int padTo = 0, ushort padWith = 0)
{
var sb = new StringBuilder(value);
var delta = sb.Length - maxLength;
if (delta > 0)
sb.Remove(maxLength, delta);
// Replace Special Characters and add Terminator
UnSanitizeString(sb, 6);
sb.Append((char)0);
var d2 = padTo - sb.Length;
if (d2 > 0)
sb.Append((char)padWith, d2);
return Encoding.Unicode.GetBytes(sb.ToString());
}
/// Converts Generation 7 encoded data to decoded string.
/// Encoded data
/// Offset to read from
/// Length of data to read.
/// Decoded string.
public static string GetString7(byte[] data, int offset, int count)
{
var raw = Encoding.Unicode.GetString(data, offset, count);
var sb = new StringBuilder(raw);
Util.TrimFromZero(sb);
SanitizeString(sb);
RemapChineseGlyphsBin2String(sb);
return sb.ToString();
}
/// Gets the bytes for a Generation 7 string.
/// Decoded string.
/// Maximum length of the input
/// Language specific conversion (Chinese)
/// Pad the input to given length
/// Pad the input with this character value
/// Chinese string remapping should be attempted
/// Encoded data.
public static byte[] SetString7(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
{
var sb = new StringBuilder(value);
var delta = sb.Length - maxLength;
if (delta > 0)
sb.Remove(maxLength, delta);
if (chinese)
ConvertString2BinG7_zh(sb, language);
// Replace Special Characters and add Terminator
UnSanitizeString(sb, 7);
sb.Append((char)0);
var d2 = padTo - sb.Length;
if (d2 > 0)
sb.Append((char)padWith, d2);
return Encoding.Unicode.GetBytes(sb.ToString());
}
/// Gets the bytes for a Generation 7 string.
/// Decoded string.
/// Maximum length of the input
/// Language specific conversion (Chinese)
/// Pad the input to given length
/// Pad the input with this character value
/// Chinese string remapping should be attempted
/// Encoded data.
public static byte[] SetString7b(string value, int maxLength, int language, int padTo = 0, ushort padWith = 0, bool chinese = false)
{
var sb = new StringBuilder(value);
var delta = sb.Length - maxLength;
if (delta > 0)
sb.Remove(maxLength, delta);
if (chinese)
ConvertString2BinG7_zh(sb, language);
// Replace Special Characters and add Terminator
UnSanitizeString7b(sb);
sb.Append((char)0);
var d2 = padTo - sb.Length;
if (d2 > 0)
sb.Append((char)padWith, d2);
return Encoding.Unicode.GetBytes(sb.ToString());
}
///
/// Converts a Unicode string to Generation 7 in-game Chinese string.
///
/// Unicode string.
/// Detection of language for Traditional Chinese check
/// In-game Chinese string.
private static void ConvertString2BinG7_zh(StringBuilder sb, int lang)
{
// A string cannot contain a mix of CHS and CHT characters.
var input = sb.ToString();
bool traditional = input.Any(chr => G7_CHT.ContainsKey(chr) && !G7_CHS.ContainsKey(chr))
|| (lang == 10 && !input.Any(chr => G7_CHT.ContainsKey(chr) ^ G7_CHS.ContainsKey(chr))); // CHS and CHT have the same display name
var table = traditional ? G7_CHT : G7_CHS;
for (int i = 0; i < sb.Length; i++)
{
var chr = sb[i];
if (table.TryGetValue(chr, out var index))
sb[i] = (char) (index + Gen7_ZH_Ofs);
}
}
///
/// Converts a Generation 7 in-game Chinese string to Unicode string.
///
/// In-game Chinese string.
/// Unicode string.
private static void RemapChineseGlyphsBin2String(StringBuilder input)
{
for (int i = 0; i < input.Length; i++)
{
char val = input[i];
if (val < Gen7_ZH_Ofs || val >= Gen7_ZH_Ofs + Gen7_ZH.Length)
continue;
input[i] = Gen7_ZH[val - Gen7_ZH_Ofs];
}
}
#region Gen 7 Chinese Character Tables
private static readonly char[] Gen7_ZH = Util.GetStringList("Char", "zh")[0].ToCharArray();
private const ushort Gen7_ZH_Ofs = 0xE800;
private const ushort SM_ZHCharTable_Size = 0x30F;
private const ushort USUM_CHS_Size = 0x4;
private static bool GetisG7CHSChar(int idx) => idx is < SM_ZHCharTable_Size or >= SM_ZHCharTable_Size * 2 and < (SM_ZHCharTable_Size * 2) + USUM_CHS_Size;
private static readonly Dictionary G7_CHS = Gen7_ZH
.Select((value, index) => new { value, index })
.Where(pair => GetisG7CHSChar(pair.index))
.ToDictionary(pair => pair.value, pair => pair.index);
private static readonly Dictionary G7_CHT = Gen7_ZH
.Select((value, index) => new { value, index })
.Where(pair => !GetisG7CHSChar(pair.index))
.ToDictionary(pair => pair.value, pair => pair.index);
#endregion
///
/// Converts full width to single width
///
/// Input string to sanitize.
///
internal static void SanitizeString(StringBuilder s)
{
if (s.Length == 0)
return;
s.Replace('’', '\''); // Farfetch'd
// remap custom glyphs to unicode
s.Replace('\uE08F', '♀'); // ♀ (gen6+)
s.Replace('\uE08E', '♂'); // ♂ (gen6+)
s.Replace('\u246E', '♀'); // ♀ (gen5)
s.Replace('\u246D', '♂'); // ♂ (gen5)
}
///
/// Converts full width to half width when appropriate
///
/// Input string to set.
///
private static void UnSanitizeString7b(StringBuilder str)
{
// gender chars always full width
str.Replace('\'', '’'); // Farfetch'd
}
///
/// Converts full width to half width when appropriate
///
/// Input string to set.
/// Generation specific context
///
internal static void UnSanitizeString(StringBuilder str, int generation)
{
if (generation >= 6)
str.Replace('\'', '’'); // Farfetch'd
if (generation <= 5)
{
str.Replace('\u2640', '\u246E'); // ♀
str.Replace('\u2642', '\u246D'); // ♂
return;
}
var context = str.ToString().Except(FullToHalf);
bool fullwidth = context.Select(c => c >> 12) // select the group the char belongs to
.Any(c => c is not (0 or 0xE) /* Latin, Special Symbols */);
if (fullwidth) // jp/ko/zh strings
return; // keep as full width
// Convert back to half width glyphs
str.Replace('\u2640', '\uE08F'); // ♀
str.Replace('\u2642', '\uE08E'); // ♂
}
private static readonly char[] FullToHalf = {'\u2640', '\u2642'}; // ♀♂
public static bool HasEastAsianScriptCharacters(IEnumerable str) => str.Any(c => c is >= '\u4E00' and <= '\u9FFF');
}
}