PKHeX/PKHeX.Core/PKM/Strings/StringConverter3.cs
abcboy101 8a3a338c0b
Fix encodings for Gen 3/4/5 transfers (#4220)
Adjust Gen 3/4 encodings to be consistent with Gen 5 Unicode encodings
Gen 3 quotation marks are displayed differently based on the game language
Implement how Pal Park handles invalid characters and corrupts certain accented characters
Implement how Poké Transfer handles invalid characters
Use U+25BA BLACK RIGHT-POINTING POINTER, since this character is used as the pointer in menus/etc., rather than as a bullet or generic shape
2024-03-17 22:34:13 -06:00

228 lines
12 KiB
C#
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
namespace PKHeX.Core;
/// <summary>
/// Logic for converting a <see cref="string"/> for Generation 3.
/// </summary>
public static class StringConverter3
{
/// <summary>
/// Encoded byte value that marks the end of a Generation 3 string.
/// </summary>
public const byte TerminatorByte = 0xFF;
// Terminator byte widened to a char: the decode tables pad their unused tail with it, and decoding stops when it is produced.
private const char Terminator = (char)TerminatorByte;
private const char Apostrophe = '\''; // Plain ASCII apostrophe, accepted as user-friendly input when encoding
private const byte ApostropheByte = 0xB4; // Encoded value the plain apostrophe is remapped to
private const byte QuoteLeftByte = 0xB1; // Encoded opening quotation mark; the decoded glyph depends on the language
private const byte QuoteRightByte = 0xB2; // Encoded closing quotation mark; the decoded glyph depends on the language
/// <summary>
/// Decodes a Generation 3 encoded byte sequence into a <see cref="string"/>.
/// </summary>
/// <param name="data">Encoded bytes to decode.</param>
/// <param name="language">Language ID selecting the character table and quotation mark style.</param>
/// <returns>The decoded text, ending before the first terminator value encountered.</returns>
public static string GetString(ReadOnlySpan<byte> data, int language)
{
    // Each encoded byte yields at most one character, so the input length is a sufficient capacity.
    Span<char> decoded = stackalloc char[data.Length];
    int length = LoadString(data, decoded, language);
    return decoded.Slice(0, length).ToString();
}
/// <inheritdoc cref="GetString(ReadOnlySpan{byte},int)"/>
/// <param name="data">Encoded bytes to decode.</param>
/// <param name="jp">True to decode with the Japanese table, false to decode as English.</param>
public static string GetString(ReadOnlySpan<byte> data, bool jp)
{
    var language = jp ? LanguageID.Japanese : LanguageID.English;
    return GetString(data, (int)language);
}
/// <summary>
/// Decodes Generation 3 encoded bytes into a caller-supplied character buffer.
/// </summary>
/// <param name="data">Encoded bytes to decode.</param>
/// <param name="result">Destination buffer; one character is written per decoded byte.</param>
/// <param name="language">Language ID selecting the character table and quotation mark style.</param>
/// <returns>Number of characters written to <paramref name="result"/>.</returns>
public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, int language)
{
    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;

    int count = 0;
    while (count < data.Length)
    {
        byte encoded = data[count];

        // The two quotation mark bytes are rendered per language; everything else comes from the table.
        char decoded;
        if (encoded == QuoteLeftByte)
            decoded = GetQuoteLeft(language);
        else if (encoded == QuoteRightByte)
            decoded = GetQuoteRight(language);
        else
            decoded = lookup[encoded];

        // A terminator (explicit, or an unused table slot) ends the string.
        if (decoded == Terminator)
            return count;

        result[count] = StringConverter.SanitizeChar(decoded);
        count++;
    }
    return count;
}
/// <inheritdoc cref="LoadString(ReadOnlySpan{byte},Span{char},int)"/>
/// <param name="data">Encoded bytes to decode.</param>
/// <param name="result">Destination buffer for the decoded characters.</param>
/// <param name="jp">True to decode with the Japanese table, false to decode as English.</param>
public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, bool jp)
{
    var language = jp ? LanguageID.Japanese : LanguageID.English;
    return LoadString(data, result, (int)language);
}
/// <summary>
/// Encodes text into Generation 3 byte values, writing into a caller-supplied buffer.
/// </summary>
/// <param name="buffer">Destination for the encoded bytes; optionally pre-filled according to <paramref name="option"/>.</param>
/// <param name="value">Text to encode.</param>
/// <param name="maxLength">Maximum number of characters of <paramref name="value"/> to encode; anything beyond is dropped.</param>
/// <param name="language">Language ID selecting the character table and quotation mark handling.</param>
/// <param name="option">How to pre-format <paramref name="buffer"/> before writing.</param>
/// <returns>Number of bytes written, including the terminator when there was room for one.</returns>
public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, int language, StringConverterOption option = StringConverterOption.ClearFF)
{
    // Hard cap on the input length.
    if (value.Length > maxLength)
        value = value.Slice(0, maxLength);

    switch (option)
    {
        case StringConverterOption.ClearFF:
            buffer.Fill(TerminatorByte);
            break;
        case StringConverterOption.ClearZero:
            buffer.Clear();
            break;
    }

    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;

    int written = 0;
    foreach (char raw in value)
    {
        char c = StringConverter.UnSanitizeChar5(raw);
        // Encoding stops at the first character that has no encoded form.
        if (!TryGetIndex(lookup, c, language, out byte encoded))
            break;
        buffer[written] = encoded;
        written++;
    }

    // Append a terminator only when the buffer still has space after the text.
    if (written >= buffer.Length)
        return written;
    buffer[written] = TerminatorByte;
    return written + 1;
}
/// <inheritdoc cref="SetString(Span{byte},ReadOnlySpan{char},int,int,StringConverterOption)"/>
/// <param name="buffer">Destination for the encoded bytes.</param>
/// <param name="value">Text to encode.</param>
/// <param name="maxLength">Maximum number of characters of <paramref name="value"/> to encode.</param>
/// <param name="jp">True to encode with the Japanese table, false to encode as English.</param>
/// <param name="option">How to pre-format <paramref name="buffer"/> before writing.</param>
public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, bool jp, StringConverterOption option = StringConverterOption.ClearFF)
{
    var language = jp ? LanguageID.Japanese : LanguageID.English;
    return SetString(buffer, value, maxLength, (int)language, option);
}
/// <summary>
/// Finds the encoded byte for a character by searching the supplied decode table.
/// </summary>
/// <param name="dict">Decode table to search; the position of a character is its encoded value.</param>
/// <param name="c">Character to encode.</param>
/// <param name="language">Language ID, forwarded to the user-friendly remap for language-dependent quotation marks.</param>
/// <param name="result">Encoded byte on success; <see cref="TerminatorByte"/> when the character cannot be encoded.</param>
/// <returns>True when <paramref name="c"/> has an encoded form; otherwise false.</returns>
private static bool TryGetIndex(in ReadOnlySpan<char> dict, char c, int language, out byte result)
{
    // The tables pad every unused trailing slot with Terminator, so IndexOf would report the FIRST
    // padding slot rather than 0xFF and the terminator would be accepted as an ordinary character.
    // Reject it up front instead of relying on the index comparison alone.
    if (c == Terminator)
    {
        result = TerminatorByte;
        return false;
    }

    var index = dict.IndexOf(c);
    // '“' is present in the table but its encoded value depends on the language, so it always goes through the remap.
    if (index == -1 || c == '“')
        return TryGetUserFriendlyRemap(c, language, out result);

    result = (byte)index;
    return index != TerminatorByte;
}
/// <summary>
/// Decodes a single Generation 3 encoded value to its character.
/// </summary>
/// <param name="chr">Generation 3 encoded value.</param>
/// <param name="language">Language ID selecting the character table and quotation mark style.</param>
/// <returns>Decoded character, taken from the table without further sanitizing.</returns>
public static char GetG3Char(byte chr, int language)
{
    // Quotation marks are rendered per language rather than read from the table.
    if (chr == QuoteLeftByte)
        return GetQuoteLeft(language);
    if (chr == QuoteRightByte)
        return GetQuoteRight(language);

    var lookup = language == (int)LanguageID.Japanese ? G3_JP : G3_EN;
    return lookup[chr];
}
/// <summary>
/// Encodes a single character to its Generation 3 encoded value.
/// </summary>
/// <param name="chr">Character to encode.</param>
/// <param name="language">Language ID selecting the character table and quotation mark handling.</param>
/// <returns>The byte produced by the table lookup (or its remap fallback); the success flag of the lookup is not reported.</returns>
public static byte SetG3Char(char chr, int language)
{
    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;
    // Only the out value is of interest here; the boolean outcome is deliberately discarded.
    _ = TryGetIndex(lookup, chr, language, out byte encoded);
    return encoded;
}
// The glyph shown for a quotation mark byte depends on the Gen3 game language.
// Pal Park converts these to the appropriate ones based on the PKM language.
private static char GetQuoteLeft(int language)
{
    switch (language)
    {
        case (int)LanguageID.English:
        case (int)LanguageID.Italian:
        case (int)LanguageID.Spanish:
            return '“';
        case (int)LanguageID.French:
            return '«';
        case (int)LanguageID.German:
            return '„';
        default:
            return '『'; // Any other (including invalid) language uses the JP quote
    }
}
// Closing quotation mark glyph for the given language.
private static char GetQuoteRight(int language)
{
    switch (language)
    {
        case (int)LanguageID.English:
        case (int)LanguageID.Italian:
        case (int)LanguageID.Spanish:
            return '”';
        case (int)LanguageID.French:
            return '»';
        case (int)LanguageID.German:
            return '“';
        default:
            return '』'; // Any other (including invalid) language uses the JP quote
    }
}
/// <summary>
/// Maps commonly typed apostrophe and quotation mark characters to their encoded byte.
/// </summary>
/// <param name="c">Character supplied by the user.</param>
/// <param name="language">Language ID; decides which byte '“' maps to.</param>
/// <param name="result">Encoded byte, or <see cref="TerminatorByte"/> when no remap exists.</param>
/// <returns>True when <paramref name="c"/> was remapped; otherwise false.</returns>
private static bool TryGetUserFriendlyRemap(char c, int language, out byte result)
{
    switch (c)
    {
        case Apostrophe:
            result = ApostropheByte;
            break;
        case '“':
            // In German this glyph is the closing mark; everywhere else it is the opening mark.
            result = language == (int)LanguageID.German ? QuoteRightByte : QuoteLeftByte;
            break;
        case '«':
        case '„':
        case '『':
            result = QuoteLeftByte;
            break;
        case '”':
        case '»':
        case '』':
            result = QuoteRightByte;
            break;
        default:
            result = TerminatorByte;
            break;
    }
    return result != TerminatorByte;
}
/// <summary>
/// Decode table used for every language other than Japanese: the position of an entry is its encoded byte value.
/// </summary>
/// <remarks>
/// NOTE(review): entries 0x69, 0xB3 and 0xB4 were empty character literals (which do not compile);
/// they are restored here as 'ノ' (U+30CE), '‘' (U+2018) and '’' (U+2019). Confirm against the upstream table.
/// </remarks>
private static ReadOnlySpan<char> G3_EN =>
[
    ' ', 'À', 'Á', 'Â', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'こ', 'Î', 'Ï', 'Ò', 'Ó', 'Ô', // 0
    'Œ', 'Ù', 'Ú', 'Û', 'Ñ', 'ß', 'à', 'á', 'ね', 'Ç', 'È', 'é', 'ê', 'ë', 'ì', 'í', // 1
    'î', 'ï', 'ò', 'ó', 'ô', 'œ', 'ù', 'ú', 'û', 'ñ', 'º', 'ª', '⒅', '&', '+', 'あ', // 2
    'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', '=', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
    'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
    'っ', '¿', '¡', '⒆', '⒇', 'オ', 'カ', 'キ', 'ク', 'ケ', 'Í', 'コ', 'サ', 'ス', 'セ', 'ソ', // 5
    'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'â', 'ノ', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'í', // 6
    'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
    'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
    'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
    'ッ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '?', '.', '-', '・',// A
    '⑬', '“', '”', '‘', '’', '⑭', '⑮', '$', ',', '⑧', '/', 'A', 'B', 'C', 'D', 'E', // B
    'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', // C
    'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', // D
    'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '►', // E
    ':', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F
    // Make the total length 256 so that any byte access is always within the array
    Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
];
/// <summary>
/// Decode table used for Japanese: the position of an entry is its encoded byte value.
/// </summary>
/// <remarks>
/// NOTE(review): entries 0x69, 0xA1-0xAC, 0xB8, 0xBA-0xEE and 0xF0 were empty character literals (which do not compile);
/// they are restored here as 'ノ' (U+30CE) and the full-width forms: digits U+FF10-U+FF19, '！' '？' '．' '／' '：',
/// and letters U+FF21-U+FF3A / U+FF41-U+FF5A. Confirm against the upstream table.
/// </remarks>
private static ReadOnlySpan<char> G3_JP =>
[
    ' ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ', // 0
    'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ひ', 'ふ', 'へ', 'ほ', 'ま', // 1
    'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', 'り', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', 'ぁ', // 2
    'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', 'ゅ', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
    'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
    'っ', 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ', // 5
    'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'マ', // 6
    'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
    'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
    'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
    'ッ', '０', '１', '２', '３', '４', '５', '６', '７', '８', '９', '！', '？', '。', 'ー', '・', // A
    '…', '『', '』', '「', '」', '♂', '♀', '円', '．', '×', '／', 'Ａ', 'Ｂ', 'Ｃ', 'Ｄ', 'Ｅ', // B
    'Ｆ', 'Ｇ', 'Ｈ', 'Ｉ', 'Ｊ', 'Ｋ', 'Ｌ', 'Ｍ', 'Ｎ', 'Ｏ', 'Ｐ', 'Ｑ', 'Ｒ', 'Ｓ', 'Ｔ', 'Ｕ', // C
    'Ｖ', 'Ｗ', 'Ｘ', 'Ｙ', 'Ｚ', 'ａ', 'ｂ', 'ｃ', 'ｄ', 'ｅ', 'ｆ', 'ｇ', 'ｈ', 'ｉ', 'ｊ', 'ｋ', // D
    'ｌ', 'ｍ', 'ｎ', 'ｏ', 'ｐ', 'ｑ', 'ｒ', 'ｓ', 'ｔ', 'ｕ', 'ｖ', 'ｗ', 'ｘ', 'ｙ', 'ｚ', '►', // E
    '：', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F
    // Make the total length 256 so that any byte access is always within the array
    Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
];
}