PKHeX/PKHeX.Core/PKM/Strings/StringConverter3.cs
Kurt 08ed482555 Revise gender symbol remapping
Handles Nidoran's shenanigans and uses clearer method names
- Add normalization for PK7->PK8 (no more 0xE... usage for the gender symbols... maybe more chars?)
- Not sure if Gen3 gamecube encoding needs sanitizing. Who is transferring Nidoran to CXD? :)

Requires somewhat awkward passing of Language as an argument. Future improvements could revise how half-width/full-width encoding is determined when setting a string. This probably has issues, since we're only doing a naive check that does not consider nicknames with special characters.

Closes #4174
2024-05-12 10:47:55 -05:00

236 lines
12 KiB
C#
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
namespace PKHeX.Core;
/// <summary>
/// Logic for converting a <see cref="string"/> for Generation 3.
/// </summary>
public static class StringConverter3
{
/// <summary>Byte value that ends an encoded string.</summary>
public const byte TerminatorByte = 0xFF;
// Terminator expressed as a char; the decode tables are padded with it so that decoding stops on unmapped bytes.
private const char Terminator = (char)TerminatorByte;
private const char Apostrophe = '\''; // ASCII apostrophe; remapped to ApostropheByte when encoding
private const byte ApostropheByte = 0xB4;
private const byte QuoteLeftByte = 0xB1; // opening quotation mark; the decoded glyph depends on the language
private const byte QuoteRightByte = 0xB2; // closing quotation mark; the decoded glyph depends on the language
private const char FGM = '♂'; // male symbol used by the Japanese table (index 0xB5)
private const char FGF = '♀'; // female symbol used by the Japanese table (index 0xB6)
private const char HGM = StringConverter4Util.HGM; // male symbol used by the non-Japanese table (index 0xB5)
private const char HGF = StringConverter4Util.HGF; // female symbol used by the non-Japanese table (index 0xB6)
/// <summary>
/// Converts a Generation 3 encoded value array to string.
/// </summary>
/// <param name="data">Byte array containing string data.</param>
/// <param name="language">Language specific conversion</param>
/// <returns>Decoded string, ending before the first terminator (or unmapped) byte.</returns>
public static string GetString(ReadOnlySpan<byte> data, int language)
{
    // Keep stack usage bounded: stack overflow cannot be caught, so inputs larger than
    // any realistic Generation 3 string fall back to a heap buffer instead.
    const int MaxStackChars = 256;
    Span<char> result = data.Length <= MaxStackChars
        ? stackalloc char[data.Length]
        : new char[data.Length];

    int i = LoadString(data, result, language);
    return new string(result[..i]);
}
/// <summary>
/// Converts a Generation 3 encoded value array to string, selecting the character table from a Japanese-font flag.
/// </summary>
/// <param name="data">Byte array containing string data.</param>
/// <param name="jp">Value source is Japanese font.</param>
/// <returns>Decoded string.</returns>
public static string GetString(ReadOnlySpan<byte> data, bool jp)
{
    // Anything that is not Japanese is decoded with the English language id.
    int language = jp ? (int)LanguageID.Japanese : (int)LanguageID.English;
    return GetString(data, language);
}
/// <summary>
/// Decodes Generation 3 encoded bytes into the supplied character buffer.
/// </summary>
/// <param name="data">Encoded data</param>
/// <param name="result">Decoded character result buffer</param>
/// <param name="language">Language specific conversion</param>
/// <returns>Character count loaded.</returns>
public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, int language)
{
    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;

    int count = 0;
    while (count < data.Length)
    {
        byte encoded = data[count];

        // Quotation marks are language dependent; everything else comes straight from the table.
        char decoded;
        if (encoded == QuoteLeftByte)
            decoded = GetQuoteLeft(language);
        else if (encoded == QuoteRightByte)
            decoded = GetQuoteRight(language);
        else
            decoded = lookup[encoded];

        // Terminator (or an unmapped byte, which the table pads with Terminator) ends the string.
        if (decoded == Terminator)
            break;

        decoded = StringConverter4Util.NormalizeGenderSymbol(decoded);
        result[count] = decoded;
        count++;
    }
    return count;
}
/// <summary>
/// Decodes Generation 3 encoded bytes into the supplied character buffer, selecting the character table from a Japanese-font flag.
/// </summary>
/// <param name="data">Encoded data</param>
/// <param name="result">Decoded character result buffer</param>
/// <param name="jp">Value source is Japanese font.</param>
/// <returns>Character count loaded.</returns>
public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, bool jp)
{
    // Anything that is not Japanese is decoded with the English language id.
    int language = jp ? (int)LanguageID.Japanese : (int)LanguageID.English;
    return LoadString(data, result, language);
}
/// <summary>
/// Converts a string to a Generation 3 encoded value array.
/// </summary>
/// <param name="buffer">Destination for the encoded bytes.</param>
/// <param name="value">Decoded string.</param>
/// <param name="maxLength">Maximum length of the input <paramref name="value"/>; extra characters are dropped.</param>
/// <param name="language">Language specific conversion</param>
/// <param name="option">Buffer pre-formatting option</param>
/// <returns>Count of bytes written, including the terminator when there was room for one.</returns>
public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, int language, StringConverterOption option = StringConverterOption.ClearFF)
{
    // Hard cap
    var text = value.Length > maxLength ? value[..maxLength] : value;

    // Pre-format the destination if requested; any other option leaves the buffer untouched.
    switch (option)
    {
        case StringConverterOption.ClearFF:
            buffer.Fill(TerminatorByte);
            break;
        case StringConverterOption.ClearZero:
            buffer.Clear();
            break;
    }

    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;

    int written = 0;
    foreach (char input in text)
    {
        // Only the non-Japanese path reverts the normalized gender symbols before lookup.
        char symbol = isJapanese ? input : StringConverter4Util.UnNormalizeGenderSymbol(input);

        // Stop at the first character that cannot be encoded.
        if (!TryGetIndex(lookup, symbol, language, out byte encoded))
            break;

        buffer[written] = encoded;
        written++;
    }

    // Terminate only when there is room left in the buffer.
    if (written < buffer.Length)
    {
        buffer[written] = TerminatorByte;
        written++;
    }
    return written;
}
/// <summary>
/// Converts a string to a Generation 3 encoded value array, selecting the character table from a Japanese-font flag.
/// </summary>
/// <param name="buffer">Destination for the encoded bytes.</param>
/// <param name="value">Decoded string.</param>
/// <param name="maxLength">Maximum length of the input <paramref name="value"/>.</param>
/// <param name="jp">Value destination is Japanese font.</param>
/// <param name="option">Buffer pre-formatting option</param>
/// <returns>Count of bytes written, as reported by the language-based overload.</returns>
public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, bool jp, StringConverterOption option = StringConverterOption.ClearFF)
{
    // Anything that is not Japanese is encoded with the English language id.
    int language = jp ? (int)LanguageID.Japanese : (int)LanguageID.English;
    return SetString(buffer, value, maxLength, language, option);
}
/// <summary>
/// Looks up the Generation 3 byte value that encodes <paramref name="c"/>.
/// </summary>
/// <param name="dict">Decode table to search; the index of a character is its encoded byte value.</param>
/// <param name="c">Character to encode.</param>
/// <param name="language">Language used to resolve quotation marks whose encoding differs per language.</param>
/// <param name="result">Encoded byte when the character can be encoded; otherwise <see cref="TerminatorByte"/>.</param>
/// <returns><see langword="true"/> if the character can be encoded; otherwise <see langword="false"/>.</returns>
private static bool TryGetIndex(in ReadOnlySpan<char> dict, char c, int language, out byte result)
{
    // The tables pad their tail (0xF7..0xFF) with Terminator, so IndexOf would report the first
    // padding slot rather than 0xFF and the terminator character would be written as a stray byte.
    // Reject it up front so callers stop encoding instead.
    if (c == Terminator)
    {
        result = TerminatorByte;
        return false;
    }

    var index = dict.IndexOf(c);

    // '“' exists in the table, but the byte it encodes to depends on the language, so it is
    // always routed through the remap together with characters missing from the table.
    if (index == -1 || c == '“')
        return TryGetUserFriendlyRemap(c, language, out result);

    result = (byte)index;
    return index != TerminatorByte;
}
/// <summary>
/// Decodes a single Generation 3 encoded value to its character.
/// </summary>
/// <param name="chr">Generation 3 encoded value.</param>
/// <param name="language">Language specific conversion</param>
/// <returns>Decoded character.</returns>
public static char GetG3Char(byte chr, int language)
{
    // Quotation marks are resolved per language rather than from the table.
    if (chr == QuoteLeftByte)
        return GetQuoteLeft(language);
    if (chr == QuoteRightByte)
        return GetQuoteRight(language);

    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;
    return lookup[chr];
}
/// <summary>
/// Encodes a single character to its Generation 3 encoded value.
/// </summary>
/// <param name="chr">Decoded character.</param>
/// <param name="language">Language specific conversion</param>
/// <returns>
/// Generation 3 encoded value; <see cref="TerminatorByte"/> when the character has neither a table entry nor a remap.
/// </returns>
public static byte SetG3Char(char chr, int language)
{
    bool isJapanese = language == (int)LanguageID.Japanese;
    var lookup = isJapanese ? G3_JP : G3_EN;

    // The success flag is intentionally ignored; the out value is returned either way.
    _ = TryGetIndex(lookup, chr, language, out byte encoded);
    return encoded;
}
// Quotation marks are displayed differently based on the Gen3 game language.
// Pal Park converts these to the appropriate ones based on the PKM language.
/// <summary>
/// Gets the opening quotation mark shown for the given language.
/// </summary>
/// <param name="language">Language id of the string.</param>
/// <returns>Opening quotation mark character.</returns>
private static char GetQuoteLeft(int language)
{
    switch (language)
    {
        case (int)LanguageID.French:
            return '«';
        case (int)LanguageID.German:
            return '„';
        case (int)LanguageID.English:
        case (int)LanguageID.Italian:
        case (int)LanguageID.Spanish:
            return '“';
        default:
            return '『'; // Invalid languages use JP quote
    }
}
/// <summary>
/// Gets the closing quotation mark shown for the given language.
/// </summary>
/// <param name="language">Language id of the string.</param>
/// <returns>Closing quotation mark character.</returns>
private static char GetQuoteRight(int language)
{
    switch (language)
    {
        case (int)LanguageID.French:
            return '»';
        case (int)LanguageID.German:
            return '“';
        case (int)LanguageID.English:
        case (int)LanguageID.Italian:
        case (int)LanguageID.Spanish:
            return '”';
        default:
            return '』'; // Invalid languages use JP quote
    }
}
/// <summary>
/// Tries to remap a user-entered apostrophe or quotation mark to the byte that encodes it.
/// </summary>
/// <param name="c">Character that was entered.</param>
/// <param name="language">Language id; decides how an ambiguous quotation mark is encoded.</param>
/// <param name="result">Remapped byte, or <see cref="TerminatorByte"/> when there is no remap.</param>
/// <returns><see langword="true"/> if a remap exists; otherwise <see langword="false"/>.</returns>
private static bool TryGetUserFriendlyRemap(char c, int language, out byte result)
{
    switch (c)
    {
        case Apostrophe:
            result = ApostropheByte;
            break;

        case '“':
            // German uses this glyph as its closing quote; every other language opens with it.
            if (language == (int)LanguageID.German)
                result = QuoteRightByte;
            else
                result = QuoteLeftByte;
            break;

        case '«':
        case '„':
        case '『':
            result = QuoteLeftByte;
            break;

        case '”':
        case '»':
        case '』':
            result = QuoteRightByte;
            break;

        default:
            result = TerminatorByte;
            break;
    }
    return result != TerminatorByte;
}
// Decode table used for every language other than Japanese.
// The index of an entry is the Generation 3 encoded byte value; the trailing comment on each row is the high nibble.
// Encoding searches this table for the first matching entry, so a character listed more than once
// always encodes to its lowest index.
// NOTE(review): 'Ç'/'È' appear at 0x04/0x05 and again at 0x19/0x1A, and 'í' at 0x1F and 0x6F — confirm this is intended.
// NOTE(review): some entries show up as empty literals in certain viewers (invisible or special code points);
// confirm the intended code point before editing those rows.
private static ReadOnlySpan<char> G3_EN =>
[
' ', 'À', 'Á', 'Â', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'こ', 'Î', 'Ï', 'Ò', 'Ó', 'Ô', // 0
'Œ', 'Ù', 'Ú', 'Û', 'Ñ', 'ß', 'à', 'á', 'ね', 'Ç', 'È', 'é', 'ê', 'ë', 'ì', 'í', // 1
'î', 'ï', 'ò', 'ó', 'ô', 'œ', 'ù', 'ú', 'û', 'ñ', 'º', 'ª', '⒅', '&', '+', 'あ', // 2
'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', '=', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
'っ', '¿', '¡', '⒆', '⒇', 'オ', 'カ', 'キ', 'ク', 'ケ', 'Í', 'コ', 'サ', 'ス', 'セ', 'ソ', // 5
'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'â', '', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'í', // 6
'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
'ッ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '?', '.', '-', '・',// A
'⑬', '“', '”', '', '', HGM, HGF, '$', ',', '⑧', '/', 'A', 'B', 'C', 'D', 'E', // B
'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', // C
'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', // D
'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '►', // E
':', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F
// Make the total length 256 so that any byte access is always within the array
Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
];
// Decode table used when the language is Japanese.
// The index of an entry is the Generation 3 encoded byte value; the trailing comment on each row is the high nibble.
// Encoding searches this table for the first matching entry.
// NOTE(review): some entries show up as empty literals in certain viewers (invisible or special code points);
// confirm the intended code point before editing those rows.
private static ReadOnlySpan<char> G3_JP =>
[
' ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ', // 0
'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ひ', 'ふ', 'へ', 'ほ', 'ま', // 1
'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', 'り', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', 'ぁ', // 2
'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', 'ゅ', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
'っ', 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ', // 5
'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', '', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'マ', // 6
'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
'ッ', '', '', '', '', '', '', '', '', '', '', '', '', '。', 'ー', '・', // A
'…', '『', '』', '「', '」', FGM, FGF, '円', '', '×', '', '', '', '', '', '', // B
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', // C
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', // D
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '►', // E
'', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F
// Make the total length 256 so that any byte access is always within the array
Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
];
}