mirror of
https://github.com/kwsch/PKHeX
synced 2024-11-10 14:44:24 +00:00
08ed482555
Handles Nidoran's shenanigans as well as more clear method names - Add normalization for PK7->PK8 (no more 0xE... usage for the gender symbols... maybe more chars?) - Not sure if Gen3 gamecube encoding needs sanitizing. Who is transferring Nidoran to CXD? :) Requires some silly usage of Language passing as arguments. Future improvements can be made to revise the half/full encoding determination when setting a string. Probably has issues since we're just doing a naive check without considering nicknames w/ special chars. Closes #4174
236 lines
12 KiB
C#
236 lines
12 KiB
C#
using System;
|
||
|
||
namespace PKHeX.Core;
|
||
|
||
/// <summary>
/// Logic for converting a <see cref="string"/> for Generation 3.
/// </summary>
public static class StringConverter3
{
    /// <summary>Encoded byte that marks the end of a string.</summary>
    public const byte TerminatorByte = 0xFF;

    // Decoded stand-in for the terminator; also used to pad both lookup tables up to 256 entries.
    private const char Terminator = (char)TerminatorByte;
    private const char Apostrophe = '\''; // ’
    private const byte ApostropheByte = 0xB4;
    private const byte QuoteLeftByte = 0xB1;
    private const byte QuoteRightByte = 0xB2;

    private const char FGM = '♂';
    private const char FGF = '♀';
    private const char HGM = StringConverter4Util.HGM; // '♂'
    private const char HGF = StringConverter4Util.HGF; // '♀'

    /// <summary>
    /// Converts a Generation 3 encoded value array to string.
    /// </summary>
    /// <param name="data">Byte array containing string data.</param>
    /// <param name="language">Language specific conversion</param>
    /// <returns>Decoded string.</returns>
    public static string GetString(ReadOnlySpan<byte> data, int language)
    {
        // One decoded char per encoded byte at most, so data.Length chars is always enough.
        Span<char> result = stackalloc char[data.Length];
        int i = LoadString(data, result, language);
        return new string(result[..i]);
    }

    /// <inheritdoc cref="GetString(ReadOnlySpan{byte},int)"/>
    /// <param name="data">Byte array containing string data.</param>
    /// <param name="jp">Value source is Japanese font.</param>
    public static string GetString(ReadOnlySpan<byte> data, bool jp) => GetString(data, jp ? (int)LanguageID.Japanese : (int)LanguageID.English);

    /// <summary>
    /// Converts a Generation 3 encoded value array to string.
    /// </summary>
    /// <remarks>
    /// Decoding stops at the first byte that maps to the terminator, or at the end of <paramref name="data"/>.
    /// <paramref name="result"/> must be able to hold every decoded character.
    /// </remarks>
    /// <param name="data">Encoded data</param>
    /// <param name="result">Decoded character result buffer</param>
    /// <param name="language">Language specific conversion</param>
    /// <returns>Character count loaded.</returns>
    public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, int language)
    {
        var table = (language == (int)LanguageID.Japanese) ? G3_JP : G3_EN;
        int i = 0;
        for (; i < data.Length; i++)
        {
            var value = data[i];
            // The two quote bytes render differently per language, so they bypass the table.
            var c = value switch
            {
                QuoteLeftByte => GetQuoteLeft(language),
                QuoteRightByte => GetQuoteRight(language),
                _ => table[value],
            }; // Convert to Unicode
            if (c == Terminator) // Stop if Terminator/Invalid
                break;
            c = StringConverter4Util.NormalizeGenderSymbol(c);
            result[i] = c;
        }
        return i;
    }

    /// <inheritdoc cref="LoadString(ReadOnlySpan{byte},Span{char},int)"/>
    /// <param name="data">Encoded data</param>
    /// <param name="result">Decoded character result buffer</param>
    /// <param name="jp">Value source is Japanese font.</param>
    public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, bool jp) => LoadString(data, result, jp ? (int)LanguageID.Japanese : (int)LanguageID.English);

    /// <summary>
    /// Converts a string to a Generation 3 encoded value array.
    /// </summary>
    /// <remarks>
    /// Encoding stops at the first character that cannot be represented; a terminator is written after the
    /// last encoded character when there is room for it.
    /// </remarks>
    /// <param name="buffer">Destination for the encoded bytes.</param>
    /// <param name="value">Decoded string.</param>
    /// <param name="maxLength">Maximum length of the input <paramref name="value"/></param>
    /// <param name="language">Language specific conversion</param>
    /// <param name="option">Buffer pre-formatting option</param>
    /// <returns>Count of bytes written to <paramref name="buffer"/>, including the terminator when one was written.</returns>
    public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, int language, StringConverterOption option = StringConverterOption.ClearFF)
    {
        if (value.Length > maxLength)
            value = value[..maxLength]; // Hard cap
        if (value.Length > buffer.Length)
            value = value[..buffer.Length]; // Never write past the end of the destination

        if (option is StringConverterOption.ClearFF)
            buffer.Fill(TerminatorByte);
        else if (option is StringConverterOption.ClearZero)
            buffer.Clear();

        bool jp = language == (int)LanguageID.Japanese;
        var table = jp ? G3_JP : G3_EN;
        int i = 0;
        for (; i < value.Length; i++)
        {
            var chr = value[i];
            if (!jp)
                chr = StringConverter4Util.UnNormalizeGenderSymbol(chr);
            if (!TryGetIndex(table, chr, language, out var b))
                break;
            buffer[i] = b;
        }

        int count = i;
        if (count < buffer.Length)
            buffer[count++] = TerminatorByte;
        return count;
    }

    /// <inheritdoc cref="SetString(Span{byte},ReadOnlySpan{char},int,int,StringConverterOption)"/>
    /// <param name="buffer">Destination for the encoded bytes.</param>
    /// <param name="value">Decoded string.</param>
    /// <param name="maxLength">Maximum length of the input <paramref name="value"/></param>
    /// <param name="jp">Value destination is Japanese font.</param>
    /// <param name="option">Buffer pre-formatting option</param>
    public static int SetString(Span<byte> buffer, ReadOnlySpan<char> value, int maxLength, bool jp, StringConverterOption option = StringConverterOption.ClearFF) =>
        SetString(buffer, value, maxLength, jp ? (int)LanguageID.Japanese : (int)LanguageID.English, option);

    /// <summary>
    /// Looks up the encoded byte for a character, falling back to the user-friendly remap.
    /// </summary>
    /// <param name="dict">Lookup table whose index is the encoded byte.</param>
    /// <param name="c">Character to encode.</param>
    /// <param name="language">Language specific conversion</param>
    /// <param name="result">Encoded byte; <see cref="TerminatorByte"/> when the character cannot be encoded.</param>
    /// <returns><see langword="true"/> if <paramref name="c"/> maps to a non-terminator byte.</returns>
    private static bool TryGetIndex(in ReadOnlySpan<char> dict, char c, int language, out byte result)
    {
        // The tables are padded with Terminator, so IndexOf would resolve it to the first padding slot
        // instead of 0xFF and the check at the bottom would never reject it. Reject it explicitly.
        if (c == Terminator)
        {
            result = TerminatorByte;
            return false;
        }

        var index = dict.IndexOf(c);
        // '“' is present in the EN table, but the byte it maps to depends on the language; always remap it.
        if (index == -1 || c == '“')
            return TryGetUserFriendlyRemap(c, language, out result);
        result = (byte)index;
        return index != TerminatorByte;
    }

    /// <summary>
    /// Decodes a character from a Generation 3 encoded value.
    /// </summary>
    /// <param name="chr">Generation 3 encoded value.</param>
    /// <param name="language">Language specific conversion</param>
    /// <returns>Decoded character, taken from the table without gender symbol normalization.</returns>
    public static char GetG3Char(byte chr, int language)
    {
        var table = (language == (int)LanguageID.Japanese) ? G3_JP : G3_EN;
        return chr switch
        {
            QuoteLeftByte => GetQuoteLeft(language),
            QuoteRightByte => GetQuoteRight(language),
            _ => table[chr],
        };
    }

    /// <summary>
    /// Encodes a character to a Generation 3 encoded value.
    /// </summary>
    /// <param name="chr">Decoded character.</param>
    /// <param name="language">Language specific conversion</param>
    /// <returns>Generation 3 encoded value; <see cref="TerminatorByte"/> when the character cannot be encoded.</returns>
    public static byte SetG3Char(char chr, int language)
    {
        var table = (language == (int)LanguageID.Japanese) ? G3_JP : G3_EN;
        TryGetIndex(table, chr, language, out var b);
        return b;
    }

    // Quotation marks are displayed differently based on the Gen3 game language.
    // Pal Park converts these to the appropriate ones based on the PKM language.
    private static char GetQuoteLeft(int language) => language switch
    {
        (int)LanguageID.English or (int)LanguageID.Italian or (int)LanguageID.Spanish => '“',
        (int)LanguageID.French => '«',
        (int)LanguageID.German => '„',
        _ => '『', // Invalid languages use JP quote
    };

    private static char GetQuoteRight(int language) => language switch
    {
        (int)LanguageID.English or (int)LanguageID.Italian or (int)LanguageID.Spanish => '”',
        (int)LanguageID.French => '»',
        (int)LanguageID.German => '“',
        _ => '』', // Invalid languages use JP quote
    };

    /// <summary>
    /// Tries to remap the user input to a valid character.
    /// </summary>
    /// <param name="c">Character that was not taken from the lookup table.</param>
    /// <param name="language">Language specific conversion</param>
    /// <param name="result">Remapped byte; <see cref="TerminatorByte"/> when no remap exists.</param>
    /// <returns><see langword="true"/> if a remap exists for <paramref name="c"/>.</returns>
    private static bool TryGetUserFriendlyRemap(char c, int language, out byte result)
    {
        result = c switch
        {
            Apostrophe => ApostropheByte,
            // German closes a quotation with '“'; every other language opens with it.
            '“' => language != (int)LanguageID.German ? QuoteLeftByte : QuoteRightByte,
            '”' => QuoteRightByte,
            '«' => QuoteLeftByte,
            '»' => QuoteRightByte,
            '„' => QuoteLeftByte,
            '『' => QuoteLeftByte,
            '』' => QuoteRightByte,
            _ => TerminatorByte,
        };
        return result != TerminatorByte;
    }

    // Decode table for non-Japanese languages; the index is the encoded byte.
    private static ReadOnlySpan<char> G3_EN =>
    [
        ' ', 'À', 'Á', 'Â', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'こ', 'Î', 'Ï', 'Ò', 'Ó', 'Ô', // 0
        'Œ', 'Ù', 'Ú', 'Û', 'Ñ', 'ß', 'à', 'á', 'ね', 'Ç', 'È', 'é', 'ê', 'ë', 'ì', 'í', // 1
        'î', 'ï', 'ò', 'ó', 'ô', 'œ', 'ù', 'ú', 'û', 'ñ', 'º', 'ª', '⒅', '&', '+', 'あ', // 2
        'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', '=', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
        'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
        'っ', '¿', '¡', '⒆', '⒇', 'オ', 'カ', 'キ', 'ク', 'ケ', 'Í', 'コ', 'サ', 'ス', 'セ', 'ソ', // 5
        'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'â', 'ノ', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'í', // 6
        'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
        'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
        'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
        'ッ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '?', '.', '-', '・',// A
        '⑬', '“', '”', '‘', '’', HGM, HGF, '$', ',', '⑧', '/', 'A', 'B', 'C', 'D', 'E', // B
        'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', // C
        'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', // D
        'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '►', // E
        ':', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F

        // Make the total length 256 so that any byte access is always within the array
        Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
    ];

    // Decode table for Japanese; the index is the encoded byte.
    private static ReadOnlySpan<char> G3_JP =>
    [
        ' ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ', // 0
        'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ひ', 'ふ', 'へ', 'ほ', 'ま', // 1
        'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', 'り', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', 'ぁ', // 2
        'ぃ', 'ぅ', 'ぇ', 'ぉ', 'ゃ', 'ゅ', 'ょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', // 3
        'ぞ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', // 4
        'っ', 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ', // 5
        'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'マ', // 6
        'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ァ', // 7
        'ィ', 'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', // 8
        'ゾ', 'ダ', 'ヂ', 'ヅ', 'デ', 'ド', 'バ', 'ビ', 'ブ', 'ベ', 'ボ', 'パ', 'ピ', 'プ', 'ペ', 'ポ', // 9
        'ッ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '?', '。', 'ー', '・', // A
        '…', '『', '』', '「', '」', FGM, FGF, '円', '.', '×', '/', 'A', 'B', 'C', 'D', 'E', // B
        'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', // C
        'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', // D
        'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '►', // E
        ':', 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', // F

        // Make the total length 256 so that any byte access is always within the array
        Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator, Terminator,
    ];
}
|