From 73bb90cabcdffcd528d1002a12779779196bf200 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:36 +0100 Subject: [PATCH] efi_loader: carve out utf_to_cp() Carve out a function to translate a Unicode code point to an 8bit codepage. Provide a unit test for the new function. Signed-off-by: Heinrich Schuchardt --- include/charset.h | 11 ++++++++++ lib/charset.c | 28 +++++++++++++++++++++++++ lib/efi_loader/efi_unicode_collation.c | 19 +++-------------- test/unicode_ut.c | 29 ++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/include/charset.h b/include/charset.h index 64ba91f791..52e7d1474e 100644 --- a/include/charset.h +++ b/include/charset.h @@ -275,4 +275,15 @@ u16 *u16_strdup(const void *src); */ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); +/** + * utf_to_cp() - translate Unicode code point to 8-bit codepage + * + * Code points below 0x80 are kept; untranslatable ones are replaced by '?'. + * + * @c: pointer to code point; overwritten with the codepage character + * @codepage: table with the Unicode code points of characters 0x80 - 0xff + * Return: 0 on success, -ENOENT if code point cannot be translated + */ +int utf_to_cp(s32 *c, const u16 *codepage); + #endif /* __CHARSET_H_ */ diff --git a/lib/charset.c b/lib/charset.c index 814847d165..1345c8f9f0 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -472,3 +473,30 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) return dest; } + +/** + * utf_to_cp() - translate Unicode code point to 8-bit codepage + * + * Code points below 0x80 are kept; untranslatable ones are replaced by '?'. 
+ * + * @c: pointer to code point; overwritten with the codepage character + * @codepage: table with the Unicode code points of characters 0x80 - 0xff + * Return: 0 on success, -ENOENT if code point cannot be translated + */ +int utf_to_cp(s32 *c, const u16 *codepage) +{ + if (*c >= 0x80) { + int j; + + /* Entry j of the table belongs to codepage character 0x80 + j */ + for (j = 0; j < 0x80; ++j) { + if (*c == codepage[j]) { + *c = j + 0x80; + return 0; + } + } + *c = '?'; + return -ENOENT; + } + return 0; +} diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c index bf5314c4ff..36be798f64 100644 --- a/lib/efi_loader/efi_unicode_collation.c +++ b/lib/efi_loader/efi_unicode_collation.c @@ -300,23 +300,10 @@ static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this, break; } c = utf_to_upper(c); - if (c >= 0x80) { - int j; - - /* Look for codepage translation */ - for (j = 0; j < 0x80; ++j) { - if (c == codepage[j]) { - c = j + 0x80; - break; - } - } - if (j >= 0x80) { - c = '_'; - ret = true; - } - } else if (c && (c < 0x20 || strchr(illegal, c))) { - c = '_'; + if (utf_to_cp(&c, codepage) || + (c && (c < 0x20 || strchr(illegal, c)))) { ret = true; + c = '_'; } fat[i] = c; diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 6130ef0b54..2cc6b5feff 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -595,6 +595,35 @@ static int unicode_test_u16_strsize(struct unit_test_state *uts) } UNICODE_TEST(unicode_test_u16_strsize); +static int unicode_test_utf_to_cp(struct unit_test_state *uts) +{ + int ret; + s32 c; + + c = '\n'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('\n', c); + + c = 'a'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('a', c); + + c = 0x03c4; /* Greek small letter tau, expected at 0xe7 */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq(0xe7, c); + + c = 0x03a4; /* Greek capital letter tau, no translation expected */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(-ENOENT, ret); + ut_asserteq('?', c); + + return 
0; +} +UNICODE_TEST(unicode_test_utf_to_cp); + #ifdef CONFIG_EFI_LOADER static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts) {