u-boot/lib/charset.c
Tom Rini 467382ca03 lib: Remove <common.h> inclusion from these files
After some header file cleanups to add missing include files, remove
common.h from all files in the lib directory. This primarily means just
dropping the line but in a few cases we need to add in other header
files now.

Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Tom Rini <trini@konsulko.com>
2023-12-21 08:54:37 -05:00

575 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* charset conversion utils
*
* Copyright (c) 2017 Rob Clark
*/
#include <charset.h>
#include <capitalization.h>
#include <cp437.h>
#include <efi_loader.h>
#include <errno.h>
#include <malloc.h>
/**
 * codepage_437 - Unicode to codepage 437 translation table
 *
 * The 128 entries are provided by the CP437 macro. utf_to_cp() searches a
 * table of this layout for a Unicode code point; a match at index j is
 * translated to the byte value j + 0x80.
 */
const u16 codepage_437[128] = CP437;
/*
 * Upper/lower case pairs consulted by utf_to_lower() and utf_to_upper() for
 * code points above 0x7f.
 *
 * The initializer is selected at build time: the Unicode table if
 * CONFIG_EFI_UNICODE_CAPITALIZATION is defined, else the code page 1250
 * table if CONFIG_FAT_DEFAULT_CODEPAGE equals 1250, else the code page 437
 * table.
 *
 * NOTE(review): the lookup loops stop at the first entry whose upper (resp.
 * lower) member is zero, so each table macro presumably ends with an
 * all-zero entry - confirm in capitalization.h.
 */
static struct capitalization_table capitalization_table[] =
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
UNICODE_CAPITALIZATION_TABLE;
#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
CP1250_CAPITALIZATION_TABLE;
#else
CP437_CAPITALIZATION_TABLE;
#endif
/**
 * get_code() - read Unicode code point from UTF-8 stream
 *
 * Decodes a single UTF-8 sequence, fetching its bytes one at a time from
 * @read_u8. Only well-formed sequences are accepted. The following input is
 * rejected:
 *
 * * continuation bytes (0x80 - 0xbf) in lead position
 * * the lead bytes 0xc0, 0xc1 and 0xf5 - 0xff
 * * overlong three and four byte encodings
 * * encoded surrogates (U+D800 - U+DFFF)
 * * values above U+10FFFF
 *
 * Decoding stops at the first offending byte. That byte has already been
 * requested from the reader when the error is returned.
 *
 * @read_u8:	stream reader returning the next byte
 * @data:	argument passed to the stream reader, optional
 * Return:	Unicode code point, 0 if the first byte read was 0,
 *		or -1 for an ill-formed sequence
 */
static int get_code(u8 (*read_u8)(void *data), void *data)
{
	s32 ch, code;
	/* Range accepted for the next continuation byte */
	s32 lo = 0x80, hi = 0xbf;
	/* Number of continuation bytes still to be read */
	int trail;

	ch = read_u8(data);
	if (ch < 0x80)
		return ch;	/* ASCII, including the terminating 0 */
	if (ch < 0xc2 || ch > 0xf4)
		return -1;	/* stray continuation or invalid lead byte */

	if (ch >= 0xf0) {
		/* 0xf0 - 0xf4: four byte sequence */
		code = ch & 0x07;
		trail = 3;
		if (ch == 0xf0)
			lo = 0x90;	/* below would be overlong */
		else if (ch == 0xf4)
			hi = 0x8f;	/* above would exceed U+10FFFF */
	} else if (ch >= 0xe0) {
		/* 0xe0 - 0xef: three byte sequence */
		code = ch & 0x0f;
		trail = 2;
		if (ch == 0xe0)
			lo = 0xa0;	/* below would be overlong */
		else if (ch == 0xed)
			hi = 0x9f;	/* above would be a surrogate */
	} else {
		/* 0xc2 - 0xdf: two byte sequence */
		code = ch & 0x1f;
		trail = 1;
	}

	while (trail--) {
		ch = read_u8(data);
		if (ch < lo || ch > hi)
			return -1;
		code = (code << 6) | (ch & 0x3f);
		/* Only the first continuation byte has a restricted range */
		lo = 0x80;
		hi = 0xbf;
	}
	return code;
}
/**
 * read_string() - fetch the next byte of a NUL-terminated string
 *
 * @data:	address of the string pointer (a const char **)
 * Return:	byte at the current position; 0 at the end of the string or
 *		when no string was supplied
 *
 * The string pointer is only advanced when a non-zero byte is returned.
 */
static u8 read_string(void *data)
{
	const char **cursor = data;
	u8 byte;

	if (!cursor || !*cursor)
		return 0;

	byte = **cursor;
	if (byte)
		++*cursor;
	return byte;
}
/**
 * read_console() - fetch one byte from the console
 *
 * @data:	unused, present only to match the stream reader interface
 * Return:	byte read, or 0 if getchar() reported a negative value
 */
static u8 read_console(void *data)
{
	int ch = getchar();

	return ch < 0 ? 0 : ch;
}
/**
 * console_read_unicode() - read one Unicode code point from the console
 *
 * As long as tstc() reports pending input, UTF-8 sequences are decoded from
 * the console. Results that are not positive (0 or the -1 error value of
 * get_code()) are discarded and decoding continues with the next input.
 *
 * @code:	receives the code point on success
 * Return:	0 if a code point was stored in @code, 1 if no input is
 *		available
 */
int console_read_unicode(s32 *code)
{
	while (tstc()) {
		s32 decoded = get_code(read_console, NULL);

		if (decoded > 0) {
			*code = decoded;
			return 0;
		}
	}
	/* No input available */
	return 1;
}
/**
 * utf8_get() - decode the next code point of a UTF-8 string
 *
 * Thin wrapper that runs get_code() with read_string() as byte source.
 *
 * @src:	address of the string pointer; read_string() advances the
 *		pointer past every non-zero byte it hands out
 * Return:	the value returned by get_code()
 */
s32 utf8_get(const char **src)
{
	s32 code = get_code(read_string, src);

	return code;
}
/**
 * utf8_put() - append a Unicode code point to a UTF-8 buffer
 *
 * Writes the one to four byte UTF-8 encoding of @code at *@dst and advances
 * *@dst past the bytes written. No terminating NUL is written and the
 * function has no knowledge of the buffer size; the caller must provide
 * enough room.
 *
 * @code:	code point to encode
 * @dst:	address of the output pointer
 * Return:	0 on success, -1 if @dst or *@dst is NULL or if @code is not
 *		a valid Unicode scalar value (negative, a surrogate
 *		U+D800 - U+DFFF, or above U+10FFFF)
 */
int utf8_put(s32 code, char **dst)
{
	char *out;

	if (!dst || !*dst)
		return -1;
	/* Negative values would otherwise be emitted as a single raw byte */
	if (code < 0 || (code >= 0xD800 && code <= 0xDFFF) ||
	    code >= 0x110000)
		return -1;

	out = *dst;
	if (code <= 0x007F) {
		*out++ = code;
	} else if (code <= 0x07FF) {
		*out++ = code >> 6 | 0xC0;
		*out++ = (code & 0x3F) | 0x80;
	} else if (code < 0x10000) {
		*out++ = code >> 12 | 0xE0;
		*out++ = (code >> 6 & 0x3F) | 0x80;
		*out++ = (code & 0x3F) | 0x80;
	} else {
		*out++ = code >> 18 | 0xF0;
		*out++ = (code >> 12 & 0x3F) | 0x80;
		*out++ = (code >> 6 & 0x3F) | 0x80;
		*out++ = (code & 0x3F) | 0x80;
	}
	*dst = out;
	return 0;
}
/**
 * utf8_utf16_strnlen() - count the UTF-16 units needed for a UTF-8 string
 *
 * @src:	UTF-8 string
 * @count:	maximum number of code points to evaluate
 * Return:	number of u16 units, not counting a terminator. Code points
 *		from 0x10000 upwards count as two units. A sequence that
 *		utf8_get() rejects counts as one unit, leaving room for a
 *		replacement character.
 */
size_t utf8_utf16_strnlen(const char *src, size_t count)
{
	size_t units = 0;

	while (*src && count) {
		s32 code = utf8_get(&src);

		if (!code)
			break;
		/* Surrogate pair needs two units, everything else one */
		units += (code >= 0x10000) ? 2 : 1;
		--count;
	}
	return units;
}
/**
 * utf8_utf16_strncpy() - convert a UTF-8 string to UTF-16
 *
 * Converts up to @count code points. A sequence rejected by utf8_get() is
 * replaced by '?'. A terminating 0 unit is always stored at the final
 * position of *@dst, but *@dst is not advanced past it.
 *
 * @dst:	address of the output pointer, advanced by utf16_put()
 * @src:	UTF-8 string
 * @count:	maximum number of code points to convert
 * Return:	0 on success, -1 if @src, @dst or *@dst is NULL
 */
int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
{
	if (!src || !dst || !*dst)
		return -1;

	while (count && *src) {
		s32 code = utf8_get(&src);

		utf16_put(code < 0 ? '?' : code, dst);
		--count;
	}
	**dst = 0;
	return 0;
}
/**
 * utf16_get() - decode the next code point of a UTF-16 string
 *
 * Reads one u16 unit, or two units forming a surrogate pair, and advances
 * *@src past what was read. At the terminating 0 unit the pointer is not
 * advanced.
 *
 * @src:	address of the string pointer
 * Return:	code point, 0 at the end of the string, or -1 if @src or
 *		*@src is NULL or the input is an ill-formed surrogate
 *		sequence
 *
 * Ill-formed sequences are a low surrogate that is not preceded by a high
 * surrogate, and a high surrogate that is not followed by a low surrogate.
 * In the latter case the unit following the high surrogate is consumed as
 * well, unless it is the terminating 0.
 */
s32 utf16_get(const u16 **src)
{
	s32 code, code2;

	if (!src || !*src)
		return -1;
	if (!**src)
		return 0;
	code = **src;
	++*src;
	/* A low surrogate must not come first */
	if (code >= 0xDC00 && code <= 0xDFFF)
		return -1;
	if (code >= 0xD800 && code <= 0xDBFF) {
		/* High surrogate: a low surrogate has to follow */
		if (!**src)
			return -1;
		code &= 0x3ff;
		code <<= 10;
		code += 0x10000;
		code2 = **src;
		++*src;
		/* Low surrogates span 0xDC00 - 0xDFFF, both ends included */
		if (code2 < 0xDC00 || code2 > 0xDFFF)
			return -1;
		code2 &= 0x3ff;
		code += code2;
	}
	return code;
}
/**
 * utf16_put() - append a Unicode code point to a UTF-16 buffer
 *
 * Writes @code at *@dst as one u16 unit, or as a surrogate pair for code
 * points from 0x10000 upwards, and advances *@dst past the units written.
 * No terminating 0 is written and the function has no knowledge of the
 * buffer size; the caller must provide enough room.
 *
 * @code:	code point to encode
 * @dst:	address of the output pointer
 * Return:	0 on success, -1 if @dst or *@dst is NULL or if @code is not
 *		a valid Unicode scalar value (negative, a surrogate
 *		U+D800 - U+DFFF, or above U+10FFFF)
 */
int utf16_put(s32 code, u16 **dst)
{
	if (!dst || !*dst)
		return -1;
	/* Negative values would otherwise be truncated to 16 bits and stored */
	if (code < 0 || (code >= 0xD800 && code <= 0xDFFF) ||
	    code >= 0x110000)
		return -1;
	if (code < 0x10000) {
		**dst = code;
	} else {
		/* Split the 20 bit offset into high and low surrogate */
		code -= 0x10000;
		**dst = code >> 10 | 0xD800;
		++*dst;
		**dst = (code & 0x3ff) | 0xDC00;
	}
	++*dst;
	return 0;
}
/**
 * utf16_strnlen() - count the code points of a UTF-16 string
 *
 * @src:	UTF-16 string
 * @count:	maximum number of code points to evaluate
 * Return:	number of code points. An ill-formed sequence reported by
 *		utf16_get() is counted as one code point so that space for a
 *		replacement character is reserved.
 */
size_t utf16_strnlen(const u16 *src, size_t count)
{
	size_t chars = 0;

	while (*src && count) {
		if (!utf16_get(&src))
			break;
		++chars;
		--count;
	}
	return chars;
}
/**
 * utf16_utf8_strnlen() - count the UTF-8 bytes needed for a UTF-16 string
 *
 * @src:	UTF-16 string
 * @count:	maximum number of code points to evaluate
 * Return:	number of bytes, not counting a terminator. An ill-formed
 *		sequence reported by utf16_get() counts as one byte, leaving
 *		room for a replacement character.
 */
size_t utf16_utf8_strnlen(const u16 *src, size_t count)
{
	size_t bytes = 0;

	while (*src && count) {
		s32 code = utf16_get(&src);

		if (!code)
			break;
		if (code < 0x80)
			/* ASCII, or a negative error value replaced later */
			bytes += 1;
		else if (code < 0x800)
			bytes += 2;
		else if (code < 0x10000)
			bytes += 3;
		else
			bytes += 4;
		--count;
	}
	return bytes;
}
/**
 * utf16_utf8_strncpy() - convert a UTF-16 string to UTF-8
 *
 * Converts up to @count code points. An ill-formed sequence reported by
 * utf16_get() is replaced by '?'. A terminating NUL is always stored at the
 * final position of *@dst, but *@dst is not advanced past it.
 *
 * @dst:	address of the output pointer, advanced by utf8_put()
 * @src:	UTF-16 string
 * @count:	maximum number of code points to convert
 * Return:	0 on success, -1 if @src, @dst or *@dst is NULL
 */
int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
{
	if (!src || !dst || !*dst)
		return -1;

	while (count && *src) {
		s32 code = utf16_get(&src);

		utf8_put(code < 0 ? '?' : code, dst);
		--count;
	}
	**dst = 0;
	return 0;
}
/**
 * utf_to_lower() - convert a code point to lower case
 *
 * Values up to 0x7f are handled arithmetically ('A' - 'Z' only). Anything
 * above is looked up in capitalization_table; the search ends at the first
 * entry whose upper member is zero.
 *
 * @code:	code point to convert
 * Return:	lower case counterpart, or @code itself if none is known
 */
s32 utf_to_lower(const s32 code)
{
	struct capitalization_table *entry;

	if (code <= 0x7f)
		return (code >= 'A' && code <= 'Z') ? code + 0x20 : code;

	for (entry = capitalization_table; entry->upper; ++entry) {
		if (entry->upper == code)
			return entry->lower;
	}
	return code;
}
/**
 * utf_to_upper() - convert a code point to upper case
 *
 * Values up to 0x7f are handled arithmetically ('a' - 'z' only). Anything
 * above is looked up in capitalization_table; the search ends at the first
 * entry whose lower member is zero.
 *
 * @code:	code point to convert
 * Return:	upper case counterpart, or @code itself if none is known
 */
s32 utf_to_upper(const s32 code)
{
	struct capitalization_table *entry;

	if (code <= 0x7f)
		return (code >= 'a' && code <= 'z') ? code - 0x20 : code;

	for (entry = capitalization_table; entry->lower; ++entry) {
		if (entry->lower == code)
			return entry->upper;
	}
	return code;
}
/**
 * u16_strcasecmp() - compare two UTF-16 strings case insensitively
 *
 * Both strings are decoded code point by code point with utf16_get() and
 * each code point is passed through utf_to_upper() before comparison. The
 * comparison ends at the first difference, at the end of the first string,
 * or as soon as either converted value equals -1, the value that
 * utf16_get() returns for ill-formed input.
 *
 * @s1:		first string to compare
 * @s2:		second string to compare
 * Return:	0 if the strings are equal up to the point where comparison
 *		ended
 *		< 0 if the first different code point in s1 is less than the
 *		corresponding code point in s2
 *		> 0 if the first different code point in s1 is greater than
 *		the corresponding code point in s2
 */
int u16_strcasecmp(const u16 *s1, const u16 *s2)
{
	s32 c1, c2;
	int diff;

	do {
		c1 = utf_to_upper(utf16_get(&s1));
		c2 = utf_to_upper(utf16_get(&s2));
		diff = c1 - c2;
	} while (!diff && c1 && c1 != -1 && c2 != -1);

	return diff;
}
/**
 * u16_strncmp() - compare two u16 strings unit by unit
 *
 * @s1:		first string to compare
 * @s2:		second string to compare
 * @n:		maximum number of u16 to compare
 * Return:	0 if the first n u16 are the same in s1 and s2, or if both
 *		strings end before a difference is found
 *		< 0 if the first different u16 in s1 is less than the
 *		corresponding u16 in s2
 *		> 0 if the first different u16 in s1 is greater than the
 *		corresponding u16 in s2
 */
int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
{
	size_t i;

	for (i = 0; i < n; ++i) {
		int diff = s1[i] - s2[i];

		/* Stop at a difference or at the end of s1 */
		if (diff || !s1[i])
			return diff;
	}
	return 0;
}
/**
 * u16_strnlen() - length of a u16 string, limited to a maximum
 *
 * @in:		u16 string
 * @count:	maximum number of u16 units to inspect
 * Return:	number of units before the first 0 unit, or @count if no 0
 *		unit is found among the first @count units
 */
size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
{
	size_t len = 0;

	while (len < count && in[len])
		++len;
	return len;
}
/**
 * u16_strsize() - memory size of a u16 string including its terminator
 *
 * @in:		u16 string
 * Return:	size in bytes: the length reported by u16_strlen() plus one
 *		terminating unit, multiplied by sizeof(u16)
 */
size_t u16_strsize(const void *in)
{
	size_t units = u16_strlen(in) + 1;

	return units * sizeof(u16);
}
/**
 * u16_strcpy() - copy a u16 string including its terminating 0 unit
 *
 * No length check is performed; @dest must be large enough for @src.
 *
 * @dest:	destination buffer
 * @src:	source string
 * Return:	@dest
 */
u16 *u16_strcpy(u16 *dest, const u16 *src)
{
	size_t i = 0;

	do {
		dest[i] = src[i];
	} while (src[i++]);

	return dest;
}
/**
 * u16_strdup() - duplicate a u16 string
 *
 * The copy is allocated with malloc() and sized by u16_strsize(), so it
 * includes the terminating 0 unit.
 *
 * @src:	string to duplicate
 * Return:	newly allocated copy, or NULL if @src is NULL or the
 *		allocation failed
 */
u16 *u16_strdup(const void *src)
{
	size_t size;
	u16 *copy;

	if (!src)
		return NULL;

	size = u16_strsize(src);
	copy = malloc(size);
	if (copy)
		memcpy(copy, src, size);
	return copy;
}
/**
 * u16_strlcat() - append a u16 string to a size limited buffer
 *
 * Appends as much of @src as fits so that the result, including its
 * terminating 0 unit, occupies at most @count units. If @dest holds no
 * 0 unit within its first @count units, nothing is written.
 *
 * @dest:	destination buffer holding a u16 string
 * @src:	string to append
 * @count:	size of @dest in u16 units
 * Return:	length of @dest (limited to @count) plus length of @src; a
 *		value of @count or more indicates truncation
 */
size_t u16_strlcat(u16 *dest, const u16 *src, size_t count)
{
	size_t destlen = u16_strnlen(dest, count);
	size_t srclen = u16_strlen(src);
	size_t total = destlen + srclen;
	size_t ncopy = srclen;

	if (destlen >= count)
		return total;

	/* Keep one unit free for the terminator when truncating */
	if (total >= count)
		ncopy = count - destlen - 1;

	memcpy(dest + destlen, src, ncopy * sizeof(*dest));
	dest[destlen + ncopy] = 0x0000;
	return total;
}
/**
 * utf16_to_utf8() - convert a UTF-16 buffer to UTF-8
 *
 * Converts exactly @size units from @src, so 0 units in the input are
 * converted like any other unit. No terminating NUL is appended.
 *
 * A low surrogate without a preceding high surrogate is written as '?'.
 * A high surrogate that is not followed by a low surrogate is written as
 * '?' and the unit after it is then converted on its own. A high surrogate
 * in the last unit of the input produces no output.
 *
 * The function does not know the size of @dest. The longest output per
 * input unit is three bytes; a surrogate pair yields four bytes for two
 * units.
 *
 * @dest:	output buffer
 * @src:	UTF-16 input
 * @size:	number of u16 units to convert
 * Return:	pointer to the byte following the last byte written
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;
				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Unpaired high surrogate. */
				*dest++ = '?';
				/*
				 * The current unit may be valid. Put it back
				 * by undoing both the pointer advance and the
				 * count, otherwise the final unit of the
				 * input would be lost.
				 */
				src--;
				size++;
			}
			code_high = 0;
		} else if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else if (code >= 0xD800 && code <= 0xDBFF) {
			/* Remember the high surrogate for the next unit */
			code_high = code;
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Low surrogate without high surrogate. */
			*dest++ = '?';
		} else {
			/* Rest of the BMP; a 16 bit unit cannot exceed 0xFFFF */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}
	return dest;
}
/**
 * utf_to_cp() - translate a Unicode code point to a code page byte
 *
 * Values below 0x80 are left untouched. Other values are searched in the
 * first 0x80 entries of @codepage; a match at index j is replaced by
 * j + 0x80. If there is no match, *@c is set to '?'.
 *
 * @c:		code point to translate, replaced by the result
 * @codepage:	table with the Unicode values of code page bytes 0x80 - 0xff
 * Return:	0 on success, -ENOENT if the code point is not in the table
 */
int utf_to_cp(s32 *c, const u16 *codepage)
{
	int idx;

	if (*c < 0x80)
		return 0;

	/* Look up codepage translation */
	for (idx = 0; idx < 0x80; ++idx) {
		if (codepage[idx] == *c) {
			*c = idx + 0x80;
			return 0;
		}
	}
	*c = '?';
	return -ENOENT;
}
/**
 * utf8_to_cp437_stream() - convert a UTF-8 byte stream to code page 437
 *
 * @buffer holds the bytes of a sequence that is not complete yet as a
 * NUL-terminated string. Each call appends @c and tries to decode the
 * buffer content with utf8_get(). If decoding fails before all buffered
 * bytes have been looked at, the buffer is discarded and @c is tried again
 * on its own.
 *
 * @c:		next byte of the UTF-8 stream
 * @buffer:	state buffer, must start out as an empty string.
 *		NOTE(review): it has to hold the pending bytes plus @c and a
 *		terminating NUL; confirm the size callers provide.
 * Return:	translated character, or 0 if no character is complete yet.
 *		Code points missing in code page 437 are returned as '?'.
 */
int utf8_to_cp437_stream(u8 c, char *buffer)
{
	char *end;
	const char *pos;
	s32 s;

	/*
	 * A NUL byte can never be appended to the string in @buffer, so the
	 * loop below would not terminate. Drop any pending bytes instead.
	 */
	if (!c) {
		*buffer = 0;
		return 0;
	}
	for (;;) {
		pos = buffer;
		end = buffer + strlen(buffer);
		*end++ = c;
		*end = 0;
		s = utf8_get(&pos);
		if (s > 0) {
			*buffer = 0;
			/* On failure utf_to_cp() has stored '?' in s */
			utf_to_cp(&s, codepage_437);
			return s;
		}
		/* Everything was consumed: wait for more bytes */
		if (pos == end)
			return 0;
		/* Invalid sequence: discard it and retry with c alone */
		*buffer = 0;
	}
}
/**
 * utf8_to_utf32_stream() - convert a UTF-8 byte stream to code points
 *
 * @buffer holds the bytes of a sequence that is not complete yet as a
 * NUL-terminated string. Each call appends @c and tries to decode the
 * buffer content with utf8_get(). If decoding fails before all buffered
 * bytes have been looked at, the buffer is discarded and @c is tried again
 * on its own.
 *
 * @c:		next byte of the UTF-8 stream
 * @buffer:	state buffer, must start out as an empty string.
 *		NOTE(review): it has to hold the pending bytes plus @c and a
 *		terminating NUL; confirm the size callers provide.
 * Return:	Unicode code point, or 0 if no code point is complete yet
 */
int utf8_to_utf32_stream(u8 c, char *buffer)
{
	char *end;
	const char *pos;
	s32 s;

	/*
	 * A NUL byte can never be appended to the string in @buffer, so the
	 * loop below would not terminate. Drop any pending bytes instead.
	 */
	if (!c) {
		*buffer = 0;
		return 0;
	}
	for (;;) {
		pos = buffer;
		end = buffer + strlen(buffer);
		*end++ = c;
		*end = 0;
		s = utf8_get(&pos);
		if (s > 0) {
			*buffer = 0;
			return s;
		}
		/* Everything was consumed: wait for more bytes */
		if (pos == end)
			return 0;
		/* Invalid sequence: discard it and retry with c alone */
		*buffer = 0;
	}
}