mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-27 20:25:12 +00:00
Fix printf's Unicode support
https://github.com/fish-shell/fish-shell/issues/625
This commit is contained in:
parent
7d480ff579
commit
77dfd7b783
1 changed files with 29 additions and 19 deletions
|
@ -342,30 +342,40 @@ long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0)
|
|||
else if (*p == L'u' || *p == L'U')
|
||||
{
|
||||
wchar_t esc_char = *p;
|
||||
unsigned int uni_value;
|
||||
|
||||
uni_value = 0;
|
||||
for (esc_length = (esc_char == L'u' ? 4 : 8), ++p;
|
||||
esc_length > 0;
|
||||
--esc_length, ++p)
|
||||
p++;
|
||||
uint32_t uni_value = 0;
|
||||
for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++)
|
||||
{
|
||||
if (! is_hex_digit(*p))
|
||||
this->fatal_error(_(L"missing hexadecimal number in escape"));
|
||||
{
|
||||
/* Escape sequence must be done. Complain if we didn't get anything */
|
||||
if (esc_length == 0)
|
||||
{
|
||||
this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
uni_value = uni_value * 16 + hex_to_bin(*p);
|
||||
p++;
|
||||
}
|
||||
|
||||
/* A universal character name shall not specify a character short
|
||||
identifier in the range 00000000 through 00000020, 0000007F through
|
||||
0000009F, or 0000D800 through 0000DFFF inclusive. A universal
|
||||
character name shall not designate a character in the required
|
||||
character set. */
|
||||
if ((uni_value <= 0x9f
|
||||
&& uni_value != 0x24 && uni_value != 0x40 && uni_value != 0x60)
|
||||
|| (uni_value >= 0xd800 && uni_value <= 0xdfff))
|
||||
this->fatal_error(_(L"invalid universal character name \\%c%0*x"),
|
||||
esc_char, (esc_char == L'u' ? 4 : 8), uni_value);
|
||||
/* PCA GNU printf respects the limitations described in ISO N717, about which universal characters "shall not" be specified. I believe this limitation is for the benefit of compilers; I see no reason to impose it in builtin_printf.
|
||||
|
||||
If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code points, so just use that. If not defined, use the %lc printf conversion; this probably won't do anything good if your wide character set is not Unicode, but such platforms are exceedingly rare.
|
||||
*/
|
||||
if (uni_value > 0x10FFFF)
|
||||
{
|
||||
this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(__STDC_ISO_10646__)
|
||||
this->append_output(uni_value);
|
||||
#else
|
||||
this->append_format_output(L"%lc", uni_value);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
this->append_format_output(L"%lc", L'\\');
|
||||
|
|
Loading…
Reference in a new issue