Fix printf's Unicode support

https://github.com/fish-shell/fish-shell/issues/625
This commit is contained in:
ridiculousfish 2013-03-28 17:45:49 -07:00
parent 7d480ff579
commit 77dfd7b783

View file

@ -342,29 +342,39 @@ long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0)
else if (*p == L'u' || *p == L'U')
{
wchar_t esc_char = *p;
unsigned int uni_value;
uni_value = 0;
for (esc_length = (esc_char == L'u' ? 4 : 8), ++p;
esc_length > 0;
--esc_length, ++p)
p++;
uint32_t uni_value = 0;
for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++)
{
if (! is_hex_digit(*p))
this->fatal_error(_(L"missing hexadecimal number in escape"));
{
/* Escape sequence must be done. Complain if we didn't get anything */
if (esc_length == 0)
{
this->fatal_error(_(L"Missing hexadecimal number in Unicode escape"));
}
break;
}
uni_value = uni_value * 16 + hex_to_bin(*p);
p++;
}
/* A universal character name shall not specify a character short
identifier in the range 00000000 through 00000020, 0000007F through
0000009F, or 0000D800 through 0000DFFF inclusive. A universal
character name shall not designate a character in the required
character set. */
if ((uni_value <= 0x9f
&& uni_value != 0x24 && uni_value != 0x40 && uni_value != 0x60)
|| (uni_value >= 0xd800 && uni_value <= 0xdfff))
this->fatal_error(_(L"invalid universal character name \\%c%0*x"),
esc_char, (esc_char == L'u' ? 4 : 8), uni_value);
this->append_format_output(L"%lc", uni_value);
/* PCA GNU printf respects the limitations described in ISO N717, about which universal characters "shall not" be specified. I believe this limitation is for the benefit of compilers; I see no reason to impose it in builtin_printf.
If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code points, so just use that. If not defined, use the %lc printf conversion; this probably won't do anything good if your wide character set is not Unicode, but such platforms are exceedingly rare.
*/
if (uni_value > 0x10FFFF)
{
this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value);
}
else
{
#if defined(__STDC_ISO_10646__)
this->append_output(uni_value);
#else
this->append_format_output(L"%lc", uni_value);
}
#endif
}
else
{