diff --git a/src/env_universal_common.cpp b/src/env_universal_common.cpp index f7c07ea28..bee25a1ea 100644 --- a/src/env_universal_common.cpp +++ b/src/env_universal_common.cpp @@ -958,7 +958,7 @@ var_table_t env_universal_t::read_message_internal(int fd) // Process it if it's a newline (which is true if we are before the end of the buffer) if (cursor < bufflen && ! line.empty()) { - if (utf8_to_wchar_string(line, &wide_line)) + if (utf8_to_wchar(line.data(), line.size(), &wide_line, 0)) { env_universal_t::parse_message_internal(wide_line, &result, &storage); } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index c5fa43856..bcfff0324 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1057,7 +1057,16 @@ static void test_utf82wchar(const char *src, size_t slen, const wchar_t *dst, si do { - size = utf8_to_wchar(src, slen, mem, dlen, flags); + if (mem == NULL) + { + size = utf8_to_wchar(src, slen, NULL, flags); + } + else + { + std::wstring buff; + size = utf8_to_wchar(src, slen, &buff, flags); + std::copy(buff.begin(), buff.begin() + std::min(dlen, buff.size()), mem); + } if (res != size) { err(L"u2w: %s: FAILED (rv: %lu, must be %lu)", descr, size, res); @@ -1219,8 +1228,10 @@ static void test_utf8() UTF8_IGNORE_ERROR, sizeof(wb1) / sizeof(*wb1), "ignore bad chars"); test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm), 0, sizeof(wm) / sizeof(*wm), "mixed languages"); - test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) - 1, 0, - 0, "boundaries -1"); + // PCA this test was to ensure that if the output buffer was too small, we'd get 0 + // we no longer have statically sized result buffers, so this test is disabled + // test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) - 1, 0, + // 0, "boundaries -1"); test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) + 1, 0, sizeof(wm) / sizeof(*wm), "boundaries +1"); test_utf82wchar(um, sizeof(um), NULL, 0, 0, @@ -1235,8 +1246,11 @@ static void test_utf8() "invalid params, src 
buf not NULL"); test_utf82wchar((const char *)NULL, 10, NULL, 0, 0, 0, "invalid params, src length is not 0"); - test_utf82wchar(u1, sizeof(u1), w1, 0, 0, 0, - "invalid params, dst is not NULL"); + + // PCA this test was to ensure that converting into a zero length output buffer would return 0 + // we no longer statically size output buffers, so the test is disabled + // test_utf82wchar(u1, sizeof(u1), w1, 0, 0, 0, + // "invalid params, dst is not NULL"); /* * UCS-4 -> UTF-8 string. diff --git a/src/utf8.cpp b/src/utf8.cpp index 62453be46..9bd6edf27 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -36,12 +36,14 @@ typedef wchar_t utf8_wchar_t; #define UTF8_WCHAR_MAX ((size_t)std::numeric_limits::max()) +typedef std::basic_string utf8_wstring_t; + bool is_wchar_ucs2() { return UTF8_WCHAR_MAX <= 0xFFFF; } -static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t *out, size_t outsize, int flags); +static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring_t *result, int flags); static size_t wchar_to_utf8_internal(const utf8_wchar_t *in, size_t insize, char *out, size_t outsize, int flags); static bool safe_copy_wchar_to_utf8_wchar(const wchar_t *in, utf8_wchar_t *out, size_t count) @@ -60,32 +62,6 @@ static bool safe_copy_wchar_to_utf8_wchar(const wchar_t *in, utf8_wchar_t *out, return result; } -bool utf8_to_wchar_string(const std::string &str, std::wstring *result) -{ - result->clear(); - const size_t inlen = str.size(); - if (inlen == 0) - { - return true; - } - - bool success = false; - const char *input = str.c_str(); - size_t outlen = utf8_to_wchar(input, inlen, NULL, 0, 0); - if (outlen > 0) - { - wchar_t *tmp = new wchar_t[outlen]; - size_t outlen2 = utf8_to_wchar(input, inlen, tmp, outlen, 0); - if (outlen2 > 0) - { - result->assign(tmp, outlen2); - success = true; - } - delete[] tmp; - } - return success; -} - bool wchar_to_utf8_string(const std::wstring &str, std::string *result) { result->clear(); @@ -112,9 
+88,9 @@ bool wchar_to_utf8_string(const std::wstring &str, std::string *result) return success; } -size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize, int flags) +size_t utf8_to_wchar(const char *in, size_t insize, std::wstring *out, int flags) { - if (in == NULL || insize == 0 || (outsize == 0 && out != NULL)) + if (in == NULL || insize == 0) { return 0; } @@ -122,21 +98,20 @@ size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize size_t result; if (sizeof(wchar_t) == sizeof(utf8_wchar_t)) { - result = utf8_to_wchar_internal(in, insize, reinterpret_cast(out), outsize, flags); + result = utf8_to_wchar_internal(in, insize, reinterpret_cast(out), flags); + } + else if (out == NULL) + { + result = utf8_to_wchar_internal(in, insize, NULL, flags); } else { - // Allocate a temporary buffer to hold the output - // note: outsize may be 0 - utf8_wchar_t *tmp_output = new utf8_wchar_t[outsize]; - - // Invoke the conversion with the temporary - result = utf8_to_wchar_internal(in, insize, tmp_output, outsize, flags); - - // Copy back from tmp to the function's output, then clean it up - size_t amount_to_copy = std::min(result, outsize); - std::copy(tmp_output, tmp_output + amount_to_copy, out); - delete[] tmp_output; + // Allocate a temporary buffer to hold the output, + // invoke the conversion with the temporary, + // and then copy it back + utf8_wstring_t tmp_output; + result = utf8_to_wchar_internal(in, insize, &tmp_output, flags); + out->insert(out->end(), tmp_output.begin(), tmp_output.end()); } return result; } @@ -213,9 +188,7 @@ __utf8_forbitten(unsigned char octet) * It takes the following arguments: * in - input UTF-8 string. It can be null-terminated. * insize - size of input string in bytes. - * out - result buffer for UCS-2/4 string. If out is NULL, - * function returns size of result buffer. - * outsize - size of out buffer in wide characters. + * out_string - result buffer for UCS-2/4 string. 
* * RETURN VALUES * The function returns size of result buffer (in wide characters). @@ -231,19 +204,21 @@ __utf8_forbitten(unsigned char octet) * not prepare buffer in advance (\0 terminate) but after calling this * function. */ -static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t *out, size_t outsize, int flags) +static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring_t *out_string, int flags) { unsigned char *p, *lim; - utf8_wchar_t *wlim, high; + utf8_wchar_t high; size_t n, total, i, n_bits; - if (in == NULL || insize == 0 || (outsize == 0 && out != NULL)) + if (in == NULL || insize == 0) return (0); + + if (out_string != NULL) + out_string->clear(); total = 0; p = (unsigned char *)in; lim = p + insize; - wlim = out + outsize; for (; p < lim; p += n) { @@ -319,15 +294,10 @@ static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t } total++; - - if (out == NULL) + if (out_string == NULL) continue; - if (out >= wlim) - return (0); /* no space left */ - uint32_t out_val = 0; - *out = 0; n_bits = 0; for (i = 1; i < n; i++) { @@ -364,7 +334,7 @@ static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t } else { - *out++ = out_val; + out_string->push_back(out_val); } } diff --git a/src/utf8.h b/src/utf8.h index 1c9923db5..33ed6a5ea 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -28,11 +28,10 @@ #define UTF8_SKIP_BOM 0x02 /* Convert a string between UTF8 and UCS-2/4 (depending on size of wchar_t). Returns true if successful, storing the result of the conversion in *result */ -bool utf8_to_wchar_string(const std::string &input, std::wstring *result); bool wchar_to_utf8_string(const std::wstring &input, std::string *result); -/* Variants exposed for testing */ -size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize, int flags); +/* Convert a string between UTF8 and UCS-2/4 (depending on size of wchar_t). 
Returns the number of wide characters converted, or 0 on failure or if the input is NULL or empty, storing the result of the conversion in *out; out may be NULL to only compute the length */ +size_t utf8_to_wchar(const char *in, size_t insize, std::wstring *out, int flags); size_t wchar_to_utf8(const wchar_t *in, size_t insize, char *out, size_t outsize, int flags); bool is_wchar_ucs2();