mirror of
https://github.com/fish-shell/fish-shell
synced 2024-11-10 07:04:29 +00:00
Optimize away a str2wcs_internal check
str2wcs_internal is one of the worst hot paths in the codebase, and this particular check can be optimized away at compile time for non-macOS hosts.
This commit is contained in:
parent
bf31333622
commit
1365379518
4 changed files with 33 additions and 1 deletions
|
@ -224,3 +224,11 @@ LIBATOMIC_NOT_NEEDED)
|
|||
IF (NOT LIBATOMIC_NOT_NEEDED)
|
||||
set(ATOMIC_LIBRARY "atomic")
|
||||
endif()
|
||||
|
||||
# Check whether the mbrtowc implementation attempts to encode invalid UTF-8 sequences.
# Known culprits: at least some versions of macOS (confirmed on Snow Leopard and Yosemite).
#
# try_run() stores the exit code of the check program in its first argument and the
# compile result in its second. CMake separates arguments with whitespace only, so the
# names must not be followed by commas: a trailing comma becomes part of the variable
# name (and of the binary directory path), leaving the variables tested below unset.
try_run(mbrtowc_invalid_utf8_exit mbrtowc_invalid_utf8_compiles
        "${CMAKE_CURRENT_BINARY_DIR}"
        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/checks/mbrtowc_invalid_utf8.cpp")
# The check program exits with 1 when mbrtowc did not reject the invalid sequence.
IF ("${mbrtowc_invalid_utf8_compiles}" AND ("${mbrtowc_invalid_utf8_exit}" EQUAL 1))
  SET(HAVE_BROKEN_MBRTOWC_UTF8 1)
ENDIF()
|
||||
|
|
18
cmake/checks/mbrtowc_invalid_utf8.cpp
Normal file
18
cmake/checks/mbrtowc_invalid_utf8.cpp
Normal file
|
@ -0,0 +1,18 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cwchar>
|
||||
|
||||
// Check whether the runtime mbrtowc implementation attempts to encode
|
||||
// invalid UTF-8 values.
|
||||
|
||||
int main() {
    // No setlocale() call is made here, so the conversion runs in the locale the program
    // starts with.
    // TODO: I'm not sure how to enforce a UTF-8 locale without overriding the language
    char sample[] = "hello world";
    // Force the first byte to 0xF8, which is never a valid UTF-8 lead byte.
    sample[0] |= 0xF8;
    wchar_t wsample[100]{};
    std::mbstate_t state = std::mbstate_t();

    // std::mbrtowc() returns std::size_t and reports failure as (size_t)-1 (invalid
    // sequence) or (size_t)-2 (incomplete sequence). Compare against those sentinels
    // directly rather than narrowing the result to int and testing for a negative value.
    const std::size_t res = std::mbrtowc(wsample, sample, std::strlen(sample), &state);
    const bool rejected =
        res == static_cast<std::size_t>(-1) || res == static_cast<std::size_t>(-2);

    // Exit status 0: the invalid byte was rejected. Exit status 1: mbrtowc converted it,
    // which the build system treats as a broken implementation.
    return rejected ? 0 : 1;
}
|
|
@ -167,6 +167,9 @@
|
|||
# define _DARWIN_USE_64_BIT_INODE 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if mbrtowc attempts to convert invalid UTF-8 sequences */
|
||||
#cmakedefine HAVE_BROKEN_MBRTOWC_UTF8 1
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
#ifndef __warn_unused
|
||||
#define __warn_unused __attribute__ ((warn_unused_result))
|
||||
|
|
|
@ -266,10 +266,13 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) {
|
|||
size_t ret = 0;
|
||||
wchar_t wc = 0;
|
||||
|
||||
if ((in[in_pos] & 0xF8) == 0xF8) {
|
||||
if (false) {
|
||||
#if defined(HAVE_BROKEN_MBRTOWC_UTF8)
|
||||
} else if ((in[in_pos] & 0xF8) == 0xF8) {
|
||||
// Protect against broken std::mbrtowc() implementations which attempt to encode UTF-8
|
||||
// sequences longer than four bytes (e.g., OS X Snow Leopard).
|
||||
use_encode_direct = true;
|
||||
#endif
|
||||
} else if (sizeof(wchar_t) == 2 && //!OCLINT(constant if expression)
|
||||
(in[in_pos] & 0xF8) == 0xF0) {
|
||||
// Assume we are in a UTF-16 environment (e.g., Cygwin) using a UTF-8 encoding.
|
||||
|
|
Loading…
Reference in a new issue