From 44249098010c154e7dc24589a84db529687758fe Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Fri, 3 Jun 2016 19:05:13 -0700 Subject: [PATCH] simplify, and fix, setting the current locale Fix test setup bogosities. Specifically, they weren't hermetic with respect to locale env vars. Rewrite the handling of locale vars to simplify the code and make it more like the pattern most programs employ. Fixes #3110 --- src/builtin_printf.cpp | 40 ++------ src/common.cpp | 37 ++++---- src/common.h | 10 +- src/env.cpp | 201 +++++++++++++++++++--------------------- src/fish.cpp | 5 +- src/fish_indent.cpp | 12 ++- src/fish_key_reader.cpp | 4 +- tests/test_util.fish | 41 ++++---- 8 files changed, 163 insertions(+), 187 deletions(-) diff --git a/src/builtin_printf.cpp b/src/builtin_printf.cpp index d5fa1d638..cfefba89a 100644 --- a/src/builtin_printf.cpp +++ b/src/builtin_printf.cpp @@ -172,35 +172,9 @@ static int octal_to_bin(wchar_t c) } } -/* This message appears in N_() here rather than just in _() below because - the sole use would have been in a #define. */ -static wchar_t const *const cfcc_msg = - N_(L"warning: %ls: character(s) following character constant have been ignored"); - -double C_STRTOD(wchar_t const *nptr, wchar_t **endptr) -{ - double r; - - const wcstring saved_locale = wsetlocale(LC_NUMERIC, NULL); - - if (!saved_locale.empty()) - { - wsetlocale(LC_NUMERIC, L"C"); - } - - r = wcstod(nptr, endptr); - - if (!saved_locale.empty()) - { - wsetlocale(LC_NUMERIC, saved_locale.c_str()); - } - - return r; -} - void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) { - // Don't error twice + // Don't error twice. 
if (early_exit) return; @@ -279,10 +253,14 @@ uintmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end) return wcstoull(s, end, 0); } -template<> -long double raw_string_to_scalar_type(const wchar_t *s, wchar_t ** end) -{ - return C_STRTOD(s, end); +template <> +long double raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) { + // Forcing the locale to C is questionable but it's what the old C_STRTOD() did; it was inlined here + // as part of changing how locale management is done by fish. + char * old_locale = setlocale(LC_NUMERIC, "C"); + double val = wcstod(s, end); + setlocale(LC_NUMERIC, old_locale); + return val; } template diff --git a/src/common.cpp b/src/common.cpp index b341f226f..49405f1df 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -6,7 +6,24 @@ parts of fish. #include "config.h" - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #ifdef HAVE_SIGINFO_H @@ -533,26 +550,12 @@ wchar_t *quote_end(const wchar_t *pos) } - -wcstring wsetlocale(int category, const wchar_t *locale) -{ - - char *lang = locale ? wcs2str(locale) : NULL; - char *res = setlocale(category, lang); - free(lang); - - /* - Use ellipsis if on known unicode system, otherwise use $ - */ +void fish_setlocale() { + // Use ellipsis if on known unicode system, otherwise use $. ellipsis_char = (wcwidth(L'\x2026') > 0) ? L'\x2026' : L'$'; // U+23CE is the "return" character omitted_newline_char = (wcwidth(L'\x23CE') > 0) ? L'\x23CE' : L'~'; - - if (!res) - return wcstring(); - else - return format_string(L"%s", res); } bool contains_internal(const wchar_t *a, int vararg_handle, ...) 
diff --git a/src/common.h b/src/common.h index 18b71cb64..7690f40d6 100644 --- a/src/common.h +++ b/src/common.h @@ -763,13 +763,9 @@ wchar_t *quote_end(const wchar_t *in); */ void error_reset(); -/** - This function behaves exactly like a wide character equivalent of - the C function setlocale, except that it will also try to detect if - the user is using a Unicode character set, and if so, use the - unicode ellipsis character as ellipsis, instead of '$'. -*/ -wcstring wsetlocale(int category, const wchar_t *locale); +/// This function should be called after calling `setlocale()` to perform fish specific locale +/// initialization. +void fish_setlocale(); /** Checks if \c needle is included in the list of strings specified. A warning is printed if needle is zero. diff --git a/src/env.cpp b/src/env.cpp index 56c9deb07..b6a402acd 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -3,39 +3,39 @@ */ #include "config.h" // IWYU pragma: keep -#include -#include -#include -#include +#include #include -#include +#include +#include +#include #include #include #include -#include -#include -#include #include -#include +#include +#include +#include +#include +#include #include #include +#include +#include -#include "fallback.h" - -#include "wutil.h" -#include "proc.h" #include "common.h" #include "env.h" -#include "sanity.h" -#include "expand.h" -#include "history.h" -#include "reader.h" #include "env_universal_common.h" -#include "input.h" #include "event.h" -#include "path.h" - +#include "expand.h" +#include "fallback.h" #include "fish_version.h" +#include "history.h" +#include "input.h" +#include "path.h" +#include "proc.h" +#include "reader.h" +#include "sanity.h" +#include "wutil.h" /** Value denoting a null string */ #define ENV_NULL L"\x1d" @@ -168,22 +168,15 @@ static void mark_changed_exported() has_changed_exported = true; } -/** - List of all locale variable names -*/ -static const wchar_t * const locale_variable[] = -{ - L"LANG", - L"LC_ALL", - L"LC_COLLATE", - 
L"LC_CTYPE", - L"LC_MESSAGES", - L"LC_MONETARY", - L"LC_NUMERIC", - L"LC_TIME", - NULL -}; +/// List of all locale environment variable names. +static const wchar_t *const locale_variable[] = { + L"LANG", L"LANGUAGE", L"LC_ALL", L"LC_ADDRESS", L"LC_COLLATE", + L"LC_CTYPE", L"LC_IDENTIFICATION", L"LC_MEASUREMENT", L"LC_MESSAGES", L"LC_MONETARY", + L"LC_NAME", L"LC_NUMERIC", L"LC_PAPER", L"LC_TELEPHONE", L"LC_TIME", + NULL}; +/// List of all curses environment variable names. +static const wchar_t *const curses_variable[] = {L"TERM", L"TERMINFO", L"TERMINFO_DIRS", NULL}; const var_entry_t *env_node_t::find_entry(const wcstring &key) { @@ -231,65 +224,34 @@ static bool var_is_locale(const wcstring &key) return false; } -/** - Properly sets all locale information -*/ -static void handle_locale() -{ - const env_var_t lc_all = env_get_string(L"LC_ALL"); - const wcstring old_locale = wsetlocale(LC_MESSAGES, NULL); +/// Properly sets all locale information. +static void handle_locale(const wchar_t *env_var_name) { + debug(2, L"handle_locale() called in response to '%ls' changing", env_var_name); + const char *old_msg_locale = setlocale(LC_MESSAGES, NULL); - /* - Array of locale constants corresponding to the local variable names defined in locale_variable - */ - static const int cat[] = - { - 0, - LC_ALL, - LC_COLLATE, - LC_CTYPE, - LC_MESSAGES, - LC_MONETARY, - LC_NUMERIC, - LC_TIME - } - ; - - if (!lc_all.missing()) - { - wsetlocale(LC_ALL, lc_all.c_str()); - } - else - { - const env_var_t lang = env_get_string(L"LANG"); - if (!lang.missing()) - { - wsetlocale(LC_ALL, lang.c_str()); - } - - for (int i=2; locale_variable[i]; i++) - { - const env_var_t val = env_get_string(locale_variable[i]); - - if (!val.missing()) - { - wsetlocale(cat[i], val.c_str()); - } + for (size_t i = 0; locale_variable[i]; i++) { + const wchar_t *key = locale_variable[i]; + const env_var_t var = env_get_string(key); + if (!var.empty()) { + const std::string &name = wcs2string(key); + const 
std::string &value = wcs2string(var); + setenv(name.c_str(), value.c_str(), 1); + debug(3, L"locale var %s='%s'", name.c_str(), value.c_str()); } } - const wcstring new_locale = wsetlocale(LC_MESSAGES, NULL); - if (old_locale != new_locale) - { - - /* - Try to make change known to gettext. Both changing - _nl_msg_cat_cntr and calling dcgettext might potentially - tell some gettext implementation that the translation - strings should be reloaded. We do both and hope for the - best. - */ + char *locale = setlocale(LC_ALL, ""); + fish_setlocale(); + debug(2, L"handle_locale() setlocale(): '%s'", locale); + const char *new_msg_locale = setlocale(LC_MESSAGES, NULL); + debug(3, L"old LC_MESSAGES locale: '%s'", old_msg_locale); + debug(3, L"new LC_MESSAGES locale: '%s'", new_msg_locale); + if (strcmp(old_msg_locale, new_msg_locale)) { + // Try to make change known to gettext. Both changing _nl_msg_cat_cntr and calling dcgettext + // might potentially tell some gettext implementation that the translation strings should be + // reloaded. We do both and hope for the best. + debug(2, L"changing message locale from '%s' to '%s'", old_msg_locale, new_msg_locale); extern int _nl_msg_cat_cntr; _nl_msg_cat_cntr++; @@ -297,16 +259,43 @@ static void handle_locale() } } - -/** React to modifying the given variable */ -static void react_to_variable_change(const wcstring &key) -{ - if (var_is_locale(key)) - { - handle_locale(); +/// Check if the specified variable is a curses/terminfo variable. +static bool var_is_curses(const wcstring &key) { + for (size_t i = 0; curses_variable[i]; i++) { + if (key == curses_variable[i]) { + return true; + } } - else if (key == L"fish_term256" || key == L"fish_term24bit") - { + return false; +} + +/// Push all curses/terminfo env vars into the global environment where they can be found by those +/// libraries. 
+static void handle_curses(const wchar_t *env_var_name) { + debug(2, L"handle_curses() called in response to '%ls' changing", env_var_name); + for (size_t i = 0; curses_variable[i]; i++) { + const wchar_t *key = curses_variable[i]; + const env_var_t var = env_get_string(key); + if (!var.empty()) { + const std::string &name = wcs2string(key); + const std::string &value = wcs2string(var); + setenv(name.c_str(), value.c_str(), 1); + debug(3, L"curses var %s='%s'", name.c_str(), value.c_str()); + } + } + // TODO: Modify input_init() to allow calling it when the terminfo env vars are dynamically + // changed. At the present time it can be called just once. Also, we should really only do this + // if the TERM var is set. + // input_init(); +} + +/// React to modifying the given variable. +static void react_to_variable_change(const wcstring &key) { + if (var_is_locale(key)) { + handle_locale(key.c_str()); + } else if (var_is_curses(key)) { + handle_curses(key.c_str()); + } else if (key == L"fish_term256" || key == L"fish_term24bit") { update_fish_color_support(); reader_react_to_color_change(); } @@ -1119,16 +1108,13 @@ void env_pop() if (&top->env != global) { int i; - int locale_changed = 0; - + const wchar_t *locale_changed = NULL; env_node_t *killme = top; - for (i=0; locale_variable[i]; i++) - { - var_table_t::iterator result = killme->env.find(locale_variable[i]); - if (result != killme->env.end()) - { - locale_changed = 1; + for (i = 0; locale_variable[i]; i++) { + var_table_t::iterator result = killme->env.find(locale_variable[i]); + if (result != killme->env.end()) { + locale_changed = locale_variable[i]; break; } } @@ -1154,8 +1140,7 @@ void env_pop() delete killme; - if (locale_changed) - handle_locale(); + if (locale_changed) handle_locale(locale_changed); } else diff --git a/src/fish.cpp b/src/fish.cpp index 39c9d5eb2..9b73342e2 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -511,11 +511,12 @@ int main(int argc, char **argv) assert(R_SENTINAL >= 
INPUT_COMMON_BASE && R_SENTINAL <= INPUT_COMMON_END); + program_name = L"fish"; set_main_thread(); setup_fork_guards(); - wsetlocale(LC_ALL, L""); - program_name=L"fish"; + setlocale(LC_ALL, ""); + fish_setlocale(); //struct stat tmp; //stat("----------FISH_HIT_MAIN----------", &tmp); diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 0e31f28e7..a965c4e14 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -319,12 +319,16 @@ static std::string no_colorize(const wcstring &text) int main(int argc, char *argv[]) { + program_name = L"fish_indent"; set_main_thread(); setup_fork_guards(); - - wsetlocale(LC_ALL, L""); - program_name=L"fish_indent"; - + // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's + // because the fish project assumes Unicode UTF-8 encoding in all of its scripts. + // + // TODO: Auto-detect the encoding of the script. We should look for a vim style comment + // (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment + // (e.g., "# -*- coding: <encoding-name> -*-"). 
+ setlocale(LC_ALL, ""); env_init(); input_init(); diff --git a/src/fish_key_reader.cpp b/src/fish_key_reader.cpp index 5a33feeec..c0e98832c 100644 --- a/src/fish_key_reader.cpp +++ b/src/fish_key_reader.cpp @@ -146,8 +146,7 @@ void setup_and_process_keys(bool continuous_mode) { is_interactive_session = 1; // by definition this is interactive set_main_thread(); setup_fork_guards(); - wsetlocale(LC_ALL, L"POSIX"); - program_name = L"fish_key_reader"; + setlocale(LC_ALL, "POSIX"); env_init(); reader_init(); input_init(); @@ -176,6 +175,7 @@ void setup_and_process_keys(bool continuous_mode) { } int main(int argc, char **argv) { + program_name = L"fish_key_reader"; bool continuous_mode = false; const char *short_opts = "+c"; const struct option long_opts[] = {{"continuous", no_argument, NULL, 'd'}, {NULL, 0, NULL, 0}}; diff --git a/tests/test_util.fish b/tests/test_util.fish index 7e8f1fd68..22c478524 100644 --- a/tests/test_util.fish +++ b/tests/test_util.fish @@ -1,4 +1,4 @@ -# vim: set ts=4 sw=4 et: +# vim: set ts=4 sw=4 tw=100 et: # Utilities for the test runners if test "$argv[1]" = (status -f) @@ -19,14 +19,12 @@ function die exit 1 end -# Check if we're running in the test environment. -# If not, set it up and rerun fish with exec. -# The test is whether the special var __fish_is_running_tests -# exists and contains the same value as XDG_CONFIG_HOME. It checks -# the value and not just the presence because we're going to delete -# the config directory later if we're exiting successfully. +# Check if we're running in the test environment. If not, set it up and rerun fish with exec. The +# test is whether the special var __fish_is_running_tests exists and contains the same value as +# XDG_CONFIG_HOME. It checks the value and not just the presence because we're going to delete the +# config directory later if we're exiting successfully. 
if not set -q __fish_is_running_tests - # set up our test environment and re-run the original script + # Set up our test environment and re-run the original script. set -l script $argv[1] switch $script case '/*' @@ -35,8 +33,11 @@ if not set -q __fish_is_running_tests # path is relative, make it absolute set script $PWD/$script end - set -l IFS # clear IFS so cmd substitution doesn't split - cd (dirname $script); or die + + begin + set -l IFS # clear IFS so cmd substitution doesn't split + cd (dirname $script); or die + end set -lx XDG_DATA_HOME ../test/data rm -rf $XDG_DATA_HOME/fish @@ -52,13 +53,21 @@ if not set -q __fish_is_running_tests printf 'set fish_function_path \'%s/functions\' \'%s/share/functions\'\n' $escaped_config $escaped_parent > $XDG_CONFIG_HOME/fish/config.fish; or die set -xl __fish_is_running_tests $XDG_CONFIG_HOME - # set locale information to be consistent - set -lx LANG C - set -lx LC_ALL '' - for var in ALL COLLATE MESSAGES MONETARY NUMERIC TIME - set -lx LC_$var '' + # Set locale information for consistent tests. Fish should work with a lot of locales but the + # tests assume an English UTF-8 locale unless they explicitly override this default. We do not + # want the user's locale to affect the tests since it might, for example, change the wording of + # logged messages. + # + # TODO: set LANG to en_US.UTF-8 so we test the locale message conversions (i.e., gettext). + set -e LANGUAGE + set -x LANG C + # Remove "LC_" env vars from the test environment. + for var in (set -xn) + string match -q 'LC_*' $var + and set -e $var end - set -lx LC_CTYPE en_US.UTF-8 + set -x LC_CTYPE en_US.UTF-8 + exec ../test/root/bin/fish $script $args_for_test_script die 'exec failed' else if test "$__fish_is_running_tests" != "$XDG_CONFIG_HOME"