simplify, and fix, setting the current locale

Fix bogosities in the test setup. Specifically, the tests weren't hermetic with respect to
locale env vars.

Rewrite the handling of locale vars to simplify the code and make it more like
the pattern most programs employ.

Fixes #3110
This commit is contained in:
Kurtis Rader 2016-06-03 19:05:13 -07:00
parent 32a585a52b
commit 0b385f145c
8 changed files with 77 additions and 87 deletions

View file

@ -197,24 +197,6 @@ static int octal_to_bin(wchar_t c) {
}
}
double C_STRTOD(wchar_t const *nptr, wchar_t **endptr) {
double r;
const wcstring saved_locale = wsetlocale(LC_NUMERIC, NULL);
if (!saved_locale.empty()) {
wsetlocale(LC_NUMERIC, L"C");
}
r = wcstod(nptr, endptr);
if (!saved_locale.empty()) {
wsetlocale(LC_NUMERIC, saved_locale.c_str());
}
return r;
}
void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) {
// Don't error twice.
if (early_exit) return;
@ -283,7 +265,12 @@ uintmax_t raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
template <>
long double raw_string_to_scalar_type(const wchar_t *s, wchar_t **end) {
return C_STRTOD(s, end);
// Forcing the locale to C is questionable but it's what the old C_STRTOD() did. That function was
// inlined here as part of changing how locale management is done by fish.
char * old_locale = setlocale(LC_NUMERIC, "C");
double val = wcstod(s, end);
setlocale(LC_NUMERIC, old_locale);
return val;
}
template <typename T>

View file

@ -6,7 +6,6 @@
#include <dlfcn.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <math.h>
#include <signal.h>
#include <stdarg.h>
@ -470,19 +469,12 @@ wchar_t *quote_end(const wchar_t *pos) {
return 0;
}
wcstring wsetlocale(int category, const wchar_t *locale) {
char *lang = locale ? wcs2str(locale) : NULL;
char *res = setlocale(category, lang);
free(lang);
void fish_setlocale() {
// Use ellipsis if on known unicode system, otherwise use $.
ellipsis_char = (wcwidth(L'\x2026') > 0) ? L'\x2026' : L'$';
// U+23CE is the "return" character
omitted_newline_char = (wcwidth(L'\x23CE') > 0) ? L'\x23CE' : L'~';
if (!res) return wcstring();
return format_string(L"%s", res);
}
bool contains_internal(const wchar_t *a, int vararg_handle, ...) {

View file

@ -627,10 +627,9 @@ wchar_t *quote_end(const wchar_t *in);
/// interactive command executes, to allow new messages to be printed.
void error_reset();
/// This function behaves exactly like a wide character equivalent of the C function setlocale,
/// except that it will also try to detect if the user is using a Unicode character set, and if so,
/// use the unicode ellipsis character as ellipsis, instead of '$'.
wcstring wsetlocale(int category, const wchar_t *locale);
/// This function should be called after calling `setlocale()` to perform fish-specific locale
/// initialization.
void fish_setlocale();
/// Checks if \c needle is included in the list of strings specified. A warning is printed if needle
/// is zero.

View file

@ -9,6 +9,7 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
@ -168,37 +169,33 @@ static bool var_is_locale(const wcstring &key) {
}
/// Properly sets all locale information.
static void handle_locale() {
const env_var_t lc_all = env_get_string(L"LC_ALL");
const wcstring old_locale = wsetlocale(LC_MESSAGES, NULL);
static void handle_locale(const wchar_t *env_var_name) {
debug(2, L"handle_locale() called in response to '%ls' changing", env_var_name);
const char *old_msg_locale = setlocale(LC_MESSAGES, NULL);
// Array of locale constants corresponding to the locale variable names defined in
// locale_variable.
static const int cat[] = {0, LC_ALL, LC_COLLATE, LC_CTYPE,
LC_MESSAGES, LC_MONETARY, LC_NUMERIC, LC_TIME};
if (!lc_all.missing()) {
wsetlocale(LC_ALL, lc_all.c_str());
} else {
const env_var_t lang = env_get_string(L"LANG");
if (!lang.missing()) {
wsetlocale(LC_ALL, lang.c_str());
}
for (int i = 2; locale_variable[i]; i++) {
const env_var_t val = env_get_string(locale_variable[i]);
if (!val.missing()) {
wsetlocale(cat[i], val.c_str());
}
for (size_t i = 0; locale_variable[i]; i++) {
const wchar_t *key = locale_variable[i];
const env_var_t var = env_get_string(key);
if (!var.empty()) {
const std::string &name = wcs2string(key);
const std::string &value = wcs2string(var);
setenv(name.c_str(), value.c_str(), 1);
debug(3, L"locale var %s='%s'", name.c_str(), value.c_str());
}
}
const wcstring new_locale = wsetlocale(LC_MESSAGES, NULL);
if (old_locale != new_locale) {
char *locale = setlocale(LC_ALL, "");
fish_setlocale();
debug(2, L"handle_locale() setlocale(): '%s'", locale);
const char *new_msg_locale = setlocale(LC_MESSAGES, NULL);
debug(3, L"old LC_MESSAGES locale: '%s'", old_msg_locale);
debug(3, L"new LC_MESSAGES locale: '%s'", new_msg_locale);
if (strcmp(old_msg_locale, new_msg_locale)) {
// Try to make change known to gettext. Both changing _nl_msg_cat_cntr and calling dcgettext
// might potentially tell some gettext implementation that the translation strings should be
// reloaded. We do both and hope for the best.
debug(2, L"changing message locale from '%s' to '%s'", old_msg_locale, new_msg_locale);
extern int _nl_msg_cat_cntr;
_nl_msg_cat_cntr++;
fish_dcgettext("fish", "Changing language to English", LC_MESSAGES);
@ -217,7 +214,8 @@ static bool var_is_curses(const wcstring &key) {
/// Push all curses/terminfo env vars into the global environment where they can be found by those
/// libraries.
static void handle_curses() {
static void handle_curses(const wchar_t *env_var_name) {
debug(2, L"handle_curses() called in response to '%ls' changing", env_var_name);
for (size_t i = 0; curses_variable[i]; i++) {
const wchar_t *key = curses_variable[i];
const env_var_t var = env_get_string(key);
@ -225,6 +223,7 @@ static void handle_curses() {
const std::string &name = wcs2string(key);
const std::string &value = wcs2string(var);
setenv(name.c_str(), value.c_str(), 1);
debug(3, L"curses var %s='%s'", name.c_str(), value.c_str());
}
}
// TODO: Modify input_init() to allow calling it when the terminfo env vars are dynamically
@ -236,9 +235,9 @@ static void handle_curses() {
/// React to modifying the given variable.
static void react_to_variable_change(const wcstring &key) {
if (var_is_locale(key)) {
handle_locale();
handle_locale(key.c_str());
} else if (var_is_curses(key)) {
handle_curses();
handle_curses(key.c_str());
} else if (key == L"fish_term256" || key == L"fish_term24bit") {
update_fish_color_support();
reader_react_to_color_change();
@ -852,14 +851,13 @@ void env_push(bool new_scope) {
void env_pop() {
if (&top->env != global) {
int i;
int locale_changed = 0;
const wchar_t *locale_changed = NULL;
env_node_t *killme = top;
for (i = 0; locale_variable[i]; i++) {
var_table_t::iterator result = killme->env.find(locale_variable[i]);
if (result != killme->env.end()) {
locale_changed = 1;
locale_changed = locale_variable[i];
break;
}
}
@ -881,7 +879,7 @@ void env_pop() {
delete killme;
if (locale_changed) handle_locale();
if (locale_changed) handle_locale(locale_changed);
} else {
debug(0, _(L"Tried to pop empty environment stack."));

View file

@ -418,11 +418,12 @@ int main(int argc, char **argv) {
assert(ANY_SENTINAL >= WILDCARD_RESERVED_BASE && ANY_SENTINAL <= WILDCARD_RESERVED_END);
assert(R_SENTINAL >= INPUT_COMMON_BASE && R_SENTINAL <= INPUT_COMMON_END);
program_name = L"fish";
set_main_thread();
setup_fork_guards();
wsetlocale(LC_ALL, L"");
program_name = L"fish";
setlocale(LC_ALL, "");
fish_setlocale();
// struct stat tmp;
// stat("----------FISH_HIT_MAIN----------", &tmp);

View file

@ -328,12 +328,16 @@ static std::string html_colorize(const wcstring &text,
static std::string no_colorize(const wcstring &text) { return wcs2string(text); }
int main(int argc, char *argv[]) {
program_name = L"fish_indent";
set_main_thread();
setup_fork_guards();
wsetlocale(LC_ALL, L"");
program_name = L"fish_indent";
// Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's
// because the fish project assumes Unicode UTF-8 encoding in all of its scripts.
//
// TODO: Auto-detect the encoding of the script. We should look for a vim style comment
// (e.g., "# vim: set fileencoding=<encoding-name>:") or an emacs style comment
// (e.g., "# -*- coding: <encoding-name> -*-").
setlocale(LC_ALL, "");
env_init();
input_init();

View file

@ -148,8 +148,7 @@ void setup_and_process_keys(bool continuous_mode) {
is_interactive_session = 1; // by definition this is interactive
set_main_thread();
setup_fork_guards();
wsetlocale(LC_ALL, L"POSIX");
program_name = L"fish_key_reader";
setlocale(LC_ALL, "POSIX");
env_init();
reader_init();
input_init();
@ -178,6 +177,7 @@ void setup_and_process_keys(bool continuous_mode) {
}
int main(int argc, char **argv) {
program_name = L"fish_key_reader";
bool continuous_mode = false;
const char *short_opts = "+c";
const struct option long_opts[] = {{"continuous", no_argument, NULL, 'd'}, {NULL, 0, NULL, 0}};

View file

@ -1,4 +1,4 @@
# vim: set ts=4 sw=4 et:
# vim: set ts=4 sw=4 tw=100 et:
# Utilities for the test runners
if test "$argv[1]" = (status -f)
@ -19,14 +19,12 @@ function die
exit 1
end
# Check if we're running in the test environment.
# If not, set it up and rerun fish with exec.
# The test is whether the special var __fish_is_running_tests
# exists and contains the same value as XDG_CONFIG_HOME. It checks
# the value and not just the presence because we're going to delete
# the config directory later if we're exiting successfully.
# Check if we're running in the test environment. If not, set it up and rerun fish with exec. The
# test is whether the special var __fish_is_running_tests exists and contains the same value as
# XDG_CONFIG_HOME. It checks the value and not just the presence because we're going to delete the
# config directory later if we're exiting successfully.
if not set -q __fish_is_running_tests
# set up our test environment and re-run the original script
# Set up our test environment and re-run the original script.
set -l script $argv[1]
switch $script
case '/*'
@ -35,8 +33,11 @@ if not set -q __fish_is_running_tests
# path is relative, make it absolute
set script $PWD/$script
end
begin
set -l IFS # clear IFS so cmd substitution doesn't split
cd (dirname $script); or die
end
set -lx XDG_DATA_HOME ../test/data
rm -rf $XDG_DATA_HOME/fish
@ -52,13 +53,21 @@ if not set -q __fish_is_running_tests
printf 'set fish_function_path \'%s/functions\' \'%s/share/functions\'\n' $escaped_config $escaped_parent > $XDG_CONFIG_HOME/fish/config.fish; or die
set -xl __fish_is_running_tests $XDG_CONFIG_HOME
# set locale information to be consistent
set -lx LANG C
set -lx LC_ALL ''
for var in ALL COLLATE MESSAGES MONETARY NUMERIC TIME
set -lx LC_$var ''
# Set locale information for consistent tests. Fish should work with a lot of locales but the
# tests assume an English UTF-8 locale unless they explicitly override this default. We do not
# want the user's locale to affect the tests since it might, for example, change the wording of
# logged messages.
#
# TODO: set LANG to en_US.UTF-8 so we test the locale message conversions (i.e., gettext).
set -e LANGUAGE
set -x LANG C
# Remove "LC_" env vars from the test environment.
for var in (set -xn)
string match -q 'LC_*' $var
and set -e $var
end
set -lx LC_CTYPE en_US.UTF-8
set -x LC_CTYPE en_US.UTF-8
exec ../test/root/bin/fish $script $args_for_test_script
die 'exec failed'
else if test "$__fish_is_running_tests" != "$XDG_CONFIG_HOME"