Add fish_emoji_width variable to control computed emoji width

This is part of an effort to improve fish's Unicode handling. This commit
attempts to grapple with the fact that certain characters (principally
emoji) were considered to have a wcwidth of 1 in Unicode 8, but a width of
2 in Unicode 9.

The system wcwidth() here cannot be trusted; terminal emulators do not
respect it. iTerm2 even allows this to be set in preferences.

This commit introduces a new function is_width_2_in_Uni9_but_1_in_Uni8() to
detect characters of version-ambiguous width. For these characters,
fish_wcwidth() returns a width guessed based on the values of TERM_PROGRAM
and TERM_PROGRAM_VERSION, defaulting to 1. This value can be overridden by
setting the value of a new variable fish_emoji_width (presumably either to
1 or 2).

Fixes #4539, #2652.
This commit is contained in:
ridiculousfish 2018-02-25 17:43:29 -08:00
parent 7bd4af51a1
commit 5282d3e711
4 changed files with 92 additions and 0 deletions

View file

@ -847,6 +847,8 @@ The user can change the settings of `fish` by changing the values of certain var
- A large number of variables starting with the prefixes `fish_color` and `fish_pager_color`. See <a href='#variables-color'>Variables for changing highlighting colors</a> for more information.
- `fish_emoji_width` controls the computed width of certain characters, in particular emoji, whose rendered width varies across terminal emulators. This should be set to 1 if your terminal emulator renders emoji single-width, or 2 if double-width. Set this only if you see graphical glitching when printing emoji.
- `fish_escape_delay_ms` overrides the default timeout of 300ms (default key bindings) or 10ms (vi key bindings) after seeing an escape character before giving up on matching a key binding. See the documentation for the <a href='bind.html#special-case-escape'>bind</a> builtin command. This delay facilitates using escape as a meta key.
- `fish_greeting`, the greeting message printed on startup.

View file

@ -571,6 +571,30 @@ static void init_path_vars() {
}
}
/// Update the value of g_guessed_fish_emoji_width
static void guess_emoji_width() {
wcstring term;
if (auto term_var = env_get(L"TERM_PROGRAM")) {
term = term_var->as_string();
}
double version = 0;
if (auto version_var = env_get(L"TERM_PROGRAM_VERSION")) {
std::string narrow_version = wcs2string(version_var->as_string());
version = strtod(narrow_version.c_str(), NULL);
}
// iTerm2 defaults to Unicode 8 sizes.
// See https://gitlab.com/gnachman/iterm2/wikis/unicodeversionswitching
if (term == L"Apple_Terminal" && version >= 400) {
// Apple Terminal on High Sierra
g_guessed_fish_emoji_width = 2;
} else {
g_guessed_fish_emoji_width = 1;
}
}
/// Initialize the curses subsystem.
static void init_curses() {
for (const auto &var_name : curses_variables) {
@ -798,6 +822,14 @@ static void handle_escape_delay_change(const wcstring &op, const wcstring &var_n
update_wait_on_escape_ms();
}
/// Variable-change handler for fish_emoji_width: re-reads the variable and stores its value in
/// g_fish_emoji_width. If the variable is not set, or its value is negative, 0 is stored.
static void handle_change_emoji_width(const wcstring &op, const wcstring &var_name) {
    auto width_var = env_get(L"fish_emoji_width");
    const int requested_width = width_var ? fish_wcstol(width_var->as_string().c_str()) : 0;
    // Never store a negative width.
    g_fish_emoji_width = requested_width < 0 ? 0 : requested_width;
}
static void handle_term_size_change(const wcstring &op, const wcstring &var_name) {
UNUSED(op);
UNUSED(var_name);
@ -847,6 +879,7 @@ static void handle_locale_change(const wcstring &op, const wcstring &var_name) {
/// Variable-change handler that refreshes the guessed emoji width and then re-runs
/// init_curses(). Neither the operation nor the variable name is consulted.
static void handle_curses_change(const wcstring &op, const wcstring &var_name) {
UNUSED(op);
UNUSED(var_name);
// Re-derive the guessed emoji width before re-initializing curses.
guess_emoji_width();
init_curses();
}
@ -868,6 +901,7 @@ static void setup_var_dispatch_table() {
var_dispatch_table.emplace(L"fish_term256", handle_fish_term_change);
var_dispatch_table.emplace(L"fish_term24bit", handle_fish_term_change);
var_dispatch_table.emplace(L"fish_escape_delay_ms", handle_escape_delay_change);
var_dispatch_table.emplace(L"fish_emoji_width", handle_change_emoji_width);
var_dispatch_table.emplace(L"LINES", handle_term_size_change);
var_dispatch_table.emplace(L"COLUMNS", handle_term_size_change);
var_dispatch_table.emplace(L"fish_complete_path", handle_complete_path_change);
@ -925,6 +959,7 @@ void env_init(const struct config_paths_t *paths /* or NULL */) {
init_curses();
init_input();
init_path_vars();
guess_emoji_width();
// Set up the USER and PATH variables
setup_path();

View file

@ -20,6 +20,7 @@
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#if HAVE_GETTEXT
#include <libintl.h>
#endif
@ -263,6 +264,18 @@ int killpg(int pgr, int sig) {
}
#endif
/// Explicitly configured emoji width. Values that are not positive mean "no explicit value".
int g_fish_emoji_width = 0;

/// Fallback emoji width used when no explicit value is configured. Starts at 1, which is the
/// typical emoji width in Unicode 8.
int g_guessed_fish_emoji_width = 1;

/// \return the width to use for a character of version-ambiguous width. The character itself is
/// not inspected.
int fish_get_emoji_width(wchar_t c) {
    // An explicit positive setting wins; otherwise use the guess. Falling back to wcwidth() is
    // deliberately not attempted, it's hopeless.
    const bool has_explicit_width = g_fish_emoji_width > 0;
    return has_explicit_width ? g_fish_emoji_width : g_guessed_fish_emoji_width;
}
// Big hack to use our versions of wcswidth where we know them to be broken, which is
// EVERYWHERE (https://github.com/fish-shell/fish-shell/issues/2199)
#ifndef HAVE_BROKEN_WCWIDTH
@ -276,10 +289,43 @@ int fish_wcswidth(const wchar_t *str, size_t n) { return wcswidth(str, n); }
#include "wcwidth9/wcwidth9.h"
// This is the sorted list of inclusive ranges of characters whose width was 1 in Unicode 8, but was
// changed to width 2 in Unicode 9. Note that no characters became narrower from Unicode 8 to 9.
//
// \param c the character to classify
// \return true if c falls inside one of the listed ranges, false otherwise
static bool is_width_2_in_Uni9_but_1_in_Uni8(wchar_t c) {
    struct pair_t {
        int lo;
        int hi;
    };
    // The table is static constexpr so it is built once at compile time rather than potentially
    // being re-materialized on the stack on every call; fish_wcwidth() calls this function first
    // for each character it measures. Entries must stay sorted and non-overlapping for the binary
    // search below to be valid.
    static constexpr pair_t pairs[] = {
        {0x0231A, 0x0231B}, {0x023E9, 0x023EC}, {0x023F0, 0x023F0}, {0x023F3, 0x023F3},
        {0x025FD, 0x025FE}, {0x02614, 0x02615}, {0x02648, 0x02653}, {0x0267F, 0x0267F},
        {0x02693, 0x02693}, {0x026A1, 0x026A1}, {0x026AA, 0x026AB}, {0x026BD, 0x026BE},
        {0x026C4, 0x026C5}, {0x026CE, 0x026CE}, {0x026D4, 0x026D4}, {0x026EA, 0x026EA},
        {0x026F2, 0x026F3}, {0x026F5, 0x026F5}, {0x026FA, 0x026FA}, {0x026FD, 0x026FD},
        {0x02705, 0x02705}, {0x0270A, 0x0270B}, {0x02728, 0x02728}, {0x0274C, 0x0274C},
        {0x0274E, 0x0274E}, {0x02753, 0x02755}, {0x02757, 0x02757}, {0x02795, 0x02797},
        {0x027B0, 0x027B0}, {0x027BF, 0x027BF}, {0x02B1B, 0x02B1C}, {0x02B50, 0x02B50},
        {0x02B55, 0x02B55}, {0x16FE0, 0x16FE0}, {0x17000, 0x187EC}, {0x18800, 0x18AF2},
        {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
        {0x1F23B, 0x1F23B}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C},
        {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0},
        {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC},
        {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A},
        {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5},
        {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6F6},
        {0x1F910, 0x1F91E}, {0x1F920, 0x1F927}, {0x1F930, 0x1F930}, {0x1F933, 0x1F93E},
        {0x1F940, 0x1F94B}, {0x1F950, 0x1F95E}, {0x1F980, 0x1F991}, {0x1F9C0, 0x1F9C0}};

    // Find the first range whose upper bound is not below c; c is in the table exactly when that
    // range's lower bound is also not above c.
    const auto where = std::lower_bound(std::begin(pairs), std::end(pairs), c,
                                        [](const pair_t &p, wchar_t ch) { return p.hi < ch; });
    assert((where == std::end(pairs) || where->hi >= c) && "unexpected binary search result");
    return where != std::end(pairs) && where->lo <= c;
}
// Possible negative return values from wcwidth9()
enum { width_non_printable = -1, width_ambiguous = -2, width_private_use = -3 };
int fish_wcwidth(wchar_t wc) {
// Check for certain characters whose width is terminal emulator dependent.
if (is_width_2_in_Uni9_but_1_in_Uni8(wc)) return fish_get_emoji_width(wc);
int w9_width = wcwidth9(wc);
if (w9_width >= 0) return w9_width;

View file

@ -16,6 +16,15 @@
// substitution if wchar.h is included after this header.
#include <wchar.h> // IWYU pragma: keep
/// The column width of emoji characters. This must be configurable because the value changed
/// between Unicode 8 and Unicode 9, wcwidth() is emoji-ignorant, and terminal emulators do
/// different things. See issues like #4539 and https://github.com/neovim/neovim/issues/4976 for how
/// painful this is. A value of 0 means to use the guessed value.
extern int g_fish_emoji_width;
/// The guessed value of the emoji width, based on TERM_PROGRAM and TERM_PROGRAM_VERSION.
extern int g_guessed_fish_emoji_width;
/// fish's internal versions of wcwidth and wcswidth, which can use an internal implementation if
/// the system one is busted.
int fish_wcwidth(wchar_t wc);