fish_key_reader should use the user's locale

I did some research and experiments. For better or worse, the `bind` command
requires the use of wide char codepoints (e.g., \u1234) for non-ASCII
chars. So don't force the use of the POSIX locale, but do provide it as
an option for people who want to see the individual bytes rather than a
decoded wide char.

Simplify the format of the information displayed for each character. There
really isn't much point in providing decimal, octal, and hexadecimal. Just
print hex and symbolic representations.

Add an example `bind` command that a user can copy/paste.

Closes #3183
This commit is contained in:
Kurtis Rader 2016-06-29 19:46:11 -07:00
parent 262452d0b1
commit 68e167d576
3 changed files with 121 additions and 59 deletions

View file

@ -8,7 +8,6 @@
// Type "exit" or "quit" to terminate the program. // Type "exit" or "quit" to terminate the program.
#include "config.h" // IWYU pragma: keep #include "config.h" // IWYU pragma: keep
#include <string>
#include <errno.h> #include <errno.h>
#include <getopt.h> #include <getopt.h>
#include <signal.h> #include <signal.h>
@ -38,7 +37,8 @@ static const char *ctrl_symbolic_names[] = {NULL, NULL, NULL, NULL, NULL, N
static bool keep_running = true; static bool keep_running = true;
/// Return true if the recent sequence of characters indicates the user wants to exit the program. /// Return true if the recent sequence of characters indicates the user wants to exit the program.
static bool should_exit(unsigned char c) { static bool should_exit(wchar_t wc) {
unsigned char c = wc < 0x80 ? wc : 0;
static unsigned char recent_chars[4] = {0}; static unsigned char recent_chars[4] = {0};
recent_chars[0] = recent_chars[1]; recent_chars[0] = recent_chars[1];
@ -51,7 +51,8 @@ static bool should_exit(unsigned char c) {
} }
/// Return the key name if the recent sequence of characters matches a known terminfo sequence. /// Return the key name if the recent sequence of characters matches a known terminfo sequence.
static char *const key_name(unsigned char c) { static char *const key_name(wchar_t wc) {
unsigned char c = wc < 0x80 ? wc : 0;
static char recent_chars[8] = {0}; static char recent_chars[8] = {0};
recent_chars[0] = recent_chars[1]; recent_chars[0] = recent_chars[1];
@ -75,34 +76,96 @@ static char *const key_name(unsigned char c) {
return NULL; return NULL;
} }
static void output_info_about_char(unsigned char c) { /// Return true if the character must be escaped in the sequence of chars to be bound in `bind`
printf("dec: %3u oct: %03o hex: %02X char: ", c, c, c); /// command.
if (c < 32) { static bool must_escape(wchar_t wc) {
// Control characters. switch (wc) {
printf("\\c%c", c + 64); case '[':
if (ctrl_symbolic_names[c]) printf(" (or %s)", ctrl_symbolic_names[c]); case ']':
} else if (c == 32) { case '(':
// The "space" character. case ')':
printf("\\%03o (aka \"space\")", c); case '<':
} else if (c == 0x7F) { case '>':
// The "del" character. case '{':
printf("\\%03o (aka \"del\")", c); case '}':
} else if (c >= 128) { case '*':
// Non-ASCII characters (i.e., those with bit 7 set). case '\\':
printf("\\%03o (aka non-ASCII)", c); case '?':
} else { case '$':
// ASCII characters that are not control characters. case '#':
printf("%c", c); case ';':
case '&':
case '|':
case '\'':
case '"':
return true;
default:
return false;
} }
putchar('\n');
} }
static void output_matching_key_name(unsigned char c) { static char *char_to_symbol(wchar_t wc, bool bind_friendly) {
char *name = key_name(c); static char buf[128];
if (name) {
printf("Sequence matches bind key name \"%s\"\n", name); if (wc < ' ') {
free(name); // ASCII control character.
if (ctrl_symbolic_names[wc]) {
if (bind_friendly) {
snprintf(buf, sizeof(buf), "%s", ctrl_symbolic_names[wc]);
} else {
snprintf(buf, sizeof(buf), "\\c%c (or %s)", wc + 64, ctrl_symbolic_names[wc]);
}
} else {
snprintf(buf, sizeof(buf), "\\c%c", wc + 64);
}
} else if (wc == ' ') {
// The "space" character.
snprintf(buf, sizeof(buf), "\\x%X (aka \"space\")", wc);
} else if (wc == 0x7F) {
// The "del" character.
snprintf(buf, sizeof(buf), "\\x%X (aka \"del\")", wc);
} else if (wc < 0x80) {
// ASCII characters that are not control characters.
if (bind_friendly && must_escape(wc)) {
snprintf(buf, sizeof(buf), "\\%c", wc);
} else {
snprintf(buf, sizeof(buf), "%c", wc);
}
} else if (wc <= 0xFFFF) {
snprintf(buf, sizeof(buf), "\\u%04X", wc);
} else {
snprintf(buf, sizeof(buf), "\\U%06X", wc);
} }
return buf;
}
/// Record `wc` as the next character of the key sequence being accumulated in `bind_chars`.
///
/// \param wc the character to remember.
/// \param bind_chars the sequence collected so far; `wc` is appended at its end.
static void add_char_to_bind_command(wchar_t wc, std::vector<wchar_t> &bind_chars) {
    bind_chars.insert(bind_chars.end(), wc);
}
/// Print a sample `bind` command for the accumulated key sequence, then reset the sequence.
///
/// Each character in `bind_chars` is written using the symbolic form returned by
/// char_to_symbol() with its bind_friendly argument set to true. Nothing is printed when the
/// sequence is empty.
///
/// \param bind_chars the characters collected so far; emptied after the command is printed.
static void output_bind_command(std::vector<wchar_t> &bind_chars) {
    if (bind_chars.empty()) {
        return;
    }

    fputs("bind ", stdout);
    // Index with size_t so the comparison against size() is not a signed/unsigned mismatch.
    for (size_t i = 0; i < bind_chars.size(); i++) {
        fputs(char_to_symbol(bind_chars[i], true), stdout);
    }
    fputs(" 'do something'\n", stdout);
    bind_chars.clear();
}
/// Print one line describing a character: its value in hexadecimal followed by the symbolic
/// representation returned by char_to_symbol() with its bind_friendly argument set to false.
///
/// \param wc the character to describe.
static void output_info_about_char(wchar_t wc) {
    // %X expects an unsigned int, while wchar_t's underlying type varies by platform, so convert
    // explicitly rather than relying on the variadic argument happening to match.
    printf("hex: %4X char: %s\n", (unsigned int)wc, char_to_symbol(wc, false));
}
static bool output_matching_key_name(wchar_t wc) {
char *name = key_name(wc);
if (name) {
printf("bind -k %s 'do something'\n", name);
free(name);
return true;
}
return false;
} }
static double output_elapsed_time(double prev_tstamp, bool first_char_seen) { static double output_elapsed_time(double prev_tstamp, bool first_char_seen) {
@ -123,24 +186,28 @@ static double output_elapsed_time(double prev_tstamp, bool first_char_seen) {
static void process_input(bool continuous_mode) { static void process_input(bool continuous_mode) {
bool first_char_seen = false; bool first_char_seen = false;
double prev_tstamp = 0.0; double prev_tstamp = 0.0;
std::vector<wchar_t> bind_chars;
printf("Press a key\n\n"); printf("Press a key\n\n");
while (keep_running) { while (keep_running) {
wchar_t wc = input_common_readch(first_char_seen && !continuous_mode); wchar_t wc = input_common_readch(true);
if (wc == WEOF) { if (wc == WEOF) {
return; output_bind_command(bind_chars);
} else if (wc > 255) { if (first_char_seen && !continuous_mode) {
printf("\nUnexpected wide character from input_common_readch(): %lld / 0x%llx\n", return;
(long long)wc, (long long)wc); } else {
return; continue;
}
} }
unsigned char c = wc;
prev_tstamp = output_elapsed_time(prev_tstamp, first_char_seen); prev_tstamp = output_elapsed_time(prev_tstamp, first_char_seen);
output_info_about_char(c); add_char_to_bind_command(wc, bind_chars);
output_matching_key_name(c); output_info_about_char(wc);
if (output_matching_key_name(wc)) {
output_bind_command(bind_chars);
}
if (should_exit(c)) { if (should_exit(wc)) {
printf("\nExiting at your request.\n"); printf("\nExiting at your request.\n");
break; break;
} }
@ -154,8 +221,8 @@ static void process_input(bool continuous_mode) {
static struct sigaction old_sigactions[32]; static struct sigaction old_sigactions[32];
static void signal_handler(int signo, siginfo_t *siginfo, void *siginfo_arg) { static void signal_handler(int signo, siginfo_t *siginfo, void *siginfo_arg) {
debug(2, L"signal #%d (%ls) received", signo, sig2wcs(signo)); debug(2, L"signal #%d (%ls) received", signo, sig2wcs(signo));
// SIGINT isn't included in the following conditional because it is handled specially by fish; // SIGINT isn't included in the following conditional because it is handled specially by fish.
// i.e., it causes \cC to be reinserted into the tty input stream. // Specifically, it causes \cC to be reinserted into the tty input stream.
if (signo == SIGHUP || signo == SIGTERM || signo == SIGABRT || signo == SIGSEGV) { if (signo == SIGHUP || signo == SIGTERM || signo == SIGABRT || signo == SIGSEGV) {
keep_running = false; keep_running = false;
} }
@ -183,10 +250,10 @@ static void install_our_signal_handlers() {
if (sigaction(signo, &new_sa, &old_sa) != -1) { if (sigaction(signo, &new_sa, &old_sa) != -1) {
memcpy(&old_sigactions[signo], &old_sa, sizeof(old_sa)); memcpy(&old_sigactions[signo], &old_sa, sizeof(old_sa));
if (old_sa.sa_handler == SIG_IGN) { if (old_sa.sa_handler == SIG_IGN) {
debug(2, "signal #%d (%ls) was being ignored", signo, sig2wcs(signo)); debug(3, "signal #%d (%ls) was being ignored", signo, sig2wcs(signo));
} }
if (old_sa.sa_flags && ~SA_SIGINFO != 0) { if (old_sa.sa_flags && ~SA_SIGINFO != 0) {
debug(2, L"signal #%d (%ls) handler had flags 0x%X", signo, sig2wcs(signo), debug(3, L"signal #%d (%ls) handler had flags 0x%X", signo, sig2wcs(signo),
old_sa.sa_flags); old_sa.sa_flags);
} }
} }
@ -196,7 +263,6 @@ static void install_our_signal_handlers() {
/// Setup our environment (e.g., tty modes), process key strokes, then reset the environment. /// Setup our environment (e.g., tty modes), process key strokes, then reset the environment.
static void setup_and_process_keys(bool continuous_mode) { static void setup_and_process_keys(bool continuous_mode) {
is_interactive_session = 1; // by definition this program is interactive is_interactive_session = 1; // by definition this program is interactive
setenv("LC_ALL", "POSIX", 1); // ensure we're in a single-byte locale
set_main_thread(); set_main_thread();
setup_fork_guards(); setup_fork_guards();
env_init(); env_init();

View file

@ -1,5 +1,6 @@
# vim: set filetype=expect: # vim: set filetype=expect:
set ::env(fish_escape_delay_ms) 10
spawn $fish_key_reader -c spawn $fish_key_reader -c
# Do we get the expected startup prompt? # Do we get the expected startup prompt?
@ -11,30 +12,26 @@ expect -ex "Press a key" {
# Is a single control char echoed correctly? # Is a single control char echoed correctly?
send "\x01" send "\x01"
expect -ex "char: \\cA\r\n" { expect -ex "char: \\cA\r\nbind \\cA 'do something'\r\n" {
puts "ctrl-a handled" puts "ctrl-a handled"
} unmatched { } unmatched {
puts stderr "ctrl-a not handled" puts stderr "ctrl-a not handled"
} }
# Is a non-ASCII char echoed correctly? This looks a bit odd but \xE9 # Is a non-ASCII UTF-8 sequence prefaced by an escape char handled correctly?
# when using UTF-8 encoding becomes the two byte sequence \xC3\xA9 (or sleep 0.020
# \303\251). # send "\x1B\xE1\x88\xB4"
send "\xE9" send "\x1B\u1234"
expect -ex "char: \\303 (aka non-ASCII)\r\n" { expect -ex "char: \\u1234\r\nbind \\e\\u1234 'do something'\r\n" {
puts "\\xE9, first byte, handled" puts "unicode char, handled"
} unmatched { } unmatched {
puts stderr "\\xE9, first byte, not handled" puts stderr "unicode char, not handled"
}
expect -ex "char: \\251 (aka non-ASCII)\r\n" {
puts "\\xE9, second byte, handled"
} unmatched {
puts stderr "\\xE9, second byte, not handled"
} }
# Is a NULL char echoed correctly? # Is a NULL char echoed correctly?
sleep 0.020
send -null send -null
expect -ex "char: \\c@\r\n" { expect -ex "char: \\c@\r\nbind \\c@ 'do something'\r\n" {
puts "\\c@ handled" puts "\\c@ handled"
} unmatched { } unmatched {
puts stderr "\\c@ not handled" puts stderr "\\c@ not handled"
@ -59,4 +56,4 @@ expect -ex "Exiting at your request.\r\n" {
puts "exited on seeing valid terminate" puts "exited on seeing valid terminate"
} unmatched { } unmatched {
puts stderr "did not exit on seeing valid terminate sequence" puts stderr "did not exit on seeing valid terminate sequence"
} }

View file

@ -1,7 +1,6 @@
saw expected startup prompt saw expected startup prompt
ctrl-a handled ctrl-a handled
\xE9, first byte, handled unicode char, handled
\xE9, second byte, handled
\c@ handled \c@ handled
invalid terminate sequence handled invalid terminate sequence handled
valid terminate sequence handled valid terminate sequence handled