From 991c900fc6d55de3c11f23d06b5c06393abb1b2d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 29 Sep 2013 02:48:35 -0700 Subject: [PATCH] Set of changes to improve detection of escape sequences for prompt width computation. Addresses #767 --- fish_tests.cpp | 11 ++ screen.cpp | 333 ++++++++++++++++++++++++++----------------------- screen.h | 2 + 3 files changed, 192 insertions(+), 154 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index b47ce3a3a..8b79ef3ac 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -550,6 +550,16 @@ static void test_utils() if (begin != a + wcslen(L"echo (echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); } +static void test_escape_sequences(void) +{ + say(L"Testing escape codes"); + if (escape_code_length(L"") != 0) err(L"test_escape_sequences failed on line %d\n", __LINE__); + if (escape_code_length(L"abcd") != 0) err(L"test_escape_sequences failed on line %d\n", __LINE__); + if (escape_code_length(L"\x1b[2J") != 4) err(L"test_escape_sequences failed on line %d\n", __LINE__); + if (escape_code_length(L"\x1b[38;5;123mABC") != strlen("\x1b[38;5;123m")) err(L"test_escape_sequences failed on line %d\n", __LINE__); + if (escape_code_length(L"\x1b@") != 2) err(L"test_escape_sequences failed on line %d\n", __LINE__); +} + class lru_node_test_t : public lru_node_t { public: @@ -1834,6 +1844,7 @@ int main(int argc, char **argv) test_fork(); test_parser(); test_utils(); + test_escape_sequences(); test_lru(); test_expand(); test_fuzzy_match(); diff --git a/screen.cpp b/screen.cpp index 8c8438346..5ebe8605d 100644 --- a/screen.cpp +++ b/screen.cpp @@ -92,11 +92,9 @@ public: specified position of the specified wide character string. All of \c seq must match, but str may be longer than seq. 
*/ -static int try_sequence(const char *seq, const wchar_t *str) +static size_t try_sequence(const char *seq, const wchar_t *str) { - int i; - - for (i=0;; i++) + for (size_t i=0; ; i++) { if (!seq[i]) return i; @@ -121,29 +119,6 @@ static size_t next_tab_stop(size_t in) return ((in/tab_width)+1)*tab_width; } -// PCA for term256 support, let's just detect the escape codes directly -static int is_term256_escape(const wchar_t *str) -{ - // An escape code looks like this: \x1b[38;5;m - // or like this: \x1b[48;5;m - - // parse out the required prefix - int len = try_sequence("\x1b[38;5;", str); - if (! len) len = try_sequence("\x1b[48;5;", str); - if (! len) return 0; - - // now try parsing out a string of digits - // we need at least one - if (! iswdigit(str[len])) return 0; - while (iswdigit(str[len])) len++; - - // look for the terminating m - if (str[len++] != L'm') return 0; - - // success - return len; -} - /* Like fish_wcwidth, but returns 0 for control characters instead of -1 */ static int fish_wcwidth_min_0(wchar_t wc) { @@ -157,6 +132,178 @@ static bool allow_soft_wrap(void) return !! 
auto_right_margin; } + +/* Returns the number of characters in the escape code starting at 'code' (which should initially contain \x1b) */ +size_t escape_code_length(const wchar_t *code) +{ + assert(code != NULL); + + /* The only escape codes we recognize start with \x1b */ + if (code[0] != L'\x1b') + return 0; + + size_t resulting_length = 0; + bool found = false; + + if (cur_term != NULL) + { + /* + Detect these terminfo color escapes with parameter + value 0..7, all of which don't move the cursor + */ + char * const esc[] = + { + set_a_foreground, + set_a_background, + set_foreground, + set_background, + }; + + for (size_t p=0; p < sizeof esc / sizeof *esc && !found; p++) + { + if (!esc[p]) + continue; + + for (size_t k=0; k<8; k++) + { + size_t len = try_sequence(tparm(esc[p],k), code); + if (len) + { + resulting_length = len; + found = true; + break; + } + } + } + } + + if (cur_term != NULL) + { + /* + Detect these semi-common terminfo escapes without any + parameter values, all of which don't move the cursor + */ + char * const esc2[] = + { + enter_bold_mode, + exit_attribute_mode, + enter_underline_mode, + exit_underline_mode, + enter_standout_mode, + exit_standout_mode, + flash_screen, + enter_subscript_mode, + exit_subscript_mode, + enter_superscript_mode, + exit_superscript_mode, + enter_blink_mode, + enter_italics_mode, + exit_italics_mode, + enter_reverse_mode, + enter_shadow_mode, + exit_shadow_mode, + enter_standout_mode, + exit_standout_mode, + enter_secure_mode + }; + + + + for (size_t p=0; p < sizeof esc2 / sizeof *esc2 && !found; p++) + { + if (!esc2[p]) + continue; + /* + Test both padded and unpadded version, just to + be safe. Most versions of tparm don't actually + seem to do anything these days. 
+ */ + size_t len = maxi(try_sequence(tparm(esc2[p]), code), try_sequence(esc2[p], code)); + if (len) + { + resulting_length = len; + found = true; + } + } + } + + if (!found) + { + if (code[1] == L'k') + { + /* This looks like the escape sequence for setting a screen name */ + const env_var_t term_name = env_get_string(L"TERM"); + if (!term_name.missing() && string_prefixes_string(L"screen", term_name)) + { + const wchar_t * const screen_name_end_sentinel = L"\x1b\\"; + const wchar_t *screen_name_end = wcsstr(&code[2], screen_name_end_sentinel); + if (screen_name_end != NULL) + { + const wchar_t *escape_sequence_end = screen_name_end + wcslen(screen_name_end_sentinel); + resulting_length = escape_sequence_end - code; + } + else + { + /* Consider just ESC k to be the code */ + resulting_length = 2; + } + found = true; + } + } + } + + if (! found) + { + /* Generic VT100 one byte sequence: CSI followed by something in the range @ through _ */ + if (code[1] == L'[' && (code[2] >= L'@' && code[2] <= L'_')) + { + resulting_length = 3; + found = true; + } + } + + if (! found) + { + /* Generic VT100 CSI-style sequence: ESC [, followed by zero or more ASCII characters NOT in the range [@,~], followed by one character in that range */ + if (code[1] == L'[') + { + // Start at 2 to skip over ESC [ + size_t cursor = 2; + for (; code[cursor] != L'\0'; cursor++) + { + /* Consume a sequence of ASCII characters not in the range [@, ~] */ + wchar_t c = code[cursor]; + + /* If we're not in ASCII, just stop */ + if (c > 127) + break; + + /* If we're the end character, then consume it and then stop */ + if (c >= L'@' && c <= L'~') + { + cursor++; + break; + } + } + /* cursor now indexes just beyond the end of the sequence (or at the terminating zero) */ + found = true; + resulting_length = cursor; + } + } + + if (! 
found) + { + /* Generic VT100 two byte sequence: ESC followed by something in the range @ through _ */ + if (code[1] >= L'@' && code[1] <= L'_') + { + resulting_length = 2; + found = true; + } + } + + return resulting_length; +} + /* Information about a prompt layout */ struct prompt_layout_t { @@ -178,7 +325,7 @@ struct prompt_layout_t static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) { size_t current_line_width = 0; - size_t j, k; + size_t j; prompt_layout_t prompt_layout = {}; prompt_layout.line_count = 1; @@ -187,134 +334,12 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) { if (prompt[j] == L'\x1b') { - /* - This is the start of an escape code. Try to guess its width. - */ - size_t p; - int len=0; - bool found = false; - - /* - Detect these terminfo color escapes with parameter - value 0..7, all of which don't move the cursor - */ - char * const esc[] = + /* This is the start of an escape code. Skip over it if it's at least one character long. */ + size_t escape_len = escape_code_length(&prompt[j]); + if (escape_len > 0) { - set_a_foreground, - set_a_background, - set_foreground, - set_background, + j += escape_len - 1; } - ; - - /* - Detect these semi-common terminfo escapes without any - parameter values, all of which don't move the cursor - */ - char * const esc2[] = - { - enter_bold_mode, - exit_attribute_mode, - enter_underline_mode, - exit_underline_mode, - enter_standout_mode, - exit_standout_mode, - flash_screen, - enter_subscript_mode, - exit_subscript_mode, - enter_superscript_mode, - exit_superscript_mode, - enter_blink_mode, - enter_italics_mode, - exit_italics_mode, - enter_reverse_mode, - enter_shadow_mode, - exit_shadow_mode, - enter_standout_mode, - exit_standout_mode, - enter_secure_mode - } - ; - - for (p=0; p < sizeof esc / sizeof *esc && !found; p++) - { - if (!esc[p]) - continue; - - for (k=0; k<8; k++) - { - len = try_sequence(tparm(esc[p],k), &prompt[j]); - if (len) - { - j += (len-1); - found = true; - 
break; - } - } - } - - /* PCA for term256 support, let's just detect the escape codes directly */ - if (! found) - { - len = is_term256_escape(&prompt[j]); - if (len) - { - j += (len - 1); - found = true; - } - } - - - for (p=0; p < (sizeof(esc2)/sizeof(char *)) && !found; p++) - { - if (!esc2[p]) - continue; - /* - Test both padded and unpadded version, just to - be safe. Most versions of tparm don't actually - seem to do anything these days. - */ - len = maxi(try_sequence(tparm(esc2[p]), &prompt[j]), - try_sequence(esc2[p], &prompt[j])); - - if (len) - { - j += (len-1); - found = true; - } - } - - if (!found) - { - if (prompt[j+1] == L'k') - { - const env_var_t term_name = env_get_string(L"TERM"); - if (!term_name.missing() && string_prefixes_string(L"screen", term_name)) - { - const wchar_t *end; - j+=2; - found = true; - end = wcsstr(&prompt[j], L"\x1b\\"); - if (end) - { - /* - You'd thing this should be - '(end-prompt)+2', in order to move j - past the end of the string, but there is - a 'j++' at the end of each lap, so j - should always point to the last menged - character, e.g. +1. - */ - j = (end-prompt)+1; - } - else - { - break; - } - } - } - } - } else if (prompt[j] == L'\t') { diff --git a/screen.h b/screen.h index 0307fdd7d..1d9fde2c2 100644 --- a/screen.h +++ b/screen.h @@ -227,5 +227,7 @@ enum screen_reset_mode_t void s_reset(screen_t *s, screen_reset_mode_t mode); +/* Returns the length of an escape code. Exposed for testing purposes only. */ +size_t escape_code_length(const wchar_t *code); #endif