diff --git a/src/env.cpp b/src/env.cpp index 8a98355d1..3de55955d 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -41,6 +41,7 @@ #include "proc.h" #include "reader.h" #include "sanity.h" +#include "screen.h" #include "wutil.h" // IWYU pragma: keep /// Value denoting a null string. @@ -396,6 +397,9 @@ static void handle_curses(const wchar_t *env_var_name) { // if the TERM var is set. // input_init(); term_has_xn = tgetflag((char *)"xn") == 1; // does terminal have the eat_newline_glitch + + // Invalidate the cached escape sequences since they may no longer be valid. + cached_esc_sequences.clear(); } /// React to modifying the given variable. diff --git a/src/screen.cpp b/src/screen.cpp index 7b070ef3f..db4a0f5ef 100644 --- a/src/screen.cpp +++ b/src/screen.cpp @@ -48,6 +48,8 @@ /// A helper value for an invalid location. #define INVALID_LOCATION (screen_data_t::cursor_t(-1, -1)) +enum prompt_type_t { UNKNOWN_PROMPT, LEFT_PROMPT, RIGHT_PROMPT }; + static void invalidate_soft_wrap(screen_t *scr); /// Ugly kludge. The internal buffer used to store output of tputs. Since tputs external function @@ -75,6 +77,9 @@ class scoped_buffer_t { } }; +// Singleton of the cached escape sequences seen in prompts and similar strings. +cached_esc_sequences_t cached_esc_sequences = cached_esc_sequences_t(); + /// Tests if the specified narrow character sequence is present at the specified position of the /// specified wide character string. All of \c seq must match, but str may be longer than seq. static size_t try_sequence(const char *seq, const wchar_t *str) { @@ -83,7 +88,8 @@ static size_t try_sequence(const char *seq, const wchar_t *str) { if (seq[i] != str[i]) return 0; } - return 0; + DIE("unexpectedly fell off end of try_sequence()"); + return 0; // this should never be executed } /// Returns the number of columns left until the next tab stop, given the current cursor postion. @@ -198,7 +204,10 @@ static bool is_csi_style_escape_seq(const wchar_t *code, size_t *resulting_lengt return true; } -// Detect whether the escape sequence sets foreground/background color. +/// Detect whether the escape sequence sets foreground/background color. Note that 24-bit color +/// sequences are detected by `is_csi_style_escape_seq()` if they use the ANSI X3.64 pattern for +/// such sequences. This function only handles those escape sequences for setting color that rely on +/// the terminfo definition and which might use a different pattern. static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) { if (!cur_term) return false; @@ -211,7 +220,7 @@ static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) { for (size_t p = 0; p < sizeof esc / sizeof *esc; p++) { if (!esc[p]) continue; - for (short k = 0; k < max_colors; k++) { + for (int k = 0; k < max_colors; k++) { size_t esc_seq_len = try_sequence(tparm(esc[p], k), code); if (esc_seq_len) { *resulting_length = esc_seq_len; @@ -223,8 +232,8 @@ static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) { return false; } -// Detect whether the escape sequence sets one of the terminal attributes that affects how text is -// displayed other than the color. +/// Detect whether the escape sequence sets one of the terminal attributes that affects how text is +/// displayed other than the color. static bool is_visual_escape_seq(const wchar_t *code, size_t *resulting_length) { if (!cur_term) return false; char *const esc2[] = {enter_bold_mode, exit_attribute_mode, enter_underline_mode, @@ -252,50 +261,57 @@ static bool is_visual_escape_seq(const wchar_t *code, size_t *resulting_length) } /// Returns the number of characters in the escape code starting at 'code'. We only handle sequences -/// that begin with \e. If it doesn't we return zero. +/// that begin with \e. If it doesn't we return zero. We also return zero if we don't recognize the +/// escape sequence based on querying terminfo and other heuristics. size_t escape_code_length(const wchar_t *code) { assert(code != NULL); if (*code != L'\e') return 0; - size_t esc_seq_len; - if (is_color_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_visual_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_screen_name_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_iterm2_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_single_byte_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_csi_style_escape_seq(code, &esc_seq_len)) return esc_seq_len; - if (is_two_byte_escape_seq(code, &esc_seq_len)) return esc_seq_len; + size_t esc_seq_len = cached_esc_sequences.find_entry(code); + if (esc_seq_len) return esc_seq_len; - return 0; + bool found = is_color_escape_seq(code, &esc_seq_len); + if (!found) found = is_visual_escape_seq(code, &esc_seq_len); + if (!found) found = is_screen_name_escape_seq(code, &esc_seq_len); + if (!found) found = is_iterm2_escape_seq(code, &esc_seq_len); + if (!found) found = is_single_byte_escape_seq(code, &esc_seq_len); + if (!found) found = is_csi_style_escape_seq(code, &esc_seq_len); + if (!found) found = is_two_byte_escape_seq(code, &esc_seq_len); + if (found) cached_esc_sequences.add_entry(code, esc_seq_len); + return esc_seq_len; } -// Information about a prompt layout. +// Information about the layout of a prompt. struct prompt_layout_t { - // How many lines the prompt consumes. - size_t line_count; - // Width of the longest line. - size_t max_line_width; - // Width of the last line. - size_t last_line_width; + size_t line_count; // how many lines the prompt consumes + size_t max_line_width; // width of the longest line + size_t last_line_width; // width of the last line }; +// These are used by `calc_prompt_layout()` to avoid redundant calculations. +static const wchar_t *cached_left_prompt = wcsdup(L""); +static const wchar_t *cached_right_prompt = wcsdup(L""); +static prompt_layout_t cached_left_prompt_layout = {1, 0, 0}; +static prompt_layout_t cached_right_prompt_layout = {1, 0, 0}; + /// Calculate layout information for the given prompt. Does some clever magic to detect common -/// escape sequences that may be embeded in a prompt, such as color codes. -static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) { +/// escape sequences that may be embeded in a prompt, such as those to set visual attributes. +static prompt_layout_t calc_prompt_layout(const wchar_t *prompt, prompt_type_t which_prompt) { + if (which_prompt == LEFT_PROMPT && wcscmp(cached_left_prompt, prompt) == 0) { + return cached_left_prompt_layout; + } + if (which_prompt == RIGHT_PROMPT && wcscmp(cached_right_prompt, prompt) == 0) { + return cached_right_prompt_layout; + } + + prompt_layout_t prompt_layout = {1, 0, 0}; size_t current_line_width = 0; - size_t j; - prompt_layout_t prompt_layout = {}; - prompt_layout.line_count = 1; - - for (j = 0; prompt[j]; j++) { + for (int j = 0; prompt[j]; j++) { if (prompt[j] == L'\e') { - // This is the start of an escape code. Skip over it if it's at least one character - // long. - size_t escape_len = escape_code_length(&prompt[j]); - if (escape_len > 0) { - j += escape_len - 1; - } + // This is the start of an escape code. Skip over it if it's at least one char long. + size_t len = escape_code_length(&prompt[j]); + if (len > 0) j += len - 1; } else if (prompt[j] == L'\t') { current_line_width = next_tab_stop(current_line_width); } else if (prompt[j] == L'\n' || prompt[j] == L'\f') { @@ -306,14 +322,25 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) { } else if (prompt[j] == L'\r') { current_line_width = 0; } else { - // Ordinary decent character. Just add width. This returns -1 for a control character - - // don't add that. + // Ordinary char. Add its width with care to ignore control chars which have width -1. current_line_width += fish_wcwidth_min_0(prompt[j]); - prompt_layout.max_line_width = maxi(prompt_layout.max_line_width, current_line_width); + if (current_line_width > prompt_layout.max_line_width) { + prompt_layout.max_line_width = current_line_width; + } } } prompt_layout.last_line_width = current_line_width; + if (which_prompt == LEFT_PROMPT) { + free((void *)cached_left_prompt); + cached_left_prompt = wcsdup(prompt); + cached_left_prompt_layout = prompt_layout; + } + if (which_prompt == RIGHT_PROMPT) { + free((void *)cached_right_prompt); + cached_right_prompt = wcsdup(prompt); + cached_right_prompt_layout = prompt_layout; + } return prompt_layout; } @@ -323,7 +350,7 @@ static size_t calc_prompt_lines(const wcstring &prompt) { // calc_prompt_width_and_lines. size_t result = 1; if (prompt.find(L'\n') != wcstring::npos || prompt.find(L'\f') != wcstring::npos) { - result = calc_prompt_layout(prompt.c_str()).line_count; + result = calc_prompt_layout(prompt.c_str(), UNKNOWN_PROMPT).line_count; } return result; } @@ -677,8 +704,9 @@ static bool test_stuff(screen_t *scr) /// Update the screen to match the desired output. static void s_update(screen_t *scr, const wchar_t *left_prompt, const wchar_t *right_prompt) { // if (test_stuff(scr)) return; - const size_t left_prompt_width = calc_prompt_layout(left_prompt).last_line_width; - const size_t right_prompt_width = calc_prompt_layout(right_prompt).last_line_width; + const size_t left_prompt_width = calc_prompt_layout(left_prompt, LEFT_PROMPT).last_line_width; + const size_t right_prompt_width = + calc_prompt_layout(right_prompt, RIGHT_PROMPT).last_line_width; int screen_width = common_get_width(); @@ -914,8 +942,8 @@ static screen_layout_t compute_layout(screen_t *s, size_t screen_width, const wchar_t *right_prompt = right_prompt_str.c_str(); const wchar_t *autosuggestion = autosuggestion_str.c_str(); - prompt_layout_t left_prompt_layout = calc_prompt_layout(left_prompt); - prompt_layout_t right_prompt_layout = calc_prompt_layout(right_prompt); + prompt_layout_t left_prompt_layout = calc_prompt_layout(left_prompt, LEFT_PROMPT); + prompt_layout_t right_prompt_layout = calc_prompt_layout(right_prompt, RIGHT_PROMPT); size_t left_prompt_width = left_prompt_layout.last_line_width; size_t right_prompt_width = right_prompt_layout.last_line_width; diff --git a/src/screen.h b/src/screen.h index c4eb257f1..21622ba8f 100644 --- a/src/screen.h +++ b/src/screen.h @@ -12,8 +12,11 @@ #include #include #include +#include #include +#include #include + #include "common.h" #include "highlight.h" @@ -192,4 +195,80 @@ bool screen_force_clear_to_end(); /// Returns the length of an escape code. Exposed for testing purposes only. size_t escape_code_length(const wchar_t *code); +// Maintain a mapping of escape sequences to their length for fast lookup. +class cached_esc_sequences_t { + private: + // Cached escape sequences we've already detected in the prompt and similar strings. + std::set cache; + // The escape sequence lengths we've cached. My original implementation used min and max + // length variables. The cache was then iterated over using a loop like this: + // `for (size_t l = min; l <= max; l++)`. + // + // However that is inefficient when there are big gaps in the lengths. This has been observed + // with the BobTheFish theme which has a bunch of 5 and 6 char sequences and 16 to 19 char + // sequences and almost nothing in between. So instead we keep track of only those escape + // sequence lengths we've actually cached to avoid checking for matches of lengths we know are + // not in our cache. + std::vector lengths; + std::map lengths_match_count; + size_t cache_hits; + + public: + explicit cached_esc_sequences_t() : cache(), lengths(), lengths_match_count(), cache_hits(0) {} + + void add_entry(const wchar_t *entry, size_t len) { + auto str = wcstring(entry, len); + +#if 0 + // This is a can't happen scenario. I only wrote this to validate during testing that it + // wouldn't be triggered. I'm leaving it in but commented out in case someone feels the need + // to re-enable the check. + auto match = cache.find(str); + if (match != cache.end()) { + debug(0, "unexpected add_entry() call of a value already in the cache: '%ls'", + escape(str.c_str(), ESCAPE_ALL).c_str()); + return; + } +#endif + + cache.emplace(str); + if (std::find(lengths.begin(), lengths.end(), len) == lengths.end()) { + lengths.push_back(len); + lengths_match_count[len] = 0; + } + } + + size_t find_entry(const wchar_t *entry) { + for (auto len : lengths) { + auto match = cache.find(wcstring(entry, len)); + if (match != cache.end()) { // we found a matching cached sequence + // Periodically sort the sequence lengths so we check for matches going from the + // most frequently matching lengths to least frequent. + lengths_match_count[len]++; + if (++cache_hits % 1000 == 0) { + // std::sort(lengths.begin(), lengths.end(), custom_cmp(lengths_match_count)); + std::sort(lengths.begin(), lengths.end(), [&](size_t l1, size_t l2) { + return lengths_match_count[l1] > lengths_match_count[l2]; + }); + } + + return len; // return the length of the matching cached sequence + } + } + + return 0; // no cached sequence matches the entry + } + + void clear() { + cache.clear(); + lengths.clear(); + lengths_match_count.clear(); + cache_hits = 0; + } +}; + +// Singleton that is exposed so that the cache can be invalidated when terminal related variables +// change by calling `cached_esc_sequences.clear()`. +extern cached_esc_sequences_t cached_esc_sequences; + #endif