cache prompts and escape sequences we've seen

Cache the escape sequences we've seen when checking for those which
don't take any visual space when writing the prompt or similar strings.
This reduces the cost of determining the true width of such strings by a
full order of magnitude if they include lots of such escape sequences.

Periodically sort the cached escape sequence lengths based on feedback
from cache hits so that we're always checking for the most likely
sequence lengths first.

Also cache the prompt layouts to avoid doing the calculations if the
prompt doesn't change.

Fixes #3793
This commit is contained in:
Kurtis Rader 2017-02-03 19:20:21 -08:00
parent 6d72d538a6
commit c4f2210cc5
3 changed files with 154 additions and 43 deletions

View file

@ -41,6 +41,7 @@
#include "proc.h" #include "proc.h"
#include "reader.h" #include "reader.h"
#include "sanity.h" #include "sanity.h"
#include "screen.h"
#include "wutil.h" // IWYU pragma: keep #include "wutil.h" // IWYU pragma: keep
/// Value denoting a null string. /// Value denoting a null string.
@ -396,6 +397,9 @@ static void handle_curses(const wchar_t *env_var_name) {
// if the TERM var is set. // if the TERM var is set.
// input_init(); // input_init();
term_has_xn = tgetflag((char *)"xn") == 1; // does terminal have the eat_newline_glitch term_has_xn = tgetflag((char *)"xn") == 1; // does terminal have the eat_newline_glitch
// Invalidate the cached escape sequences since they may no longer be valid.
cached_esc_sequences.clear();
} }
/// React to modifying the given variable. /// React to modifying the given variable.

View file

@ -48,6 +48,8 @@
/// A helper value for an invalid location. /// A helper value for an invalid location.
#define INVALID_LOCATION (screen_data_t::cursor_t(-1, -1)) #define INVALID_LOCATION (screen_data_t::cursor_t(-1, -1))
enum prompt_type_t { UNKNOWN_PROMPT, LEFT_PROMPT, RIGHT_PROMPT };
static void invalidate_soft_wrap(screen_t *scr); static void invalidate_soft_wrap(screen_t *scr);
/// Ugly kludge. The internal buffer used to store output of tputs. Since tputs external function /// Ugly kludge. The internal buffer used to store output of tputs. Since tputs external function
@ -75,6 +77,9 @@ class scoped_buffer_t {
} }
}; };
// Singleton of the cached escape sequences seen in prompts and similar strings.
cached_esc_sequences_t cached_esc_sequences = cached_esc_sequences_t();
/// Tests if the specified narrow character sequence is present at the specified position of the /// Tests if the specified narrow character sequence is present at the specified position of the
/// specified wide character string. All of \c seq must match, but str may be longer than seq. /// specified wide character string. All of \c seq must match, but str may be longer than seq.
static size_t try_sequence(const char *seq, const wchar_t *str) { static size_t try_sequence(const char *seq, const wchar_t *str) {
@ -83,7 +88,8 @@ static size_t try_sequence(const char *seq, const wchar_t *str) {
if (seq[i] != str[i]) return 0; if (seq[i] != str[i]) return 0;
} }
return 0; DIE("unexpectedly fell off end of try_sequence()");
return 0; // this should never be executed
} }
/// Returns the number of columns left until the next tab stop, given the current cursor position. /// Returns the number of columns left until the next tab stop, given the current cursor position.
@ -198,7 +204,10 @@ static bool is_csi_style_escape_seq(const wchar_t *code, size_t *resulting_lengt
return true; return true;
} }
// Detect whether the escape sequence sets foreground/background color. /// Detect whether the escape sequence sets foreground/background color. Note that 24-bit color
/// sequences are detected by `is_csi_style_escape_seq()` if they use the ANSI X3.64 pattern for
/// such sequences. This function only handles those escape sequences for setting color that rely on
/// the terminfo definition and which might use a different pattern.
static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) { static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) {
if (!cur_term) return false; if (!cur_term) return false;
@ -211,7 +220,7 @@ static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) {
for (size_t p = 0; p < sizeof esc / sizeof *esc; p++) { for (size_t p = 0; p < sizeof esc / sizeof *esc; p++) {
if (!esc[p]) continue; if (!esc[p]) continue;
for (short k = 0; k < max_colors; k++) { for (int k = 0; k < max_colors; k++) {
size_t esc_seq_len = try_sequence(tparm(esc[p], k), code); size_t esc_seq_len = try_sequence(tparm(esc[p], k), code);
if (esc_seq_len) { if (esc_seq_len) {
*resulting_length = esc_seq_len; *resulting_length = esc_seq_len;
@ -223,8 +232,8 @@ static bool is_color_escape_seq(const wchar_t *code, size_t *resulting_length) {
return false; return false;
} }
// Detect whether the escape sequence sets one of the terminal attributes that affects how text is /// Detect whether the escape sequence sets one of the terminal attributes that affects how text is
// displayed other than the color. /// displayed other than the color.
static bool is_visual_escape_seq(const wchar_t *code, size_t *resulting_length) { static bool is_visual_escape_seq(const wchar_t *code, size_t *resulting_length) {
if (!cur_term) return false; if (!cur_term) return false;
char *const esc2[] = {enter_bold_mode, exit_attribute_mode, enter_underline_mode, char *const esc2[] = {enter_bold_mode, exit_attribute_mode, enter_underline_mode,
@ -252,50 +261,57 @@ static bool is_visual_escape_seq(const wchar_t *code, size_t *resulting_length)
} }
/// Returns the number of characters in the escape code starting at 'code'. We only handle sequences /// Returns the number of characters in the escape code starting at 'code'. We only handle sequences
/// that begin with \e. If it doesn't we return zero. /// that begin with \e. If it doesn't we return zero. We also return zero if we don't recognize the
/// escape sequence based on querying terminfo and other heuristics.
size_t escape_code_length(const wchar_t *code) { size_t escape_code_length(const wchar_t *code) {
assert(code != NULL); assert(code != NULL);
if (*code != L'\e') return 0; if (*code != L'\e') return 0;
size_t esc_seq_len; size_t esc_seq_len = cached_esc_sequences.find_entry(code);
if (is_color_escape_seq(code, &esc_seq_len)) return esc_seq_len; if (esc_seq_len) return esc_seq_len;
if (is_visual_escape_seq(code, &esc_seq_len)) return esc_seq_len;
if (is_screen_name_escape_seq(code, &esc_seq_len)) return esc_seq_len;
if (is_iterm2_escape_seq(code, &esc_seq_len)) return esc_seq_len;
if (is_single_byte_escape_seq(code, &esc_seq_len)) return esc_seq_len;
if (is_csi_style_escape_seq(code, &esc_seq_len)) return esc_seq_len;
if (is_two_byte_escape_seq(code, &esc_seq_len)) return esc_seq_len;
return 0; bool found = is_color_escape_seq(code, &esc_seq_len);
if (!found) found = is_visual_escape_seq(code, &esc_seq_len);
if (!found) found = is_screen_name_escape_seq(code, &esc_seq_len);
if (!found) found = is_iterm2_escape_seq(code, &esc_seq_len);
if (!found) found = is_single_byte_escape_seq(code, &esc_seq_len);
if (!found) found = is_csi_style_escape_seq(code, &esc_seq_len);
if (!found) found = is_two_byte_escape_seq(code, &esc_seq_len);
if (found) cached_esc_sequences.add_entry(code, esc_seq_len);
return esc_seq_len;
} }
// Information about a prompt layout. // Information about the layout of a prompt.
struct prompt_layout_t { struct prompt_layout_t {
// How many lines the prompt consumes. size_t line_count; // how many lines the prompt consumes
size_t line_count; size_t max_line_width; // width of the longest line
// Width of the longest line. size_t last_line_width; // width of the last line
size_t max_line_width;
// Width of the last line.
size_t last_line_width;
}; };
// These are used by `calc_prompt_layout()` to avoid redundant calculations.
static const wchar_t *cached_left_prompt = wcsdup(L"");
static const wchar_t *cached_right_prompt = wcsdup(L"");
static prompt_layout_t cached_left_prompt_layout = {1, 0, 0};
static prompt_layout_t cached_right_prompt_layout = {1, 0, 0};
/// Calculate layout information for the given prompt. Does some clever magic to detect common /// Calculate layout information for the given prompt. Does some clever magic to detect common
/// escape sequences that may be embedded in a prompt, such as color codes. /// escape sequences that may be embedded in a prompt, such as those to set visual attributes.
static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) { static prompt_layout_t calc_prompt_layout(const wchar_t *prompt, prompt_type_t which_prompt) {
if (which_prompt == LEFT_PROMPT && wcscmp(cached_left_prompt, prompt) == 0) {
return cached_left_prompt_layout;
}
if (which_prompt == RIGHT_PROMPT && wcscmp(cached_right_prompt, prompt) == 0) {
return cached_right_prompt_layout;
}
prompt_layout_t prompt_layout = {1, 0, 0};
size_t current_line_width = 0; size_t current_line_width = 0;
size_t j;
prompt_layout_t prompt_layout = {}; for (int j = 0; prompt[j]; j++) {
prompt_layout.line_count = 1;
for (j = 0; prompt[j]; j++) {
if (prompt[j] == L'\e') { if (prompt[j] == L'\e') {
// This is the start of an escape code. Skip over it if it's at least one character // This is the start of an escape code. Skip over it if it's at least one char long.
// long. size_t len = escape_code_length(&prompt[j]);
size_t escape_len = escape_code_length(&prompt[j]); if (len > 0) j += len - 1;
if (escape_len > 0) {
j += escape_len - 1;
}
} else if (prompt[j] == L'\t') { } else if (prompt[j] == L'\t') {
current_line_width = next_tab_stop(current_line_width); current_line_width = next_tab_stop(current_line_width);
} else if (prompt[j] == L'\n' || prompt[j] == L'\f') { } else if (prompt[j] == L'\n' || prompt[j] == L'\f') {
@ -306,14 +322,25 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt) {
} else if (prompt[j] == L'\r') { } else if (prompt[j] == L'\r') {
current_line_width = 0; current_line_width = 0;
} else { } else {
// Ordinary decent character. Just add width. This returns -1 for a control character - // Ordinary char. Add its width with care to ignore control chars which have width -1.
// don't add that.
current_line_width += fish_wcwidth_min_0(prompt[j]); current_line_width += fish_wcwidth_min_0(prompt[j]);
prompt_layout.max_line_width = maxi(prompt_layout.max_line_width, current_line_width); if (current_line_width > prompt_layout.max_line_width) {
prompt_layout.max_line_width = current_line_width;
}
} }
} }
prompt_layout.last_line_width = current_line_width; prompt_layout.last_line_width = current_line_width;
if (which_prompt == LEFT_PROMPT) {
free((void *)cached_left_prompt);
cached_left_prompt = wcsdup(prompt);
cached_left_prompt_layout = prompt_layout;
}
if (which_prompt == RIGHT_PROMPT) {
free((void *)cached_right_prompt);
cached_right_prompt = wcsdup(prompt);
cached_right_prompt_layout = prompt_layout;
}
return prompt_layout; return prompt_layout;
} }
@ -323,7 +350,7 @@ static size_t calc_prompt_lines(const wcstring &prompt) {
// calc_prompt_width_and_lines. // calc_prompt_width_and_lines.
size_t result = 1; size_t result = 1;
if (prompt.find(L'\n') != wcstring::npos || prompt.find(L'\f') != wcstring::npos) { if (prompt.find(L'\n') != wcstring::npos || prompt.find(L'\f') != wcstring::npos) {
result = calc_prompt_layout(prompt.c_str()).line_count; result = calc_prompt_layout(prompt.c_str(), UNKNOWN_PROMPT).line_count;
} }
return result; return result;
} }
@ -677,8 +704,9 @@ static bool test_stuff(screen_t *scr)
/// Update the screen to match the desired output. /// Update the screen to match the desired output.
static void s_update(screen_t *scr, const wchar_t *left_prompt, const wchar_t *right_prompt) { static void s_update(screen_t *scr, const wchar_t *left_prompt, const wchar_t *right_prompt) {
// if (test_stuff(scr)) return; // if (test_stuff(scr)) return;
const size_t left_prompt_width = calc_prompt_layout(left_prompt).last_line_width; const size_t left_prompt_width = calc_prompt_layout(left_prompt, LEFT_PROMPT).last_line_width;
const size_t right_prompt_width = calc_prompt_layout(right_prompt).last_line_width; const size_t right_prompt_width =
calc_prompt_layout(right_prompt, RIGHT_PROMPT).last_line_width;
int screen_width = common_get_width(); int screen_width = common_get_width();
@ -914,8 +942,8 @@ static screen_layout_t compute_layout(screen_t *s, size_t screen_width,
const wchar_t *right_prompt = right_prompt_str.c_str(); const wchar_t *right_prompt = right_prompt_str.c_str();
const wchar_t *autosuggestion = autosuggestion_str.c_str(); const wchar_t *autosuggestion = autosuggestion_str.c_str();
prompt_layout_t left_prompt_layout = calc_prompt_layout(left_prompt); prompt_layout_t left_prompt_layout = calc_prompt_layout(left_prompt, LEFT_PROMPT);
prompt_layout_t right_prompt_layout = calc_prompt_layout(right_prompt); prompt_layout_t right_prompt_layout = calc_prompt_layout(right_prompt, RIGHT_PROMPT);
size_t left_prompt_width = left_prompt_layout.last_line_width; size_t left_prompt_width = left_prompt_layout.last_line_width;
size_t right_prompt_width = right_prompt_layout.last_line_width; size_t right_prompt_width = right_prompt_layout.last_line_width;

View file

@ -12,8 +12,11 @@
#include <assert.h> #include <assert.h>
#include <stddef.h> #include <stddef.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <algorithm>
#include <memory> #include <memory>
#include <set>
#include <vector> #include <vector>
#include "common.h" #include "common.h"
#include "highlight.h" #include "highlight.h"
@ -192,4 +195,80 @@ bool screen_force_clear_to_end();
/// Returns the length of an escape code. Exposed for testing purposes only. /// Returns the length of an escape code. Exposed for testing purposes only.
size_t escape_code_length(const wchar_t *code); size_t escape_code_length(const wchar_t *code);
// Maintain a mapping of escape sequences to their length for fast lookup.
//
// Whole sequences are stored in `cache`. A lookup only knows where a candidate sequence starts,
// not where it ends, so `find_entry()` probes the cache once per distinct cached length, trying
// the lengths that have matched most often first.
class cached_esc_sequences_t {
   private:
    // Cached escape sequences we've already detected in the prompt and similar strings.
    std::set<wcstring> cache;
    // The escape sequence lengths we've cached. My original implementation used min and max
    // length variables. The cache was then iterated over using a loop like this:
    // `for (size_t l = min; l <= max; l++)`.
    //
    // However that is inefficient when there are big gaps in the lengths. This has been observed
    // with the BobTheFish theme which has a bunch of 5 and 6 char sequences and 16 to 19 char
    // sequences and almost nothing in between. So instead we keep track of only those escape
    // sequence lengths we've actually cached to avoid checking for matches of lengths we know are
    // not in our cache.
    std::vector<size_t> lengths;
    // How many cache hits each length in `lengths` has produced; drives the periodic re-sort.
    std::map<size_t, size_t> lengths_match_count;
    // Total number of cache hits since construction or the last `clear()`.
    size_t cache_hits;

   public:
    explicit cached_esc_sequences_t() : cache(), lengths(), lengths_match_count(), cache_hits(0) {}

    /// Remember the escape sequence made up of the first `len` characters of `entry`. The caller
    /// must guarantee that `entry` points at no fewer than `len` valid characters.
    void add_entry(const wchar_t *entry, size_t len) {
        auto str = wcstring(entry, len);

#if 0
        // This is a can't happen scenario. I only wrote this to validate during testing that it
        // wouldn't be triggered. I'm leaving it in but commented out in case someone feels the need
        // to re-enable the check.
        auto match = cache.find(str);
        if (match != cache.end()) {
            debug(0, "unexpected add_entry() call of a value already in the cache: '%ls'",
                  escape(str.c_str(), ESCAPE_ALL).c_str());
            return;
        }
#endif

        cache.emplace(str);
        // Register the length the first time we see it so `find_entry()` knows to probe for it.
        if (std::find(lengths.begin(), lengths.end(), len) == lengths.end()) {
            lengths.push_back(len);
            lengths_match_count[len] = 0;
        }
    }

    /// Return the length of the cached escape sequence that `entry` starts with, or zero if no
    /// cached sequence matches. `entry` must be NUL terminated.
    size_t find_entry(const wchar_t *entry) {
        // Count of leading non-NUL characters of `entry` verified so far. It is extended lazily,
        // and never beyond the terminator, so that we never construct a candidate string from
        // memory past the end of `entry`.
        size_t available = 0;

        for (auto len : lengths) {
            while (available < len && entry[available] != L'\0') available++;
            if (available < len) continue;  // entry is too short to hold a sequence this long

            auto match = cache.find(wcstring(entry, len));
            if (match == cache.end()) continue;

            // We found a matching cached sequence. Periodically sort the sequence lengths so we
            // check for matches going from the most frequently matching lengths to least
            // frequent.
            lengths_match_count[len]++;
            if (++cache_hits % 1000 == 0) {
                std::sort(lengths.begin(), lengths.end(), [&](size_t l1, size_t l2) {
                    // Every length in `lengths` has a count entry (see `add_entry()`), so the
                    // lookups always succeed; find() avoids operator[]'s insert-if-missing.
                    return lengths_match_count.find(l1)->second >
                           lengths_match_count.find(l2)->second;
                });
            }
            return len;  // return the length of the matching cached sequence
        }

        return 0;  // no cached sequence matches the entry
    }

    /// Forget every cached sequence and reset the hit statistics.
    void clear() {
        cache.clear();
        lengths.clear();
        lengths_match_count.clear();
        cache_hits = 0;
    }
};
// Singleton that is exposed so that the cache can be invalidated when terminal related variables
// change by calling `cached_esc_sequences.clear()`.
extern cached_esc_sequences_t cached_esc_sequences;
#endif #endif