Set of changes to improve detection of escape sequences for prompt width computation. Addresses #767
This commit is contained in:
ridiculousfish 2013-09-29 02:48:35 -07:00
parent 0d2af9e742
commit 991c900fc6
3 changed files with 192 additions and 154 deletions

View file

@ -550,6 +550,16 @@ static void test_utils()
if (begin != a + wcslen(L"echo (echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); if (begin != a + wcslen(L"echo (echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__);
} }
/* Checks escape_code_length() against a fixed table of inputs and expected lengths. */
static void test_escape_sequences(void)
{
    /* One row per check: the text handed to escape_code_length(), the length
       it must report, and the source line named in the failure message. */
    const struct
    {
        const wchar_t *input;
        size_t expected_length;
        int line;
    }
    cases[] =
    {
        /* Empty input: nothing to measure */
        {L"", 0, __LINE__},
        /* Text that does not begin with an escape character */
        {L"abcd", 0, __LINE__},
        /* A four-character CSI sequence */
        {L"\x1b[2J", 4, __LINE__},
        /* A 256-color sequence followed by ordinary text: only the sequence is counted */
        {L"\x1b[38;5;123mABC", strlen("\x1b[38;5;123m"), __LINE__},
        /* A two-character escape */
        {L"\x1b@", 2, __LINE__}
    };
    const size_t case_count = sizeof cases / sizeof *cases;

    say(L"Testing escape codes");
    for (size_t idx = 0; idx < case_count; idx++)
    {
        const size_t actual_length = escape_code_length(cases[idx].input);
        if (actual_length != cases[idx].expected_length)
        {
            err(L"test_escape_sequences failed on line %d\n", cases[idx].line);
        }
    }
}
class lru_node_test_t : public lru_node_t class lru_node_test_t : public lru_node_t
{ {
public: public:
@ -1834,6 +1844,7 @@ int main(int argc, char **argv)
test_fork(); test_fork();
test_parser(); test_parser();
test_utils(); test_utils();
test_escape_sequences();
test_lru(); test_lru();
test_expand(); test_expand();
test_fuzzy_match(); test_fuzzy_match();

View file

@ -92,11 +92,9 @@ public:
specified position of the specified wide character string. All of specified position of the specified wide character string. All of
\c seq must match, but str may be longer than seq. \c seq must match, but str may be longer than seq.
*/ */
static int try_sequence(const char *seq, const wchar_t *str) static size_t try_sequence(const char *seq, const wchar_t *str)
{ {
int i; for (size_t i=0; ; i++)
for (i=0;; i++)
{ {
if (!seq[i]) if (!seq[i])
return i; return i;
@ -121,29 +119,6 @@ static size_t next_tab_stop(size_t in)
return ((in/tab_width)+1)*tab_width; return ((in/tab_width)+1)*tab_width;
} }
/*
   Recognizes a 256-color escape code at the start of str. Such a code has the
   form \x1b[38;5;<num>m or \x1b[48;5;<num>m, where <num> is one or more
   digits. Returns the number of characters in the code, or 0 if str does not
   start with one.
*/
static int is_term256_escape(const wchar_t *str)
{
    /* Match one of the two fixed prefixes; pos ends up just past it */
    int pos = try_sequence("\x1b[38;5;", str);
    if (pos == 0)
    {
        pos = try_sequence("\x1b[48;5;", str);
    }
    if (pos == 0)
    {
        return 0;
    }

    /* At least one digit is required after the prefix */
    if (! iswdigit(str[pos]))
    {
        return 0;
    }
    do
    {
        pos++;
    }
    while (iswdigit(str[pos]));

    /* The code must end with 'm'; count that character in the result */
    return (str[pos] == L'm') ? pos + 1 : 0;
}
/* Like fish_wcwidth, but returns 0 for control characters instead of -1 */ /* Like fish_wcwidth, but returns 0 for control characters instead of -1 */
static int fish_wcwidth_min_0(wchar_t wc) static int fish_wcwidth_min_0(wchar_t wc)
{ {
@ -157,43 +132,21 @@ static bool allow_soft_wrap(void)
return !! auto_right_margin; return !! auto_right_margin;
} }
/* Information about a prompt layout */
struct prompt_layout_t /* Returns the number of characters in the escape code starting at 'code' (which should initially contain \x1b) */
size_t escape_code_length(const wchar_t *code)
{ {
/* How many lines the prompt consumes */ assert(code != NULL);
size_t line_count;
/* Width of the longest line */ /* The only escape codes we recognize start with \x1b */
size_t max_line_width; if (code[0] != L'\x1b')
return 0;
/* Width of the last line */ size_t resulting_length = 0;
size_t last_line_width;
};
/**
Calculate layout information for the given prompt. Does some clever magic
to detect common escape sequences that may be embedded in a prompt,
such as color codes.
*/
static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
{
size_t current_line_width = 0;
size_t j, k;
prompt_layout_t prompt_layout = {};
prompt_layout.line_count = 1;
for (j=0; prompt[j]; j++)
{
if (prompt[j] == L'\x1b')
{
/*
This is the start of an escape code. Try to guess its width.
*/
size_t p;
int len=0;
bool found = false; bool found = false;
if (cur_term != NULL)
{
/* /*
Detect these terminfo color escapes with parameter Detect these terminfo color escapes with parameter
value 0..7, all of which don't move the cursor value 0..7, all of which don't move the cursor
@ -204,9 +157,28 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
set_a_background, set_a_background,
set_foreground, set_foreground,
set_background, set_background,
} };
;
for (size_t p=0; p < sizeof esc / sizeof *esc && !found; p++)
{
if (!esc[p])
continue;
for (size_t k=0; k<8; k++)
{
size_t len = try_sequence(tparm(esc[p],k), code);
if (len)
{
resulting_length = len;
found = true;
break;
}
}
}
}
if (cur_term != NULL)
{
/* /*
Detect these semi-common terminfo escapes without any Detect these semi-common terminfo escapes without any
parameter values, all of which don't move the cursor parameter values, all of which don't move the cursor
@ -233,39 +205,11 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
enter_standout_mode, enter_standout_mode,
exit_standout_mode, exit_standout_mode,
enter_secure_mode enter_secure_mode
} };
;
for (p=0; p < sizeof esc / sizeof *esc && !found; p++)
{
if (!esc[p])
continue;
for (k=0; k<8; k++)
{
len = try_sequence(tparm(esc[p],k), &prompt[j]);
if (len)
{
j += (len-1);
found = true;
break;
}
}
}
/* PCA for term256 support, let's just detect the escape codes directly */
if (! found)
{
len = is_term256_escape(&prompt[j]);
if (len)
{
j += (len - 1);
found = true;
}
}
for (p=0; p < (sizeof(esc2)/sizeof(char *)) && !found; p++)
for (size_t p=0; p < sizeof esc2 / sizeof *esc2 && !found; p++)
{ {
if (!esc2[p]) if (!esc2[p])
continue; continue;
@ -274,47 +218,128 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
be safe. Most versions of tparm don't actually be safe. Most versions of tparm don't actually
seem to do anything these days. seem to do anything these days.
*/ */
len = maxi(try_sequence(tparm(esc2[p]), &prompt[j]), size_t len = maxi(try_sequence(tparm(esc2[p]), code), try_sequence(esc2[p], code));
try_sequence(esc2[p], &prompt[j]));
if (len) if (len)
{ {
j += (len-1); resulting_length = len;
found = true;
}
}
}
if (!found)
{
if (code[1] == L'k')
{
/* This looks like the escape sequence for setting a screen name */
const env_var_t term_name = env_get_string(L"TERM");
if (!term_name.missing() && string_prefixes_string(L"screen", term_name))
{
const wchar_t * const screen_name_end_sentinel = L"\x1b\\";
const wchar_t *screen_name_end = wcsstr(&code[2], screen_name_end_sentinel);
if (screen_name_end != NULL)
{
const wchar_t *escape_sequence_end = screen_name_end + wcslen(screen_name_end_sentinel);
resulting_length = escape_sequence_end - code;
}
else
{
/* Consider just <esc>k to be the code */
resulting_length = 2;
}
found = true;
}
}
}
if (! found)
{
/* Generic VT100 one byte sequence: CSI followed by something in the range @ through _ */
if (code[1] == L'[' && (code[2] >= L'@' && code[2] <= L'_'))
{
resulting_length = 3;
found = true; found = true;
} }
} }
if (! found) if (! found)
{ {
if (prompt[j+1] == L'k') /* Generic VT100 CSI-style sequence: <esc>[, followed by zero or more ASCII characters NOT in the range [@,~], followed by one character in that range */
if (code[1] == L'[')
{ {
const env_var_t term_name = env_get_string(L"TERM"); // Start at 2 to skip over <esc>[
if (!term_name.missing() && string_prefixes_string(L"screen", term_name)) size_t cursor = 2;
for (; code[cursor] != L'\0'; cursor++)
{ {
const wchar_t *end; /* Consume a sequence of ASCII characters not in the range [@, ~] */
j+=2; wchar_t c = code[cursor];
found = true;
end = wcsstr(&prompt[j], L"\x1b\\"); /* If we're not in ASCII, just stop */
if (end) if (c > 127)
{ break;
/*
You'd think this should be /* If we're the end character, then consume it and then stop */
'(end-prompt)+2', in order to move j if (c >= L'@' && c <= L'~')
past the end of the string, but there is
a 'j++' at the end of each lap, so j
should always point to the last menged
character, e.g. +1.
*/
j = (end-prompt)+1;
}
else
{ {
cursor++;
break; break;
} }
} }
/* cursor now indexes just beyond the end of the sequence (or at the terminating zero) */
found = true;
resulting_length = cursor;
} }
} }
if (! found)
{
/* Generic VT100 two byte sequence: <esc> followed by something in the range @ through _ */
if (code[1] >= L'@' && code[1] <= L'_')
{
resulting_length = 2;
found = true;
}
}
return resulting_length;
}
/* Information about a prompt layout. This is the type returned by
   calc_prompt_layout(). All members are counts, not byte offsets into
   the prompt string. */
struct prompt_layout_t
{
/* How many lines the prompt consumes. calc_prompt_layout() starts this at 1. */
size_t line_count;
/* Width of the longest line */
size_t max_line_width;
/* Width of the last line */
size_t last_line_width;
};
/**
Calculate layout information for the given prompt. Does some clever magic
to detect common escape sequences that may be embedded in a prompt,
such as color codes.
*/
static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
{
size_t current_line_width = 0;
size_t j;
prompt_layout_t prompt_layout = {};
prompt_layout.line_count = 1;
for (j=0; prompt[j]; j++)
{
if (prompt[j] == L'\x1b')
{
/* This is the start of an escape code. Skip over it if it's at least one character long. */
size_t escape_len = escape_code_length(&prompt[j]);
if (escape_len > 0)
{
j += escape_len - 1;
}
} }
else if (prompt[j] == L'\t') else if (prompt[j] == L'\t')
{ {

View file

@ -227,5 +227,7 @@ enum screen_reset_mode_t
void s_reset(screen_t *s, screen_reset_mode_t mode); void s_reset(screen_t *s, screen_reset_mode_t mode);
/* Returns the length of an escape code. Exposed for testing purposes only. */
size_t escape_code_length(const wchar_t *code);
#endif #endif