Set of changes to improve detection of escape sequences for prompt width
computation. Addresses #767
2024-12-26 12:53:13 +00:00 · 2013-09-29 02:48:35 -07:00 · 2013-09-29 02:48:35 -07:00 · 991c900fc6
commit 991c900fc6
parent 0d2af9e742
3 changed files with 192 additions and 154 deletions
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -550,6 +550,16 @@ static void test_utils()
    if (begin != a + wcslen(L"echo (echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__);
 }

+static void test_escape_sequences(void)
+{
+    say(L"Testing escape codes");
+    if (escape_code_length(L"") != 0) err(L"test_escape_sequences failed on line %d\n", __LINE__);
+    if (escape_code_length(L"abcd") != 0) err(L"test_escape_sequences failed on line %d\n", __LINE__);
+    if (escape_code_length(L"\x1b[2J") != 4) err(L"test_escape_sequences failed on line %d\n", __LINE__);
+    if (escape_code_length(L"\x1b[38;5;123mABC") != strlen("\x1b[38;5;123m")) err(L"test_escape_sequences failed on line %d\n", __LINE__);
+    if (escape_code_length(L"\x1b@") != 2) err(L"test_escape_sequences failed on line %d\n", __LINE__);
+}
+
 class lru_node_test_t : public lru_node_t
 {
 public:
@ -1834,6 +1844,7 @@ int main(int argc, char **argv)
    test_fork();
    test_parser();
    test_utils();
+    test_escape_sequences();
    test_lru();
    test_expand();
    test_fuzzy_match();
--- a/screen.cpp
+++ b/screen.cpp
@ -92,11 +92,9 @@ public:
   specified position of the specified wide character string. All of
   \c seq must match, but str may be longer than seq.
 */
-static int try_sequence(const char *seq, const wchar_t *str)
+static size_t try_sequence(const char *seq, const wchar_t *str)
 {
-    int i;
-
-    for (i=0;; i++)
+    for (size_t i=0; ; i++)
    {
        if (!seq[i])
            return i;
@ -121,29 +119,6 @@ static size_t next_tab_stop(size_t in)
    return ((in/tab_width)+1)*tab_width;
 }

-// PCA for term256 support, let's just detect the escape codes directly
-static int is_term256_escape(const wchar_t *str)
-{
-    // An escape code looks like this: \x1b[38;5;<num>m
-    // or like this: \x1b[48;5;<num>m
-
-    // parse out the required prefix
-    int len = try_sequence("\x1b[38;5;", str);
-    if (! len) len = try_sequence("\x1b[48;5;", str);
-    if (! len) return 0;
-
-    // now try parsing out a string of digits
-    // we need at least one
-    if (! iswdigit(str[len])) return 0;
-    while (iswdigit(str[len])) len++;
-
-    // look for the terminating m
-    if (str[len++] != L'm') return 0;
-
-    // success
-    return len;
-}
-
 /* Like fish_wcwidth, but returns 0 for control characters instead of -1 */
 static int fish_wcwidth_min_0(wchar_t wc)
 {
@ -157,43 +132,21 @@ static bool allow_soft_wrap(void)
    return !! auto_right_margin;
 }

-/* Information about a prompt layout */
-struct prompt_layout_t
+
+/* Returns the number of characters in the escape code starting at 'code' (which should begin with \x1b), or 0 if no escape code is recognized there */
+size_t escape_code_length(const wchar_t *code)
 {
-    /* How many lines the prompt consumes */
-    size_t line_count;
+    assert(code != NULL);
    
-    /* Width of the longest line */
-    size_t max_line_width;
+    /* The only escape codes we recognize start with \x1b */
+    if (code[0] != L'\x1b')
+        return 0;
    
-    /* Width of the last line */
-    size_t last_line_width;
-};
-
-/**
-   Calculate layout information for the given prompt. Does some clever magic
-   to detect common escape sequences that may be embeded in a prompt,
-   such as color codes.
-*/
-static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
-{
-    size_t current_line_width = 0;
-    size_t j, k;
-
-    prompt_layout_t prompt_layout = {};
-    prompt_layout.line_count = 1;
-
-    for (j=0; prompt[j]; j++)
-    {
-        if (prompt[j] == L'\x1b')
-        {
-            /*
-             This is the start of an escape code. Try to guess its width.
-             */
-            size_t p;
-            int len=0;
+    size_t resulting_length = 0;
    bool found = false;
    
+    if (cur_term != NULL)
+    {
        /*
         Detect these terminfo color escapes with parameter
         value 0..7, all of which don't move the cursor
@ -204,9 +157,28 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
            set_a_background,
            set_foreground,
            set_background,
-            }
-            ;
+        };
    
+        for (size_t p=0; p < sizeof esc / sizeof *esc && !found; p++)
+        {
+            if (!esc[p])
+                continue;
+            
+            for (size_t k=0; k<8; k++)
+            {
+                size_t len = try_sequence(tparm(esc[p],k), code);
+                if (len)
+                {
+                    resulting_length = len;
+                    found = true;
+                    break;
+                }
+            }
+        }
+    }
+    
+    if (cur_term != NULL)
+    {
        /*
         Detect these semi-common terminfo escapes without any
         parameter values, all of which don't move the cursor
@ -233,39 +205,11 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
            enter_standout_mode,
            exit_standout_mode,
            enter_secure_mode
-            }
-            ;
-
-            for (p=0; p < sizeof esc / sizeof *esc && !found; p++)
-            {
-                if (!esc[p])
-                    continue;
-
-                for (k=0; k<8; k++)
-                {
-                    len = try_sequence(tparm(esc[p],k), &prompt[j]);
-                    if (len)
-                    {
-                        j += (len-1);
-                        found = true;
-                        break;
-                    }
-                }
-            }
-
-            /* PCA for term256 support, let's just detect the escape codes directly */
-            if (! found)
-            {
-                len = is_term256_escape(&prompt[j]);
-                if (len)
-                {
-                    j += (len - 1);
-                    found = true;
-                }
-            }
+        };
        
    
-            for (p=0; p < (sizeof(esc2)/sizeof(char *)) && !found; p++)
+    
+        for (size_t p=0; p < sizeof esc2 / sizeof *esc2 && !found; p++)
        {
            if (!esc2[p])
                continue;
@ -274,47 +218,128 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
             be safe. Most versions of tparm don't actually
             seem to do anything these days.
             */
-                len = maxi(try_sequence(tparm(esc2[p]), &prompt[j]),
-                           try_sequence(esc2[p], &prompt[j]));
-
+            size_t len = maxi(try_sequence(tparm(esc2[p]), code), try_sequence(esc2[p], code));
            if (len)
            {
-                    j += (len-1);
+                resulting_length = len;
+                found = true;
+            }
+        }
+    }
+    
+    if (!found)
+    {
+        if (code[1] == L'k')
+        {
+            /* This looks like the escape sequence for setting a screen name */
+            const env_var_t term_name = env_get_string(L"TERM");
+            if (!term_name.missing() && string_prefixes_string(L"screen", term_name))
+            {
+                const wchar_t * const screen_name_end_sentinel = L"\x1b\\";
+                const wchar_t *screen_name_end = wcsstr(&code[2], screen_name_end_sentinel);
+                if (screen_name_end != NULL)
+                {
+                    const wchar_t *escape_sequence_end = screen_name_end + wcslen(screen_name_end_sentinel);
+                    resulting_length = escape_sequence_end - code;
+                }
+                else
+                {
+                    /* Consider just <esc>k to be the code */
+                    resulting_length = 2;
+                }
+                found = true;
+            }
+        }
+    }
+    
+    if (! found)
+    {
+        /* Generic VT100 one byte sequence: CSI followed by something in the range @ through _ */
+        if (code[1] == L'[' && (code[2] >= L'@' && code[2] <= L'_'))
+        {
+            resulting_length = 3;
            found = true;
        }
    }
    
    if (! found)
    {
-                if (prompt[j+1] == L'k')
+        /* Generic VT100 CSI-style sequence. <esc>[, followed by zero or more ASCII characters NOT in the range [@,~], followed by one character in that range */
+        if (code[1] == L'[')
        {
-                    const env_var_t term_name = env_get_string(L"TERM");
-                    if (!term_name.missing() && string_prefixes_string(L"screen", term_name))
+            // Start at 2 to skip over <esc>[
+            size_t cursor = 2;
+            for (; code[cursor] != L'\0'; cursor++)
            {
-                        const wchar_t *end;
-                        j+=2;
-                        found = true;
-                        end = wcsstr(&prompt[j], L"\x1b\\");
-                        if (end)
-                        {
-                            /*
-                             You'd thing this should be
-                             '(end-prompt)+2', in order to move j
-                             past the end of the string, but there is
-                             a 'j++' at the end of each lap, so j
-                             should always point to the last menged
-                             character, e.g. +1.
-                             */
-                            j = (end-prompt)+1;
-                        }
-                        else
+                /* Consume a sequence of ASCII characters not in the range [@, ~] */
+                wchar_t c = code[cursor];
+                
+                /* If we're not in ASCII, just stop */
+                if (c > 127)
+                    break;
+                
+                /* If we're the end character, then consume it and then stop */
+                if (c >= L'@' && c <= L'~')
                {
+                    cursor++;
                    break;
                }
            }
+            /* cursor now indexes just beyond the end of the sequence (or at the terminating zero, or at the first non-ASCII character) */
+            found = true;
+            resulting_length = cursor;
        }
    }
    
+    if (! found)
+    {
+        /* Generic VT100 two byte sequence: <esc> followed by something in the range @ through _ */
+        if (code[1] >= L'@' && code[1] <= L'_')
+        {
+            resulting_length = 2;
+            found = true;
+        }
+    }
+    
+    return resulting_length;
+}
+
+/* Information about a prompt layout */
+struct prompt_layout_t
+{
+    /* How many lines the prompt consumes */
+    size_t line_count;
+
+    /* Width of the longest line */
+    size_t max_line_width;
+
+    /* Width of the last line */
+    size_t last_line_width;
+};
+
+/**
+   Calculate layout information for the given prompt. Does some clever magic
+   to detect common escape sequences that may be embedded in a prompt,
+   such as color codes.
+*/
+static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
+{
+    size_t current_line_width = 0;
+    size_t j;
+
+    prompt_layout_t prompt_layout = {};
+    prompt_layout.line_count = 1;
+
+    for (j=0; prompt[j]; j++)
+    {
+        if (prompt[j] == L'\x1b')
+        {
+            /* This is the start of an escape code. Skip over it if it's at least one character long. */
+            size_t escape_len = escape_code_length(&prompt[j]);
+            if (escape_len > 0)
+            {
+                j += escape_len - 1;
+            }
        }
        else if (prompt[j] == L'\t')
        {
--- a/screen.h
+++ b/screen.h
@ -227,5 +227,7 @@ enum screen_reset_mode_t

 void s_reset(screen_t *s, screen_reset_mode_t mode);

+/* Returns the length of an escape code. Exposed for testing purposes only. */
+size_t escape_code_length(const wchar_t *code);

 #endif