From 345a52862587210477f8e5a9a41c21c26a265009 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 6 Jul 2012 14:34:53 -0700 Subject: [PATCH] Untangle unescaping responsibilities in highlight.cpp. Fix cd autosuggestions to properly handle quotes. Factor out some of the quote unescaping behavior from reader.cpp to parse_util.cpp. Add some autosuggestion tests --- common.cpp | 26 +++---- common.h | 24 ++++--- fish_tests.cpp | 113 ++++++++++++++++++++++++----- highlight.cpp | 61 ++++++++++------ highlight.h | 12 +++- parse_util.cpp | 147 ++++++++++++++++++++++++++++++++++++++ parse_util.h | 15 ++++ reader.cpp | 190 +++++-------------------------------------------- 8 files changed, 352 insertions(+), 236 deletions(-) diff --git a/common.cpp b/common.cpp index b6639e100..437a022e9 100644 --- a/common.cpp +++ b/common.cpp @@ -898,13 +898,13 @@ static wchar_t *escape_simple( const wchar_t *in ) return out; } -wchar_t *escape( const wchar_t *in_orig, - int flags ) +wchar_t *escape( const wchar_t *in_orig, escape_flags_t flags ) { const wchar_t *in = in_orig; - int escape_all = flags & ESCAPE_ALL; - int no_quoted = flags & ESCAPE_NO_QUOTED; + bool escape_all = !! (flags & ESCAPE_ALL); + bool no_quoted = !! (flags & ESCAPE_NO_QUOTED); + bool no_tilde = !! (flags & ESCAPE_NO_TILDE); wchar_t *out; wchar_t *pos; @@ -955,8 +955,8 @@ wchar_t *escape( const wchar_t *in_orig, } else { - - switch( *in ) + wchar_t c = *in; + switch( c ) { case L'\t': *(pos++) = L'\\'; @@ -1020,9 +1020,12 @@ wchar_t *escape( const wchar_t *in_orig, case L'%': case L'~': { - need_escape=1; - if( escape_all ) - *pos++ = L'\\'; + if (! 
no_tilde || c != L'~') + { + need_escape=1; + if( escape_all ) + *pos++ = L'\\'; + } *pos++ = *in; break; } @@ -1076,14 +1079,13 @@ wchar_t *escape( const wchar_t *in_orig, return out; } -wcstring escape_string( const wcstring &in, int escape_all ) { - wchar_t *tmp = escape(in.c_str(), escape_all); +wcstring escape_string( const wcstring &in, escape_flags_t flags ) { + wchar_t *tmp = escape(in.c_str(), flags); wcstring result(tmp); free(tmp); return result; } - wchar_t *unescape( const wchar_t * orig, int flags ) { diff --git a/common.h b/common.h index 5cf541713..774139a5b 100644 --- a/common.h +++ b/common.h @@ -63,14 +63,18 @@ typedef std::vector wcstring_list_t; */ #define UNESCAPE_INCOMPLETE 2 -/** - Escape all characters, including magic characters like the semicolon - */ -#define ESCAPE_ALL 1 -/** - Do not try to use 'simplified' quoted escapes, and do not use empty quotes as the empty string - */ -#define ESCAPE_NO_QUOTED 2 +/* Flags for the escape() and escape_string() functions */ +enum { + /** Escape all characters, including magic characters like the semicolon */ + ESCAPE_ALL = 1 << 0, + + /** Do not try to use 'simplified' quoted escapes, and do not use empty quotes as the empty string */ + ESCAPE_NO_QUOTED = 1 << 1, + + /** Do not escape tildes */ + ESCAPE_NO_TILDE = 1 << 2 +}; +typedef unsigned int escape_flags_t; /** Helper macro for errors @@ -639,8 +643,8 @@ void debug( int level, const wchar_t *msg, ... 
); \return The escaped string, or 0 if there is not enough memory */ -wchar_t *escape( const wchar_t *in, int escape_all ); -wcstring escape_string( const wcstring &in, int escape_all ); +wchar_t *escape( const wchar_t *in, escape_flags_t flags ); +wcstring escape_string( const wcstring &in, escape_flags_t flags ); /** Expand backslashed escapes and substitute them with their unescaped diff --git a/fish_tests.cpp b/fish_tests.cpp index efd699c39..0e8591d2e 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -59,6 +59,8 @@ #include "iothread.h" #include "postfork.h" #include "signal.h" +#include "highlight.h" + /** The number of tests to run */ @@ -606,24 +608,24 @@ static void test_is_potential_path() const wcstring_list_t wds(1, wd); wcstring tmp; - assert(is_potential_path(L"al", wds, true, &tmp) && tmp == L"alpha/"); - assert(is_potential_path(L"alpha/", wds, true, &tmp) && tmp == L"alpha/"); - assert(is_potential_path(L"aard", wds, false, &tmp) && tmp == L"aardvark"); + assert(is_potential_path(L"al", wds, PATH_REQUIRE_DIR, &tmp) && tmp == L"alpha/"); + assert(is_potential_path(L"alpha/", wds, PATH_REQUIRE_DIR, &tmp) && tmp == L"alpha/"); + assert(is_potential_path(L"aard", wds, 0, &tmp) && tmp == L"aardvark"); - assert(! is_potential_path(L"balpha/", wds, true, &tmp)); - assert(! is_potential_path(L"aard", wds, true, &tmp)); - assert(! is_potential_path(L"aarde", wds, true, &tmp)); - assert(! is_potential_path(L"aarde", wds, false, &tmp)); + assert(! is_potential_path(L"balpha/", wds, PATH_REQUIRE_DIR, &tmp)); + assert(! is_potential_path(L"aard", wds, PATH_REQUIRE_DIR, &tmp)); + assert(! is_potential_path(L"aarde", wds, PATH_REQUIRE_DIR, &tmp)); + assert(! 
is_potential_path(L"aarde", wds, 0, &tmp)); - assert(is_potential_path(L"/tmp/is_potential_path_test/aardvark", wds, false, &tmp) && tmp == L"/tmp/is_potential_path_test/aardvark"); - assert(is_potential_path(L"/tmp/is_potential_path_test/al", wds, true, &tmp) && tmp == L"/tmp/is_potential_path_test/alpha/"); - assert(is_potential_path(L"/tmp/is_potential_path_test/aardv", wds, false, &tmp) && tmp == L"/tmp/is_potential_path_test/aardvark"); + assert(is_potential_path(L"/tmp/is_potential_path_test/aardvark", wds, 0, &tmp) && tmp == L"/tmp/is_potential_path_test/aardvark"); + assert(is_potential_path(L"/tmp/is_potential_path_test/al", wds, PATH_REQUIRE_DIR, &tmp) && tmp == L"/tmp/is_potential_path_test/alpha/"); + assert(is_potential_path(L"/tmp/is_potential_path_test/aardv", wds, 0, &tmp) && tmp == L"/tmp/is_potential_path_test/aardvark"); - assert(! is_potential_path(L"/tmp/is_potential_path_test/aardvark", wds, true, &tmp)); - assert(! is_potential_path(L"/tmp/is_potential_path_test/al/", wds, false, &tmp)); - assert(! is_potential_path(L"/tmp/is_potential_path_test/ar", wds, false, &tmp)); + assert(! is_potential_path(L"/tmp/is_potential_path_test/aardvark", wds, PATH_REQUIRE_DIR, &tmp)); + assert(! is_potential_path(L"/tmp/is_potential_path_test/al/", wds, 0, &tmp)); + assert(! 
is_potential_path(L"/tmp/is_potential_path_test/ar", wds, 0, &tmp)); - assert(is_potential_path(L"/usr", wds, true, &tmp) && tmp == L"/usr/"); + assert(is_potential_path(L"/usr", wds, PATH_REQUIRE_DIR, &tmp) && tmp == L"/usr/"); } @@ -727,9 +729,84 @@ static void test_colors() assert(rgb_color_t(L"mooganta").is_none()); } -/* Testing autosuggestion */ -static void test_autosuggest() { - bool autosuggest_special_validate_from_history(const wcstring &str, const wcstring &working_directory, bool *outSuggestionOK); +static void perform_one_autosuggestion_test(const wcstring &command, const wcstring &wd, const wcstring &expected, long line) +{ + wcstring suggestion; + bool success = autosuggest_suggest_special(command, wd, suggestion); + if (! success) + { + printf("line %ld: autosuggest_suggest_special() failed for command %ls\n", line, command.c_str()); + assert(success); + } + if (suggestion != expected) + { + printf("line %ld: autosuggest_suggest_special() returned the wrong expected string for command %ls\n", line, command.c_str()); + printf(" actual: %ls\n", suggestion.c_str()); + printf("expected: %ls\n", expected.c_str()); + assert(suggestion == expected); + } +} + +/* Testing test_autosuggest_suggest_special, in particular for properly handling quotes and backslashes */ +static void test_autosuggest_suggest_special() { + if (system("mkdir -p '/tmp/autosuggest_test/0foobar'")) err(L"mkdir failed"); + if (system("mkdir -p '/tmp/autosuggest_test/1foo bar'")) err(L"mkdir failed"); + if (system("mkdir -p '/tmp/autosuggest_test/2foo bar'")) err(L"mkdir failed"); + if (system("mkdir -p '/tmp/autosuggest_test/3foo\\bar'")) err(L"mkdir failed"); + if (system("mkdir -p /tmp/autosuggest_test/4foo\\'bar")) err(L"mkdir failed"); //a path with a single quote + if (system("mkdir -p /tmp/autosuggest_test/5foo\\\"bar")) err(L"mkdir failed"); //a path with a double quote + if (system("mkdir -p ~/test_autosuggest_suggest_special/")) err(L"mkdir failed"); //make sure tilde is 
handled + + const wcstring wd = L"/tmp/autosuggest_test/"; + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/0", wd, L"cd /tmp/autosuggest_test/0foobar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/0", wd, L"cd \"/tmp/autosuggest_test/0foobar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/0", wd, L"cd '/tmp/autosuggest_test/0foobar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 0", wd, L"cd 0foobar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"0", wd, L"cd \"0foobar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '0", wd, L"cd '0foobar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/1", wd, L"cd /tmp/autosuggest_test/1foo\\ bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/1", wd, L"cd \"/tmp/autosuggest_test/1foo bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/1", wd, L"cd '/tmp/autosuggest_test/1foo bar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 1", wd, L"cd 1foo\\ bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"1", wd, L"cd \"1foo bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '1", wd, L"cd '1foo bar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/2", wd, L"cd /tmp/autosuggest_test/2foo\\ \\ bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/2", wd, L"cd \"/tmp/autosuggest_test/2foo bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/2", wd, L"cd '/tmp/autosuggest_test/2foo bar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 2", wd, L"cd 2foo\\ \\ bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"2", wd, L"cd \"2foo bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '2", wd, L"cd '2foo bar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/3", wd, L"cd /tmp/autosuggest_test/3foo\\\\bar/", __LINE__); + 
perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/3", wd, L"cd \"/tmp/autosuggest_test/3foo\\bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/3", wd, L"cd '/tmp/autosuggest_test/3foo\\bar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 3", wd, L"cd 3foo\\\\bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"3", wd, L"cd \"3foo\\bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '3", wd, L"cd '3foo\\bar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/4", wd, L"cd /tmp/autosuggest_test/4foo\\'bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/4", wd, L"cd \"/tmp/autosuggest_test/4foo'bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/4", wd, L"cd '/tmp/autosuggest_test/4foo\\'bar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 4", wd, L"cd 4foo\\'bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"4", wd, L"cd \"4foo'bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '4", wd, L"cd '4foo\\'bar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd /tmp/autosuggest_test/5", wd, L"cd /tmp/autosuggest_test/5foo\\\"bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"/tmp/autosuggest_test/5", wd, L"cd \"/tmp/autosuggest_test/5foo\\\"bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '/tmp/autosuggest_test/5", wd, L"cd '/tmp/autosuggest_test/5foo\"bar/'", __LINE__); + perform_one_autosuggestion_test(L"cd 5", wd, L"cd 5foo\\\"bar/", __LINE__); + perform_one_autosuggestion_test(L"cd \"5", wd, L"cd \"5foo\\\"bar/\"", __LINE__); + perform_one_autosuggestion_test(L"cd '5", wd, L"cd '5foo\"bar/'", __LINE__); + + perform_one_autosuggestion_test(L"cd ~/test_autosuggest_suggest_specia", wd, L"cd ~/test_autosuggest_suggest_special/", __LINE__); + + // A single quote should defeat tilde expansion + perform_one_autosuggestion_test(L"cd '~/test_autosuggest_suggest_specia'", wd, L"", __LINE__); + + 
system("rm -Rf '/tmp/autosuggest_test/'"); + system("rm -Rf ~/test_autosuggest_suggest_special/"); } @@ -1134,7 +1211,7 @@ int main( int argc, char **argv ) test_path(); test_is_potential_path(); test_colors(); - test_autosuggest(); + test_autosuggest_suggest_special(); history_tests_t::test_history(); history_tests_t::test_history_merge(); history_tests_t::test_history_formats(); diff --git a/highlight.cpp b/highlight.cpp index 549159c5c..bc811e71e 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -127,28 +127,28 @@ bool fs_is_case_insensitive(const wcstring &path, int fd, case_sensitivity_cache } /* Tests whether the specified string cpath is the prefix of anything we could cd to. directories is a list of possible parent directories (typically either the working directory, or the cdpath). This does I/O! + + We expect the path to already be unescaped. */ -bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, bool require_dir, wcstring *out_path) +bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path) { ASSERT_IS_BACKGROUND_THREAD(); - const wchar_t *unescaped, *in; + const bool require_dir = !! (flags & PATH_REQUIRE_DIR); wcstring clean_path; int has_magic = 0; bool result = false; wcstring path(const_path); - expand_tilde(path); - if (! 
unescape_string(path, 1)) - return false; - - unescaped = path.c_str(); + if (flags & PATH_EXPAND_TILDE) + expand_tilde(path); // debug( 1, L"%ls -> %ls ->%ls", path, tilde, unescaped ); - for( in = unescaped; *in; in++ ) + for( size_t i=0; i < path.size(); i++) { - switch( *in ) + wchar_t c = path.at(i); + switch( c ) { case PROCESS_EXPAND: case VARIABLE_EXPAND: @@ -171,7 +171,7 @@ bool is_potential_path(const wcstring &const_path, const wcstring_list_t &direct default: { - clean_path.append(in, 1); + clean_path.push_back(c); break; } @@ -274,8 +274,8 @@ bool is_potential_path(const wcstring &const_path, const wcstring_list_t &direct } -/* Given a string, return whether it prefixes a path that we could cd into. Return that path in out_path */ -static bool is_potential_cd_path(const wcstring &path, const wcstring &working_directory, wcstring *out_path) { +/* Given a string, return whether it prefixes a path that we could cd into. Return that path in out_path. Expects path to be unescaped. 
*/ +static bool is_potential_cd_path(const wcstring &path, const wcstring &working_directory, path_flags_t flags, wcstring *out_path) { wcstring_list_t directories; if (string_prefixes_string(L"./", path)) { @@ -298,7 +298,7 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d } /* Call is_potential_path with all of these directories */ - bool result = is_potential_path(path, directories, true /* require_dir */, out_path); + bool result = is_potential_path(path, directories, flags | PATH_REQUIRE_DIR, out_path); #if 0 if (out_path) { printf("%ls -> %ls\n", path.c_str(), out_path->c_str()); @@ -671,7 +671,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command bool had_cmd = false; tokenizer tok; - for (tok_init( &tok, str.c_str(), TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) + for (tok_init( &tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) { int last_type = tok_last_type(&tok); @@ -681,7 +681,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command { if( had_cmd ) { - /* Parameter to the command */ + /* Parameter to the command. We store these escaped. */ args.push_back(tok_last(&tok)); arg_pos = tok_get_pos(&tok); } @@ -776,6 +776,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command } +/* We have to return an escaped string here */ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &outSuggestion) { if (str.empty()) return false; @@ -792,18 +793,34 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di bool result = false; if (parsed_command == L"cd" && ! parsed_arguments.empty()) { /* We can possibly handle this specially */ - wcstring dir = parsed_arguments.back(); + const wcstring escaped_dir = parsed_arguments.back(); wcstring suggested_path; /* We always return true because we recognized the command. 
This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ result = true; outSuggestion.clear(); + + /* Unescape the parameter */ + wcstring unescaped_dir = escaped_dir; + bool unescaped = unescape_string(unescaped_dir, UNESCAPE_INCOMPLETE); + + /* Determine the quote type we got from the input directory. */ + wchar_t quote = L'\0'; + parse_util_get_parameter_info(escaped_dir, 0, &quote, NULL, NULL); + + /* Big hack to avoid expanding a tilde inside quotes */ + path_flags_t path_flags = (quote == L'\0') ? PATH_EXPAND_TILDE : 0; + if (unescaped && is_potential_cd_path(unescaped_dir, working_directory, path_flags, &suggested_path)) { + + /* Note: this looks really wrong for strings that have an "unescapable" character in them, e.g. a \t, because parse_util_escape_string_with_quote will insert that character */ + wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); - if (is_potential_cd_path(dir, working_directory, &suggested_path)) { - /* Success */ + /* Return it */ outSuggestion = str; outSuggestion.erase(parsed_last_arg_pos); - outSuggestion.append(suggested_path); + if (quote != L'\0') outSuggestion.push_back(quote); + outSuggestion.append(escaped_suggested_path); + if (quote != L'\0') outSuggestion.push_back(quote); } } else { /* Either an error or some other command, so we don't handle it specially */ @@ -945,7 +962,7 @@ static void tokenize( const wchar_t * const buff, std::vector &color, const if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { int is_help = string_prefixes_string(dir, L"--help") || string_prefixes_string(dir, L"-h"); - if( !is_help && ! is_potential_cd_path(dir, working_directory, NULL)) + if( !is_help && ! 
is_potential_cd_path(dir, working_directory, PATH_EXPAND_TILDE, NULL)) { color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR; } @@ -1339,9 +1356,9 @@ void highlight_shell( const wcstring &buff, std::vector &color, int pos, wc parse_util_token_extent( cbuff, pos, &tok_begin, &tok_end, 0, 0 ); if( tok_begin && tok_end ) { - const wcstring token(tok_begin, tok_end-tok_begin); + wcstring token(tok_begin, tok_end-tok_begin); const wcstring_list_t working_directory_list(1, working_directory); - if (is_potential_path(token, working_directory_list)) + if (unescape_string(token, 1) && is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE)) { for( ptrdiff_t i=tok_begin-cbuff; i < (tok_end-cbuff); i++ ) { diff --git a/highlight.h b/highlight.h index 339929cf3..7e30fe086 100644 --- a/highlight.h +++ b/highlight.h @@ -114,7 +114,7 @@ rgb_color_t highlight_get_color( int highlight, bool is_background ); */ bool autosuggest_validate_from_history(const history_item_t &item, file_detection_context_t &detector, const wcstring &working_directory, const env_vars &vars); -/** Given the command line contents 'str', return via reference a suggestion by specially recognizing the command. Returns true if we recognized the command (even if we couldn't think of a suggestion for it). +/** Given the command line contents 'str', return via reference a suggestion by specially recognizing the command. The suggestion is escaped. Returns true if we recognized the command (even if we couldn't think of a suggestion for it). */ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &outString); @@ -122,7 +122,15 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di This is used only internally to this file, and is exposed only for testing. 
*/ -bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, bool require_dir = false, wcstring *out_path = NULL); +enum { + /* The path must be to a directory */ + PATH_REQUIRE_DIR = 1 << 0, + + /* Expand any leading tilde in the path */ + PATH_EXPAND_TILDE = 1 << 1 +}; +typedef unsigned int path_flags_t; +bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path = NULL); #endif diff --git a/parse_util.cpp b/parse_util.cpp index e474b4df4..47c99b1a1 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -708,3 +708,150 @@ wchar_t *parse_util_unescape_wildcards( const wchar_t *str ) return unescaped; } + +/** + Find the outermost quoting style of current token. Returns 0 if + token is not quoted. + +*/ +static wchar_t get_quote( const wchar_t *cmd, int len ) +{ + int i=0; + wchar_t res=0; + + while( 1 ) + { + if( !cmd[i] ) + break; + + if( cmd[i] == L'\\' ) + { + i++; + if( !cmd[i] ) + break; + i++; + } + else + { + if( cmd[i] == L'\'' || cmd[i] == L'\"' ) + { + const wchar_t *end = quote_end( &cmd[i] ); + //fwprintf( stderr, L"Jump %d\n", end-cmd ); + if(( end == 0 ) || (!*end) || (end-cmd > len)) + { + res = cmd[i]; + break; + } + i = end-cmd+1; + } + else + i++; + } + } + + return res; +} + +void parse_util_get_parameter_info( const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, int *type ) +{ + size_t prev_pos=0; + wchar_t last_quote = '\0'; + int unfinished; + + tokenizer tok; + tok_init( &tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS ); + + for( ; tok_has_next( &tok ); tok_next( &tok ) ) + { + if( tok_get_pos( &tok ) > pos ) + break; + + if( tok_last_type( &tok ) == TOK_STRING ) + last_quote = get_quote( tok_last( &tok ), + pos - tok_get_pos( &tok ) ); + + if( type != NULL ) + *type = tok_last_type( &tok ); + + prev_pos = tok_get_pos( &tok ); + } + + tok_destroy( &tok ); + + wchar_t *cmd_tmp = wcsdup(cmd.c_str()); + cmd_tmp[pos]=0; + 
int cmdlen = wcslen( cmd_tmp ); + unfinished = (cmdlen==0); + if( !unfinished ) + { + unfinished = (quote != 0); + + if( !unfinished ) + { + if( wcschr( L" \t\n\r", cmd_tmp[cmdlen-1] ) != 0 ) + { + if( ( cmdlen == 1) || (cmd_tmp[cmdlen-2] != L'\\') ) + { + unfinished=1; + } + } + } + } + + if( quote ) + *quote = last_quote; + + if( offset != 0 ) + { + if( !unfinished ) + { + while( (cmd_tmp[prev_pos] != 0) && (wcschr( L";|",cmd_tmp[prev_pos])!= 0) ) + prev_pos++; + + *offset = prev_pos; + } + else + { + *offset = pos; + } + } + free(cmd_tmp); +} + +wcstring parse_util_escape_string_with_quote( const wcstring &cmd, wchar_t quote) +{ + wcstring result; + if( quote == L'\0' ) + { + result = escape_string( cmd, ESCAPE_ALL | ESCAPE_NO_QUOTED | ESCAPE_NO_TILDE ); + } + else + { + bool unescapable = false; + for (size_t i = 0; i < cmd.size(); i++) + { + wchar_t c = cmd.at(i); + switch (c) + { + case L'\n': + case L'\t': + case L'\b': + case L'\r': + unescapable = true; + break; + default: + if (c == quote) + result.push_back(L'\\'); + result.push_back(c); + break; + } + } + + if (unescapable) + { + result = escape_string(cmd, ESCAPE_ALL | ESCAPE_NO_QUOTED); + result.insert(0, &quote, 1); + } + } + return result; +} diff --git a/parse_util.h b/parse_util.h index 241790cc5..a3ffc193d 100644 --- a/parse_util.h +++ b/parse_util.h @@ -124,6 +124,21 @@ void parse_util_set_argv( const wchar_t * const *argv, const wcstring_list_t &na */ wchar_t *parse_util_unescape_wildcards( const wchar_t *in ); +/** + Calculates information on the parameter at the specified index. + + \param cmd The command to be analyzed + \param pos An index in the string which is inside the parameter + \param quote If not NULL, store the type of quote this parameter has, can be either ', " or \\0, meaning the string is not quoted. + \param offset If not NULL, get_param will store the offset to the beginning of the parameter. + \param type If not NULL, get_param will store the token type as returned by tok_last. 
+*/ +void parse_util_get_parameter_info( const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, int *type ); + +/** + Attempts to escape the string 'cmd' using the given quote type, as determined by the quote character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and thus escaping should be with backslashes). +*/ +wcstring parse_util_escape_string_with_quote( const wcstring &cmd, wchar_t quote); #endif diff --git a/reader.cpp b/reader.cpp index 99755f199..e68677a2e 100644 --- a/reader.cpp +++ b/reader.cpp @@ -802,130 +802,6 @@ static int comp_ilen( const wchar_t *a, const wchar_t *b ) return i; } -/** - Find the outermost quoting style of current token. Returns 0 if - token is not quoted. - -*/ -static wchar_t get_quote( wchar_t *cmd, int len ) -{ - int i=0; - wchar_t res=0; - - while( 1 ) - { - if( !cmd[i] ) - break; - - if( cmd[i] == L'\\' ) - { - i++; - if( !cmd[i] ) - break; - i++; - } - else - { - if( cmd[i] == L'\'' || cmd[i] == L'\"' ) - { - const wchar_t *end = quote_end( &cmd[i] ); - //fwprintf( stderr, L"Jump %d\n", end-cmd ); - if(( end == 0 ) || (!*end) || (end-cmd > len)) - { - res = cmd[i]; - break; - } - i = end-cmd+1; - } - else - i++; - } - } - - return res; -} - -/** - Calculates information on the parameter at the specified index. - - \param cmd The command to be analyzed - \param pos An index in the string which is inside the parameter - \param quote If not 0, store the type of quote this parameter has, can be either ', " or \\0, meaning the string is not quoted. - \param offset If not 0, get_param will store a pointer to the beginning of the parameter. - \param string If not 0, get_parm will store a copy of the parameter string as returned by the tokenizer. - \param type If not 0, get_param will store the token type as returned by tok_last. 
-*/ -static void get_param( const wchar_t *cmd, - const int pos, - wchar_t *quote, - const wchar_t **offset, - wchar_t **string, - int *type ) -{ - int prev_pos=0; - wchar_t last_quote = '\0'; - int unfinished; - - tokenizer tok; - tok_init( &tok, cmd, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS ); - - for( ; tok_has_next( &tok ); tok_next( &tok ) ) - { - if( tok_get_pos( &tok ) > pos ) - break; - - if( tok_last_type( &tok ) == TOK_STRING ) - last_quote = get_quote( tok_last( &tok ), - pos - tok_get_pos( &tok ) ); - - if( type != 0 ) - *type = tok_last_type( &tok ); - if( string != 0 ) - wcscpy( *string, tok_last( &tok ) ); - - prev_pos = tok_get_pos( &tok ); - } - - tok_destroy( &tok ); - - wchar_t *cmd_tmp = wcsdup(cmd); - cmd_tmp[pos]=0; - int cmdlen = wcslen( cmd_tmp ); - unfinished = (cmdlen==0); - if( !unfinished ) - { - unfinished = (quote != 0); - - if( !unfinished ) - { - if( wcschr( L" \t\n\r", cmd_tmp[cmdlen-1] ) != 0 ) - { - if( ( cmdlen == 1) || (cmd_tmp[cmdlen-2] != L'\\') ) - { - unfinished=1; - } - } - } - } - - if( quote ) - *quote = last_quote; - - if( offset != 0 ) - { - if( !unfinished ) - { - while( (cmd_tmp[prev_pos] != 0) && (wcschr( L";|",cmd_tmp[prev_pos])!= 0) ) - prev_pos++; - - *offset = cmd + prev_pos; - } - else - { - *offset = cmd + pos; - } - } -} /** Insert the string in the given command line at the given cursor @@ -991,36 +867,8 @@ static wcstring completion_apply_to_command_line(const wcstring &val_str, int fl wcstring replaced; if( do_escape ) { - get_param(command_line.c_str(), cursor_pos, &quote, 0, 0, 0); - if( quote == L'\0' ) - { - replaced = escape_string( val, ESCAPE_ALL | ESCAPE_NO_QUOTED ); - } - else - { - bool unescapable = false; - for (const wchar_t *pin = val; *pin; pin++) - { - switch (*pin ) - { - case L'\n': - case L'\t': - case L'\b': - case L'\r': - unescapable = true; - break; - default: - replaced.push_back(*pin); - break; - } - } - - if (unescapable) - { - replaced = escape_string(val, ESCAPE_ALL | 
ESCAPE_NO_QUOTED); - replaced.insert(0, &quote, 1); - } - } + parse_util_get_parameter_info(command_line, cursor_pos, &quote, NULL, NULL); + replaced = parse_util_escape_string_with_quote(val_str, quote); } else { @@ -1274,9 +1122,13 @@ struct autosuggestion_context_t { // If the line ends with a space, and the cursor is not at the end, // don't use completion autosuggestions. It ends up being pretty weird seeing stuff get spammed on the right // while you go back to edit a line - const bool line_ends_with_space = iswspace(search_string.at(search_string.size() - 1)); + const wchar_t last_char = search_string.at(search_string.size() - 1); const bool cursor_at_end = (this->cursor_pos == search_string.size()); - if (line_ends_with_space && ! cursor_at_end) + if (! cursor_at_end && iswspace(last_char)) + return 0; + + // On the other hand, if the line ends with a quote, don't go dumping stuff after the quote + if (wcschr(L"'\"", last_char) && cursor_at_end) return 0; /* Try normal completions */ @@ -1586,35 +1438,29 @@ static int handle_completions( const std::vector &comp ) There is no common prefix in the completions, and show_list is true, so we print the list */ - int len; + size_t len, prefix_start = 0; wcstring prefix; - const wchar_t * prefix_start; - const wchar_t *buff = data->command_line.c_str(); - get_param( buff, - data->buff_pos, - 0, - &prefix_start, - 0, - 0 ); + parse_util_get_parameter_info(data->command_line, data->buff_pos, NULL, &prefix_start, NULL); - len = &buff[data->buff_pos]-prefix_start; + assert(data->buff_pos >= prefix_start); + len = data->buff_pos - prefix_start; if( len <= PREFIX_MAX_LEN ) - { - prefix.append(prefix_start, len); + { + prefix.append(data->command_line, prefix_start, len); } else { + // append just the end of the string prefix = wcstring(&ellipsis_char, 1); - prefix.append(prefix_start + (len - PREFIX_MAX_LEN)); - } + prefix.append(data->command_line, prefix_start + len - PREFIX_MAX_LEN, wcstring::npos); + } { int is_quoted; 
wchar_t quote; - const wchar_t *buff = data->command_line.c_str(); - get_param( buff, data->buff_pos, &quote, 0, 0, 0 ); + parse_util_get_parameter_info(data->command_line, data->buff_pos, &quote, NULL, NULL); is_quoted = (quote != L'\0'); write_loop(1, "\n", 1 );