diff --git a/src/common.cpp b/src/common.cpp index 9f7d9c504..22235cdad 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1935,3 +1935,17 @@ long convert_digit(wchar_t d, int base) { return res; } + +// Test if the specified character is in a range that fish uses internally to store special tokens. +// +// NOTE: This is used when tokenizing the input. It is also used when reading input, before +// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted +// string. We don't want external input to be able to feed reserved characters into our lexer/parser +// or code evaluator. +// +// TODO: Actually implement the replacement as documented above. +bool fish_reserved_codepoint(wchar_t c) { + return (c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) || + (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END) || + (c >= INPUT_COMMON_BASE && c < INPUT_COMMON_END); +} diff --git a/src/common.h b/src/common.h index 0cf6a3bc0..2467b2429 100644 --- a/src/common.h +++ b/src/common.h @@ -776,3 +776,6 @@ long convert_digit(wchar_t d, int base); } while (0) #endif + +// Return true if the character is in a range reserved for fish's private use. +bool fish_reserved_codepoint(wchar_t c); diff --git a/src/reader.cpp b/src/reader.cpp index d044a1753..88f13cee2 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -2351,20 +2351,6 @@ static int can_read(int fd) { return select(fd + 1, &fds, 0, 0, &can_read_timeout) == 1; } -// Test if the specified character is in a range that fish uses interally to store special tokens. -// -// NOTE: This is used when tokenizing the input. It is also used when reading input, before -// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted -// string. We don't want external input to be able to feed reserved characters into our lexer/parser -// or code evaluator. -// -// TODO: Actually implement the replacement as documented above. 
-static int wchar_private(wchar_t c) { - return (c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) || - (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END) || - (c >= INPUT_COMMON_BASE && c < INPUT_COMMON_END); -} - /// Test if the specified character in the specified string is backslashed. pos may be at the end of /// the string, which indicates if there is a trailing backslash. static bool is_backslashed(const wcstring &str, size_t pos) { @@ -2452,7 +2438,7 @@ const wchar_t *reader_readline(int nchars) { is_interactive_read = was_interactive_read; // fprintf(stderr, "C: %lx\n", (long)c); - if (((!wchar_private(c))) && (c > 31) && (c != 127)) { + if (((!fish_reserved_codepoint(c))) && (c > 31) && (c != 127)) { if (can_read(0)) { wchar_t arr[READAHEAD_MAX + 1]; size_t i; @@ -2472,7 +2458,7 @@ const wchar_t *reader_readline(int nchars) { // need to insert on the commandline that the commmand might need to be able // to see. c = input_readch(false); - if ((!wchar_private(c)) && (c > 31) && (c != 127)) { + if ((!fish_reserved_codepoint(c)) && (c > 31) && (c != 127)) { arr[i] = c; c = 0; } else @@ -3260,7 +3246,8 @@ const wchar_t *reader_readline(int nchars) { } default: { // Other, if a normal character, we add it to the command. - if (!wchar_private(c) && (c >= L' ' || c == L'\n' || c == L'\r') && c != 0x7F) { + if (!fish_reserved_codepoint(c) && (c >= L' ' || c == L'\n' || c == L'\r') && + c != 0x7F) { bool allow_expand_abbreviations = false; if (data->is_navigating_pager_contents()) { data->pager.set_search_field_shown(true); diff --git a/src/wutil.cpp b/src/wutil.cpp index 0461b9a38..608caf7f8 100644 --- a/src/wutil.cpp +++ b/src/wutil.cpp @@ -448,14 +448,6 @@ int wrename(const wcstring &old, const wcstring &newv) { return rename(old_narrow.c_str(), new_narrow.c_str()); } -/// Return one if the code point is in the range we reserve for internal use. 
-int fish_is_reserved_codepoint(wint_t wc) { - if (RESERVED_CHAR_BASE <= wc && wc < RESERVED_CHAR_END) return 1; - if (EXPAND_RESERVED_BASE <= wc && wc < EXPAND_RESERVED_END) return 1; - if (WILDCARD_RESERVED_BASE <= wc && wc < WILDCARD_RESERVED_END) return 1; - return 0; -} - /// Return one if the code point is in a Unicode private use area. int fish_is_pua(wint_t wc) { if (PUA1_START <= wc && wc < PUA1_END) return 1; @@ -467,7 +459,7 @@ int fish_is_pua(wint_t wc) { /// We need this because there are too many implementations that don't return the proper answer for /// some code points. See issue #3050. int fish_iswalnum(wint_t wc) { - if (fish_is_reserved_codepoint(wc)) return 0; + if (fish_reserved_codepoint(wc)) return 0; if (fish_is_pua(wc)) return 0; return iswalnum(wc); } @@ -475,7 +467,7 @@ int fish_iswalnum(wint_t wc) { /// We need this because there are too many implementations that don't return the proper answer for /// some code points. See issue #3050. int fish_iswalpha(wint_t wc) { - if (fish_is_reserved_codepoint(wc)) return 0; + if (fish_reserved_codepoint(wc)) return 0; if (fish_is_pua(wc)) return 0; return iswalpha(wc); } @@ -483,7 +475,7 @@ int fish_iswalpha(wint_t wc) { /// We need this because there are too many implementations that don't return the proper answer for /// some code points. See issue #3050. int fish_iswgraph(wint_t wc) { - if (fish_is_reserved_codepoint(wc)) return 0; + if (fish_reserved_codepoint(wc)) return 0; if (fish_is_pua(wc)) return 1; return iswgraph(wc); }