mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-13 21:44:16 +00:00
reduce number of Unicode private-use characters
This narrows the range of Unicode codepoints fish reserves for its own use from U+E000 thru U+F8FE (6399 codepoints) to U+F600 thru U+F73F (320 codepoints). This is still not ideal since fish shouldn't be using any Unicode private-use codepoints but it's a step in the right direction. This partially addresses issue #2684.
This commit is contained in:
parent
b41b962336
commit
f2246dfb34
10 changed files with 184 additions and 186 deletions
|
@ -215,6 +215,11 @@ cd /usr/local/bin
|
|||
rm -f fish fish_indent
|
||||
\endfish
|
||||
|
||||
<hr>
|
||||
\section faq-reserved-chars Unicode private-use characters reserved by fish
|
||||
|
||||
Fish reserves the <a href="http://www.unicode.org/faq/private_use.html">Unicode private-use character range</a> from U+F600 through U+F73F for internal use. Any attempt to feed characters in that range to fish will result in them being replaced by the Unicode "replacement character" U+FFFD. This includes both interactive input and any file read by fish (but not programs run by fish).
|
||||
|
||||
\htmlonly[block]
|
||||
</div>
|
||||
\endhtmlonly
|
||||
|
|
65
src/common.h
65
src/common.h
|
@ -35,34 +35,59 @@
|
|||
typedef std::wstring wcstring;
|
||||
typedef std::vector<wcstring> wcstring_list_t;
|
||||
|
||||
/**
|
||||
Maximum number of bytes used by a single utf-8 character
|
||||
*/
|
||||
// Maximum number of bytes used by a single utf-8 character.
|
||||
#define MAX_UTF8_BYTES 6
|
||||
|
||||
/**
|
||||
This is in the unicode private use area.
|
||||
*/
|
||||
#define ENCODE_DIRECT_BASE 0xf100
|
||||
|
||||
/**
|
||||
Highest legal ascii value
|
||||
*/
|
||||
// Highest legal ASCII value.
|
||||
#define ASCII_MAX 127u
|
||||
|
||||
/**
|
||||
Highest legal 16-bit unicode value
|
||||
*/
|
||||
#define UCS2_MAX 0xffffu
|
||||
// Highest legal 16-bit Unicode value.
|
||||
#define UCS2_MAX 0xFFFFu
|
||||
|
||||
/**
|
||||
Highest legal byte value
|
||||
*/
|
||||
#define BYTE_MAX 0xffu
|
||||
// Highest legal byte value.
|
||||
#define BYTE_MAX 0xFFu
|
||||
|
||||
/** BOM value */
|
||||
// Unicode BOM value.
|
||||
#define UTF8_BOM_WCHAR 0xFEFFu
|
||||
|
||||
// Unicode replacement character.
|
||||
#define REPLACEMENT_WCHAR 0xFFFDu
|
||||
|
||||
// Use Unicode "noncharacters" for internal characters as much as we can. This
|
||||
// gives us 32 "characters" for internal use that we can guarantee should not
|
||||
// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
|
||||
#define RESERVED_CHAR_BASE 0xFDD0u
|
||||
#define RESERVED_CHAR_END 0xFDF0u
|
||||
// Split the available noncharacter values into two ranges to ensure there are
|
||||
// no conflicts among the places we use these special characters.
|
||||
#define EXPAND_RESERVED_BASE RESERVED_CHAR_BASE
|
||||
#define EXPAND_RESERVED_END (EXPAND_RESERVED_BASE + 16)
|
||||
#define WILDCARD_RESERVED_BASE EXPAND_RESERVED_END
|
||||
#define WILDCARD_RESERVED_END (WILDCARD_RESERVED_BASE + 16)
|
||||
// Make sure the ranges defined above don't exceed the range for noncharacters.
|
||||
// This is to make sure we didn't do something stupid in subdividing the
|
||||
// Unicode range for our needs.
|
||||
#if WILDCARD_RESERVED_END > RESERVED_CHAR_END
|
||||
#error
|
||||
#endif
|
||||
|
||||
// These are in the Unicode private-use range. We really shouldn't use this
|
||||
// range but have little choice in the matter given how our lexer/parser works.
|
||||
// We can't use non-characters for these two ranges because there are only 66 of
|
||||
// them and we need at least 256 + 64.
|
||||
//
|
||||
// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
|
||||
// would result in fish having different behavior on machines with 16 versus 32
|
||||
// bit wchar_t. It's better that fish behave the same on both types of systems.
|
||||
//
|
||||
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
|
||||
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
|
||||
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
|
||||
#define ENCODE_DIRECT_BASE 0xF600u
|
||||
#define ENCODE_DIRECT_END (ENCODE_DIRECT_BASE + 256)
|
||||
#define INPUT_COMMON_BASE 0xF700u
|
||||
#define INPUT_COMMON_END (INPUT_COMMON_BASE + 64)
|
||||
|
||||
/* Flags for unescape_string functions */
|
||||
enum
|
||||
{
|
||||
|
|
90
src/expand.h
90
src/expand.h
|
@ -26,92 +26,64 @@
|
|||
|
||||
enum
|
||||
{
|
||||
/** Flag specifying that cmdsubst expansion should be skipped */
|
||||
// Flag specifying that cmdsubst expansion should be skipped.
|
||||
EXPAND_SKIP_CMDSUBST = 1 << 0,
|
||||
|
||||
/** Flag specifying that variable expansion should be skipped */
|
||||
// Flag specifying that variable expansion should be skipped.
|
||||
EXPAND_SKIP_VARIABLES = 1 << 1,
|
||||
|
||||
/** Flag specifying that wildcard expansion should be skipped */
|
||||
// Flag specifying that wildcard expansion should be skipped.
|
||||
EXPAND_SKIP_WILDCARDS = 1 << 2,
|
||||
|
||||
/**
|
||||
The expansion is being done for tab or auto completions. Returned completions may have the wildcard as a prefix instead of a match.
|
||||
*/
|
||||
// The expansion is being done for tab or auto completions. Returned
|
||||
// completions may have the wildcard as a prefix instead of a match.
|
||||
EXPAND_FOR_COMPLETIONS = 1 << 3,
|
||||
|
||||
/** Only match files that are executable by the current user. Only applicable together with ACCEPT_INCOMPLETE. */
|
||||
// Only match files that are executable by the current user. Only
|
||||
// applicable together with ACCEPT_INCOMPLETE.
|
||||
EXECUTABLES_ONLY = 1 << 4,
|
||||
|
||||
/** Only match directories. Only applicable together with ACCEPT_INCOMPLETE. */
|
||||
// Only match directories. Only applicable together with ACCEPT_INCOMPLETE.
|
||||
DIRECTORIES_ONLY = 1 << 5,
|
||||
|
||||
/** Don't generate descriptions */
|
||||
// Don't generate descriptions.
|
||||
EXPAND_NO_DESCRIPTIONS = 1 << 6,
|
||||
|
||||
/** Don't expand jobs (but you can still expand processes). This is because job expansion is not thread safe. */
|
||||
// Don't expand jobs (but you can still expand processes). This is because
|
||||
// job expansion is not thread safe.
|
||||
EXPAND_SKIP_JOBS = 1 << 7,
|
||||
|
||||
/** Don't expand home directories */
|
||||
// Don't expand home directories.
|
||||
EXPAND_SKIP_HOME_DIRECTORIES = 1 << 8,
|
||||
|
||||
/** Allow fuzzy matching */
|
||||
// Allow fuzzy matching.
|
||||
EXPAND_FUZZY_MATCH = 1 << 9,
|
||||
|
||||
/** Disallow directory abbreviations like /u/l/b for /usr/local/bin. Only applicable if EXPAND_FUZZY_MATCH is set. */
|
||||
// Disallow directory abbreviations like /u/l/b for /usr/local/bin. Only
|
||||
// applicable if EXPAND_FUZZY_MATCH is set.
|
||||
EXPAND_NO_FUZZY_DIRECTORIES = 1 << 10,
|
||||
|
||||
/** Do expansions specifically to support cd (CDPATH, etc) */
|
||||
// Do expansions specifically to support cd (CDPATH, etc).
|
||||
EXPAND_SPECIAL_CD = 1 << 11
|
||||
};
|
||||
typedef int expand_flags_t;
|
||||
|
||||
/**
|
||||
Use unencoded private-use keycodes for internal characters
|
||||
*/
|
||||
#define EXPAND_RESERVED 0xf000
|
||||
/**
|
||||
End of range reserved for expand
|
||||
*/
|
||||
#define EXPAND_RESERVED_END 0xf000f
|
||||
|
||||
class completion_t;
|
||||
|
||||
enum
|
||||
{
|
||||
/** Character represeting a home directory */
|
||||
HOME_DIRECTORY = EXPAND_RESERVED,
|
||||
|
||||
/** Character represeting process expansion */
|
||||
// Character representing a home directory.
|
||||
HOME_DIRECTORY = EXPAND_RESERVED_BASE,
|
||||
// Character representing process expansion.
|
||||
PROCESS_EXPAND,
|
||||
|
||||
/** Character representing variable expansion */
|
||||
// Character representing variable expansion.
|
||||
VARIABLE_EXPAND,
|
||||
|
||||
/** Character rpresenting variable expansion into a single element*/
|
||||
// Character representing variable expansion into a single element.
|
||||
VARIABLE_EXPAND_SINGLE,
|
||||
|
||||
/** Character representing the start of a bracket expansion */
|
||||
// Character representing the start of a bracket expansion.
|
||||
BRACKET_BEGIN,
|
||||
|
||||
/** Character representing the end of a bracket expansion */
|
||||
// Character representing the end of a bracket expansion.
|
||||
BRACKET_END,
|
||||
|
||||
/** Character representing separation between two bracket elements */
|
||||
// Character representing separation between two bracket elements.
|
||||
BRACKET_SEP,
|
||||
/**
|
||||
Separate subtokens in a token with this character.
|
||||
*/
|
||||
// Separate subtokens in a token with this character.
|
||||
INTERNAL_SEPARATOR,
|
||||
|
||||
/**
|
||||
Character representing an empty variable expansion.
|
||||
Only used transitively while expanding variables.
|
||||
*/
|
||||
// Character representing an empty variable expansion. Only used
|
||||
// transiently while expanding variables.
|
||||
VARIABLE_EXPAND_EMPTY,
|
||||
}
|
||||
;
|
||||
|
||||
// This is a special pseudo-char that is not used other than to mark the
|
||||
// end of the special characters so we can sanity check the enum range.
|
||||
EXPAND_SENTINAL
|
||||
};
|
||||
|
||||
/** These are the possible return values for expand_string. Note how zero value is the only error. */
|
||||
enum expand_error_t
|
||||
|
|
11
src/fish.cpp
11
src/fish.cpp
|
@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|||
|
||||
#include "config.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
@ -63,6 +64,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|||
#include "input.h"
|
||||
#include "io.h"
|
||||
#include "fish_version.h"
|
||||
#include "input_common.h"
|
||||
#include "wildcard.h"
|
||||
|
||||
/* PATH_MAX may not exist */
|
||||
#ifndef PATH_MAX
|
||||
|
@ -484,6 +487,14 @@ int main(int argc, char **argv)
|
|||
int res=1;
|
||||
int my_optind=0;
|
||||
|
||||
// We can't do this at compile time due to the use of enum symbols.
|
||||
assert(EXPAND_SENTINAL >= EXPAND_RESERVED_BASE &&
|
||||
EXPAND_SENTINAL <= EXPAND_RESERVED_END);
|
||||
assert(ANY_SENTINAL >= WILDCARD_RESERVED_BASE &&
|
||||
ANY_SENTINAL <= WILDCARD_RESERVED_END);
|
||||
assert(R_SENTINAL >= INPUT_COMMON_BASE &&
|
||||
R_SENTINAL <= INPUT_COMMON_END);
|
||||
|
||||
set_main_thread();
|
||||
setup_fork_guards();
|
||||
|
||||
|
|
|
@ -335,7 +335,7 @@ static bool has_expand_reserved(const wcstring &str)
|
|||
for (size_t i=0; i < str.size(); i++)
|
||||
{
|
||||
wchar_t wc = str.at(i);
|
||||
if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END)
|
||||
if (wc >= EXPAND_RESERVED_BASE && wc <= EXPAND_RESERVED_END)
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
|
|
66
src/input.h
66
src/input.h
|
@ -16,77 +16,11 @@ inputrc information for key bindings.
|
|||
#include "env.h"
|
||||
#include "input_common.h"
|
||||
|
||||
|
||||
#define DEFAULT_BIND_MODE L"default"
|
||||
#define FISH_BIND_MODE_VAR L"fish_bind_mode"
|
||||
|
||||
/**
|
||||
Key codes for inputrc-style keyboard functions that are passed on
|
||||
to the caller of input_read()
|
||||
|
||||
NOTE: IF YOU MODIFY THIS YOU MUST UPDATE THE name_arr AND code_arr VARIABLES TO MATCH!
|
||||
*/
|
||||
enum
|
||||
{
|
||||
R_BEGINNING_OF_LINE = R_NULL+10, /* This give input_common ten slots for lowlevel keycodes */
|
||||
R_END_OF_LINE,
|
||||
R_FORWARD_CHAR,
|
||||
R_BACKWARD_CHAR,
|
||||
R_FORWARD_WORD,
|
||||
R_BACKWARD_WORD,
|
||||
R_FORWARD_BIGWORD,
|
||||
R_BACKWARD_BIGWORD,
|
||||
R_HISTORY_SEARCH_BACKWARD,
|
||||
R_HISTORY_SEARCH_FORWARD,
|
||||
R_DELETE_CHAR,
|
||||
R_BACKWARD_DELETE_CHAR,
|
||||
R_KILL_LINE,
|
||||
R_YANK,
|
||||
R_YANK_POP,
|
||||
R_COMPLETE,
|
||||
R_COMPLETE_AND_SEARCH,
|
||||
R_BEGINNING_OF_HISTORY,
|
||||
R_END_OF_HISTORY,
|
||||
R_BACKWARD_KILL_LINE,
|
||||
R_KILL_WHOLE_LINE,
|
||||
R_KILL_WORD,
|
||||
R_KILL_BIGWORD,
|
||||
R_BACKWARD_KILL_WORD,
|
||||
R_BACKWARD_KILL_PATH_COMPONENT,
|
||||
R_BACKWARD_KILL_BIGWORD,
|
||||
R_HISTORY_TOKEN_SEARCH_BACKWARD,
|
||||
R_HISTORY_TOKEN_SEARCH_FORWARD,
|
||||
R_SELF_INSERT,
|
||||
R_TRANSPOSE_CHARS,
|
||||
R_TRANSPOSE_WORDS,
|
||||
R_UPCASE_WORD,
|
||||
R_DOWNCASE_WORD,
|
||||
R_CAPITALIZE_WORD,
|
||||
R_VI_ARG_DIGIT,
|
||||
R_VI_DELETE_TO,
|
||||
R_EXECUTE,
|
||||
R_BEGINNING_OF_BUFFER,
|
||||
R_END_OF_BUFFER,
|
||||
R_REPAINT,
|
||||
R_FORCE_REPAINT,
|
||||
R_UP_LINE,
|
||||
R_DOWN_LINE,
|
||||
R_SUPPRESS_AUTOSUGGESTION,
|
||||
R_ACCEPT_AUTOSUGGESTION,
|
||||
R_BEGIN_SELECTION,
|
||||
R_END_SELECTION,
|
||||
R_KILL_SELECTION,
|
||||
R_FORWARD_JUMP,
|
||||
R_BACKWARD_JUMP,
|
||||
R_AND,
|
||||
R_CANCEL
|
||||
};
|
||||
|
||||
wcstring describe_char(wint_t c);
|
||||
|
||||
#define R_MIN R_NULL
|
||||
#define R_MAX R_CANCEL
|
||||
|
||||
/**
|
||||
Initialize the terminal by calling setupterm, and set up arrays
|
||||
used by readch to detect escape sequences for special keys.
|
||||
|
|
|
@ -8,22 +8,77 @@ Header file for the low level input library
|
|||
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
Use unencoded private-use keycodes for internal characters
|
||||
*/
|
||||
#define INPUT_COMMON_RESERVED 0xe000
|
||||
#include "common.h"
|
||||
|
||||
enum
|
||||
{
|
||||
/**
|
||||
R_NULL is sometimes returned by the input when a character was
|
||||
requested but none could be delivered, or when an exception
|
||||
happened.
|
||||
*/
|
||||
R_NULL = INPUT_COMMON_RESERVED,
|
||||
R_EOF
|
||||
}
|
||||
;
|
||||
R_MIN = INPUT_COMMON_BASE,
|
||||
// R_NULL is sometimes returned by the input when a character was requested
|
||||
// but none could be delivered, or when an exception happened.
|
||||
R_NULL = R_MIN,
|
||||
R_EOF,
|
||||
// Key codes for inputrc-style keyboard functions that are passed on
|
||||
// to the caller of input_read().
|
||||
//
|
||||
// NOTE: If you modify this sequence of symbols you must update the
|
||||
// name_arr, code_arr and desc_arr variables in input.cpp to match!
|
||||
R_BEGINNING_OF_LINE,
|
||||
R_END_OF_LINE,
|
||||
R_FORWARD_CHAR,
|
||||
R_BACKWARD_CHAR,
|
||||
R_FORWARD_WORD,
|
||||
R_BACKWARD_WORD,
|
||||
R_FORWARD_BIGWORD,
|
||||
R_BACKWARD_BIGWORD,
|
||||
R_HISTORY_SEARCH_BACKWARD,
|
||||
R_HISTORY_SEARCH_FORWARD,
|
||||
R_DELETE_CHAR,
|
||||
R_BACKWARD_DELETE_CHAR,
|
||||
R_KILL_LINE,
|
||||
R_YANK,
|
||||
R_YANK_POP,
|
||||
R_COMPLETE,
|
||||
R_COMPLETE_AND_SEARCH,
|
||||
R_BEGINNING_OF_HISTORY,
|
||||
R_END_OF_HISTORY,
|
||||
R_BACKWARD_KILL_LINE,
|
||||
R_KILL_WHOLE_LINE,
|
||||
R_KILL_WORD,
|
||||
R_KILL_BIGWORD,
|
||||
R_BACKWARD_KILL_WORD,
|
||||
R_BACKWARD_KILL_PATH_COMPONENT,
|
||||
R_BACKWARD_KILL_BIGWORD,
|
||||
R_HISTORY_TOKEN_SEARCH_BACKWARD,
|
||||
R_HISTORY_TOKEN_SEARCH_FORWARD,
|
||||
R_SELF_INSERT,
|
||||
R_TRANSPOSE_CHARS,
|
||||
R_TRANSPOSE_WORDS,
|
||||
R_UPCASE_WORD,
|
||||
R_DOWNCASE_WORD,
|
||||
R_CAPITALIZE_WORD,
|
||||
R_VI_ARG_DIGIT,
|
||||
R_VI_DELETE_TO,
|
||||
R_EXECUTE,
|
||||
R_BEGINNING_OF_BUFFER,
|
||||
R_END_OF_BUFFER,
|
||||
R_REPAINT,
|
||||
R_FORCE_REPAINT,
|
||||
R_UP_LINE,
|
||||
R_DOWN_LINE,
|
||||
R_SUPPRESS_AUTOSUGGESTION,
|
||||
R_ACCEPT_AUTOSUGGESTION,
|
||||
R_BEGIN_SELECTION,
|
||||
R_END_SELECTION,
|
||||
R_KILL_SELECTION,
|
||||
R_FORWARD_JUMP,
|
||||
R_BACKWARD_JUMP,
|
||||
R_AND,
|
||||
R_CANCEL,
|
||||
R_MAX = R_CANCEL,
|
||||
// This is a special pseudo-char that is not used other than to mark the
|
||||
// end of the special characters so we can sanity check the enum range.
|
||||
R_SENTINAL
|
||||
};
|
||||
|
||||
/**
|
||||
Init the library
|
||||
|
|
|
@ -2964,16 +2964,20 @@ static int can_read(int fd)
|
|||
return select(fd + 1, &fds, 0, 0, &can_read_timeout) == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
Test if the specified character is in the private use area that
|
||||
fish uses to store internal characters
|
||||
|
||||
Note: Allow U+F8FF because that's the Apple symbol, which is in the
|
||||
OS X US keyboard layout.
|
||||
*/
|
||||
// Test if the specified character is in a range that fish uses internally to
|
||||
// store special tokens.
|
||||
//
|
||||
// NOTE: This is used when tokenizing the input. It is also used when reading
|
||||
// input, before tokenization, to replace such chars with REPLACEMENT_WCHAR if
|
||||
// they're not part of a quoted string. We don't want external input to be able
|
||||
// to feed reserved characters into our lexer/parser or code evaluator.
|
||||
//
|
||||
// TODO: Actually implement the replacement as documented above.
|
||||
static int wchar_private(wchar_t c)
|
||||
{
|
||||
return ((c >= 0xe000) && (c < 0xf8ff));
|
||||
return ((c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) ||
|
||||
(c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END) ||
|
||||
(c >= INPUT_COMMON_BASE && c < INPUT_COMMON_END));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -632,8 +632,8 @@ void tokenizer_t::tok_next()
|
|||
/*fwprintf( stderr, L"End of string\n" );*/
|
||||
this->has_next = false;
|
||||
break;
|
||||
case 13: // carriage return
|
||||
case L'\n':
|
||||
case L'\r': // carriage-return
|
||||
case L'\n': // newline
|
||||
case L';':
|
||||
this->last_type = TOK_END;
|
||||
this->buff++;
|
||||
|
|
|
@ -18,27 +18,19 @@
|
|||
#include "expand.h"
|
||||
#include "complete.h"
|
||||
|
||||
/*
|
||||
Use unencoded private-use keycodes for internal characters
|
||||
*/
|
||||
|
||||
#define WILDCARD_RESERVED 0xf400
|
||||
|
||||
/**
|
||||
Enumeration of all wildcard types
|
||||
*/
|
||||
// Enumeration of all wildcard types
|
||||
enum
|
||||
{
|
||||
/** Character representing any character except '/' */
|
||||
ANY_CHAR = WILDCARD_RESERVED,
|
||||
|
||||
/** Character representing any character string not containing '/' (A slash) */
|
||||
// Character representing any character except '/' (slash).
|
||||
ANY_CHAR = WILDCARD_RESERVED_BASE,
|
||||
// Character representing any character string not containing '/' (slash).
|
||||
ANY_STRING,
|
||||
|
||||
/** Character representing any character string */
|
||||
// Character representing any character string.
|
||||
ANY_STRING_RECURSIVE,
|
||||
}
|
||||
;
|
||||
// This is a special pseudo-char that is not used other than to mark the
|
||||
// end of the special characters so we can sanity check the enum range.
|
||||
ANY_SENTINAL
|
||||
};
|
||||
|
||||
/**
|
||||
Expand the wildcard by matching against the filesystem.
|
||||
|
|
Loading…
Reference in a new issue