diff --git a/doc_src/faq.hdr b/doc_src/faq.hdr
index 5ec125526..abbedb8b1 100644
--- a/doc_src/faq.hdr
+++ b/doc_src/faq.hdr
@@ -215,6 +215,11 @@ cd /usr/local/bin
rm -f fish fish_indent
\endfish
+
+\section faq-reserved-chars Unicode private-use characters reserved by fish
+
+Fish reserves the Unicode private-use character range from U+F600 through U+F73F for internal use. Any attempt to feed characters in that range to fish will result in them being replaced by the Unicode "replacement character" U+FFFD. This includes both interactive input and any file read by fish (but not programs run by fish).
+
\htmlonly[block]
\endhtmlonly
diff --git a/src/common.h b/src/common.h
index d255ac62d..74640988c 100644
--- a/src/common.h
+++ b/src/common.h
@@ -35,34 +35,59 @@
typedef std::wstring wcstring;
typedef std::vector wcstring_list_t;
-/**
- Maximum number of bytes used by a single utf-8 character
-*/
+// Maximum number of bytes used by a single utf-8 character.
#define MAX_UTF8_BYTES 6
-/**
- This is in the unicode private use area.
-*/
-#define ENCODE_DIRECT_BASE 0xf100
-
-/**
- Highest legal ascii value
-*/
+// Highest legal ASCII value.
#define ASCII_MAX 127u
-/**
- Highest legal 16-bit unicode value
-*/
-#define UCS2_MAX 0xffffu
+// Highest legal 16-bit Unicode value.
+#define UCS2_MAX 0xFFFFu
-/**
- Highest legal byte value
-*/
-#define BYTE_MAX 0xffu
+// Highest legal byte value.
+#define BYTE_MAX 0xFFu
-/** BOM value */
+// Unicode BOM value.
#define UTF8_BOM_WCHAR 0xFEFFu
+// Unicode replacement character.
+#define REPLACEMENT_WCHAR 0xFFFDu
+
+// Use Unicode "noncharacters" for internal characters as much as we can. This
+// gives us 32 "characters" for internal use that we can guarantee should not
+// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
+#define RESERVED_CHAR_BASE 0xFDD0u
+#define RESERVED_CHAR_END 0xFDF0u
+// Split the available noncharacter values into two ranges to ensure there are
+// no conflicts among the places we use these special characters.
+#define EXPAND_RESERVED_BASE RESERVED_CHAR_BASE
+#define EXPAND_RESERVED_END (EXPAND_RESERVED_BASE + 16)
+#define WILDCARD_RESERVED_BASE EXPAND_RESERVED_END
+#define WILDCARD_RESERVED_END (WILDCARD_RESERVED_BASE + 16)
+// Make sure the ranges defined above don't exceed the range for noncharacters.
+// This is to make sure we didn't do something stupid in subdividing the
+// Unicode range for our needs.
+#if WILDCARD_RESERVED_END > RESERVED_CHAR_END
+#error
+#endif
+
+// These are in the Unicode private-use range. We really shouldn't use this
+// range but have little choice in the matter given how our lexer/parser works.
+// We can't use non-characters for these two ranges because there are only 66 of
+// them and we need at least 256 + 64.
+//
+// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
+// would result in fish having different behavior on machines with 16 versus 32
+// bit wchar_t. It's better that fish behave the same on both types of systems.
+//
+// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
+// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
+// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
+#define ENCODE_DIRECT_BASE 0xF600u
+#define ENCODE_DIRECT_END (ENCODE_DIRECT_BASE + 256)
+#define INPUT_COMMON_BASE 0xF700u
+#define INPUT_COMMON_END (INPUT_COMMON_BASE + 64)
+
/* Flags for unescape_string functions */
enum
{
diff --git a/src/expand.h b/src/expand.h
index 2c526358f..731acd711 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -26,92 +26,64 @@
enum
{
- /** Flag specifying that cmdsubst expansion should be skipped */
+ // Flag specifying that cmdsubst expansion should be skipped.
EXPAND_SKIP_CMDSUBST = 1 << 0,
-
- /** Flag specifying that variable expansion should be skipped */
+ // Flag specifying that variable expansion should be skipped.
EXPAND_SKIP_VARIABLES = 1 << 1,
-
- /** Flag specifying that wildcard expansion should be skipped */
+ // Flag specifying that wildcard expansion should be skipped.
EXPAND_SKIP_WILDCARDS = 1 << 2,
-
- /**
- The expansion is being done for tab or auto completions. Returned completions may have the wildcard as a prefix instead of a match.
- */
+ // The expansion is being done for tab or auto completions. Returned
+ // completions may have the wildcard as a prefix instead of a match.
EXPAND_FOR_COMPLETIONS = 1 << 3,
-
- /** Only match files that are executable by the current user. Only applicable together with ACCEPT_INCOMPLETE. */
+ // Only match files that are executable by the current user. Only
+ // applicable together with ACCEPT_INCOMPLETE.
EXECUTABLES_ONLY = 1 << 4,
-
- /** Only match directories. Only applicable together with ACCEPT_INCOMPLETE. */
+ // Only match directories. Only applicable together with ACCEPT_INCOMPLETE.
DIRECTORIES_ONLY = 1 << 5,
-
- /** Don't generate descriptions */
+ // Don't generate descriptions.
EXPAND_NO_DESCRIPTIONS = 1 << 6,
-
- /** Don't expand jobs (but you can still expand processes). This is because job expansion is not thread safe. */
+ // Don't expand jobs (but you can still expand processes). This is because
+ // job expansion is not thread safe.
EXPAND_SKIP_JOBS = 1 << 7,
-
- /** Don't expand home directories */
+ // Don't expand home directories.
EXPAND_SKIP_HOME_DIRECTORIES = 1 << 8,
-
- /** Allow fuzzy matching */
+ // Allow fuzzy matching.
EXPAND_FUZZY_MATCH = 1 << 9,
-
- /** Disallow directory abbreviations like /u/l/b for /usr/local/bin. Only applicable if EXPAND_FUZZY_MATCH is set. */
+ // Disallow directory abbreviations like /u/l/b for /usr/local/bin. Only
+ // applicable if EXPAND_FUZZY_MATCH is set.
EXPAND_NO_FUZZY_DIRECTORIES = 1 << 10,
-
- /** Do expansions specifically to support cd (CDPATH, etc) */
+ // Do expansions specifically to support cd (CDPATH, etc).
EXPAND_SPECIAL_CD = 1 << 11
};
typedef int expand_flags_t;
-/**
- Use unencoded private-use keycodes for internal characters
-*/
-#define EXPAND_RESERVED 0xf000
-/**
- End of range reserved for expand
- */
-#define EXPAND_RESERVED_END 0xf000f
-
class completion_t;
enum
{
- /** Character represeting a home directory */
- HOME_DIRECTORY = EXPAND_RESERVED,
-
- /** Character represeting process expansion */
+ // Character representing a home directory.
+ HOME_DIRECTORY = EXPAND_RESERVED_BASE,
+ // Character representing process expansion.
PROCESS_EXPAND,
-
- /** Character representing variable expansion */
+ // Character representing variable expansion.
VARIABLE_EXPAND,
-
- /** Character rpresenting variable expansion into a single element*/
+ // Character representing variable expansion into a single element.
VARIABLE_EXPAND_SINGLE,
-
- /** Character representing the start of a bracket expansion */
+ // Character representing the start of a bracket expansion.
BRACKET_BEGIN,
-
- /** Character representing the end of a bracket expansion */
+ // Character representing the end of a bracket expansion.
BRACKET_END,
-
- /** Character representing separation between two bracket elements */
+ // Character representing separation between two bracket elements.
BRACKET_SEP,
- /**
- Separate subtokens in a token with this character.
- */
+ // Separate subtokens in a token with this character.
INTERNAL_SEPARATOR,
-
- /**
- Character representing an empty variable expansion.
- Only used transitively while expanding variables.
- */
+ // Character representing an empty variable expansion. Only used
+ // transitively while expanding variables.
VARIABLE_EXPAND_EMPTY,
-}
-;
-
+ // This is a special pseudo-char that is not used other than to mark the
+ // end of the special characters so we can sanity check the enum range.
+ EXPAND_SENTINAL
+};
/** These are the possible return values for expand_string. Note how zero value is the only error. */
enum expand_error_t
diff --git a/src/fish.cpp b/src/fish.cpp
index 69b4b0de0..916102321 100644
--- a/src/fish.cpp
+++ b/src/fish.cpp
@@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "config.h"
+#include
#include
#include
#include
@@ -63,6 +64,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "input.h"
#include "io.h"
#include "fish_version.h"
+#include "input_common.h"
+#include "wildcard.h"
/* PATH_MAX may not exist */
#ifndef PATH_MAX
@@ -484,6 +487,14 @@ int main(int argc, char **argv)
int res=1;
int my_optind=0;
+ // We can't do this at compile time due to the use of enum symbols.
+ assert(EXPAND_SENTINAL >= EXPAND_RESERVED_BASE &&
+ EXPAND_SENTINAL <= EXPAND_RESERVED_END);
+ assert(ANY_SENTINAL >= WILDCARD_RESERVED_BASE &&
+ ANY_SENTINAL <= WILDCARD_RESERVED_END);
+ assert(R_SENTINAL >= INPUT_COMMON_BASE &&
+ R_SENTINAL <= INPUT_COMMON_END);
+
set_main_thread();
setup_fork_guards();
diff --git a/src/highlight.cpp b/src/highlight.cpp
index 90426803c..3014ebdfa 100644
--- a/src/highlight.cpp
+++ b/src/highlight.cpp
@@ -335,7 +335,7 @@ static bool has_expand_reserved(const wcstring &str)
for (size_t i=0; i < str.size(); i++)
{
wchar_t wc = str.at(i);
- if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END)
+ if (wc >= EXPAND_RESERVED_BASE && wc <= EXPAND_RESERVED_END)
{
result = true;
break;
diff --git a/src/input.h b/src/input.h
index 36d49a51d..47315c1c4 100644
--- a/src/input.h
+++ b/src/input.h
@@ -16,77 +16,11 @@ inputrc information for key bindings.
#include "env.h"
#include "input_common.h"
-
#define DEFAULT_BIND_MODE L"default"
#define FISH_BIND_MODE_VAR L"fish_bind_mode"
-/**
- Key codes for inputrc-style keyboard functions that are passed on
- to the caller of input_read()
-
- NOTE: IF YOU MODIFY THIS YOU MUST UPDATE THE name_arr AND code_arr VARIABLES TO MATCH!
-*/
-enum
-{
- R_BEGINNING_OF_LINE = R_NULL+10, /* This give input_common ten slots for lowlevel keycodes */
- R_END_OF_LINE,
- R_FORWARD_CHAR,
- R_BACKWARD_CHAR,
- R_FORWARD_WORD,
- R_BACKWARD_WORD,
- R_FORWARD_BIGWORD,
- R_BACKWARD_BIGWORD,
- R_HISTORY_SEARCH_BACKWARD,
- R_HISTORY_SEARCH_FORWARD,
- R_DELETE_CHAR,
- R_BACKWARD_DELETE_CHAR,
- R_KILL_LINE,
- R_YANK,
- R_YANK_POP,
- R_COMPLETE,
- R_COMPLETE_AND_SEARCH,
- R_BEGINNING_OF_HISTORY,
- R_END_OF_HISTORY,
- R_BACKWARD_KILL_LINE,
- R_KILL_WHOLE_LINE,
- R_KILL_WORD,
- R_KILL_BIGWORD,
- R_BACKWARD_KILL_WORD,
- R_BACKWARD_KILL_PATH_COMPONENT,
- R_BACKWARD_KILL_BIGWORD,
- R_HISTORY_TOKEN_SEARCH_BACKWARD,
- R_HISTORY_TOKEN_SEARCH_FORWARD,
- R_SELF_INSERT,
- R_TRANSPOSE_CHARS,
- R_TRANSPOSE_WORDS,
- R_UPCASE_WORD,
- R_DOWNCASE_WORD,
- R_CAPITALIZE_WORD,
- R_VI_ARG_DIGIT,
- R_VI_DELETE_TO,
- R_EXECUTE,
- R_BEGINNING_OF_BUFFER,
- R_END_OF_BUFFER,
- R_REPAINT,
- R_FORCE_REPAINT,
- R_UP_LINE,
- R_DOWN_LINE,
- R_SUPPRESS_AUTOSUGGESTION,
- R_ACCEPT_AUTOSUGGESTION,
- R_BEGIN_SELECTION,
- R_END_SELECTION,
- R_KILL_SELECTION,
- R_FORWARD_JUMP,
- R_BACKWARD_JUMP,
- R_AND,
- R_CANCEL
-};
-
wcstring describe_char(wint_t c);
-#define R_MIN R_NULL
-#define R_MAX R_CANCEL
-
/**
Initialize the terminal by calling setupterm, and set up arrays
used by readch to detect escape sequences for special keys.
diff --git a/src/input_common.h b/src/input_common.h
index 72ccc073e..e605d18b7 100644
--- a/src/input_common.h
+++ b/src/input_common.h
@@ -8,22 +8,77 @@ Header file for the low level input library
#include
-/**
- Use unencoded private-use keycodes for internal characters
-*/
-#define INPUT_COMMON_RESERVED 0xe000
+#include "common.h"
enum
{
- /**
- R_NULL is sometimes returned by the input when a character was
- requested but none could be delivered, or when an exception
- happened.
- */
- R_NULL = INPUT_COMMON_RESERVED,
- R_EOF
-}
-;
+ R_MIN = INPUT_COMMON_BASE,
+ // R_NULL is sometimes returned by the input when a character was requested
+ // but none could be delivered, or when an exception happened.
+ R_NULL = R_MIN,
+ R_EOF,
+ // Key codes for inputrc-style keyboard functions that are passed on
+ // to the caller of input_read().
+ //
+ // NOTE: If you modify this sequence of symbols you must update the
+ // name_arr, code_arr and desc_arr variables in input.cpp to match!
+ R_BEGINNING_OF_LINE,
+ R_END_OF_LINE,
+ R_FORWARD_CHAR,
+ R_BACKWARD_CHAR,
+ R_FORWARD_WORD,
+ R_BACKWARD_WORD,
+ R_FORWARD_BIGWORD,
+ R_BACKWARD_BIGWORD,
+ R_HISTORY_SEARCH_BACKWARD,
+ R_HISTORY_SEARCH_FORWARD,
+ R_DELETE_CHAR,
+ R_BACKWARD_DELETE_CHAR,
+ R_KILL_LINE,
+ R_YANK,
+ R_YANK_POP,
+ R_COMPLETE,
+ R_COMPLETE_AND_SEARCH,
+ R_BEGINNING_OF_HISTORY,
+ R_END_OF_HISTORY,
+ R_BACKWARD_KILL_LINE,
+ R_KILL_WHOLE_LINE,
+ R_KILL_WORD,
+ R_KILL_BIGWORD,
+ R_BACKWARD_KILL_WORD,
+ R_BACKWARD_KILL_PATH_COMPONENT,
+ R_BACKWARD_KILL_BIGWORD,
+ R_HISTORY_TOKEN_SEARCH_BACKWARD,
+ R_HISTORY_TOKEN_SEARCH_FORWARD,
+ R_SELF_INSERT,
+ R_TRANSPOSE_CHARS,
+ R_TRANSPOSE_WORDS,
+ R_UPCASE_WORD,
+ R_DOWNCASE_WORD,
+ R_CAPITALIZE_WORD,
+ R_VI_ARG_DIGIT,
+ R_VI_DELETE_TO,
+ R_EXECUTE,
+ R_BEGINNING_OF_BUFFER,
+ R_END_OF_BUFFER,
+ R_REPAINT,
+ R_FORCE_REPAINT,
+ R_UP_LINE,
+ R_DOWN_LINE,
+ R_SUPPRESS_AUTOSUGGESTION,
+ R_ACCEPT_AUTOSUGGESTION,
+ R_BEGIN_SELECTION,
+ R_END_SELECTION,
+ R_KILL_SELECTION,
+ R_FORWARD_JUMP,
+ R_BACKWARD_JUMP,
+ R_AND,
+ R_CANCEL,
+ R_MAX = R_CANCEL,
+ // This is a special pseudo-char that is not used other than to mark the
+ // end of the special characters so we can sanity check the enum range.
+ R_SENTINAL
+};
/**
Init the library
diff --git a/src/reader.cpp b/src/reader.cpp
index 3998e5a01..a7e8ec7e5 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -2964,16 +2964,20 @@ static int can_read(int fd)
return select(fd + 1, &fds, 0, 0, &can_read_timeout) == 1;
}
-/**
- Test if the specified character is in the private use area that
- fish uses to store internal characters
-
- Note: Allow U+F8FF because that's the Apple symbol, which is in the
- OS X US keyboard layout.
-*/
+// Test if the specified character is in a range that fish uses internally to
+// store special tokens.
+//
+// NOTE: This is used when tokenizing the input. It is also used when reading
+// input, before tokenization, to replace such chars with REPLACEMENT_WCHAR if
+// they're not part of a quoted string. We don't want external input to be able
+// to feed reserved characters into our lexer/parser or code evaluator.
+//
+// TODO: Actually implement the replacement as documented above.
static int wchar_private(wchar_t c)
{
- return ((c >= 0xe000) && (c < 0xf8ff));
+ return ((c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) ||
+ (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END) ||
+ (c >= INPUT_COMMON_BASE && c < INPUT_COMMON_END));
}
/**
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index ce28ecf07..4075c73b2 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -632,8 +632,8 @@ void tokenizer_t::tok_next()
/*fwprintf( stderr, L"End of string\n" );*/
this->has_next = false;
break;
- case 13: // carriage return
- case L'\n':
+ case L'\r': // carriage-return
+ case L'\n': // newline
case L';':
this->last_type = TOK_END;
this->buff++;
diff --git a/src/wildcard.h b/src/wildcard.h
index 3a9a869fe..3e1d2b6f8 100644
--- a/src/wildcard.h
+++ b/src/wildcard.h
@@ -18,27 +18,19 @@
#include "expand.h"
#include "complete.h"
-/*
- Use unencoded private-use keycodes for internal characters
-*/
-
-#define WILDCARD_RESERVED 0xf400
-
-/**
- Enumeration of all wildcard types
-*/
+// Enumeration of all wildcard types.
enum
{
- /** Character representing any character except '/' */
- ANY_CHAR = WILDCARD_RESERVED,
-
- /** Character representing any character string not containing '/' (A slash) */
+ // Character representing any character except '/' (slash).
+ ANY_CHAR = WILDCARD_RESERVED_BASE,
+ // Character representing any character string not containing '/' (slash).
ANY_STRING,
-
- /** Character representing any character string */
+ // Character representing any character string.
ANY_STRING_RECURSIVE,
-}
-;
+ // This is a special pseudo-char that is not used other than to mark the
+ // end of the special characters so we can sanity check the enum range.
+ ANY_SENTINAL
+};
/**
Expand the wildcard by matching against the filesystem.