mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-13 05:28:49 +00:00
deal with broken unicode implementations
Both GNU and BSD have bugs regarding the classification of non-characters and private use area characters. Provide wrappers around iswalnum(), iswalpha(), and iswgraph() to provide a consistent experience. We don't bother to autoconf the use of these wrappers for several reasons, including the fact that a binary built for one distro release should behave correctly on another release (e.g., FreeBSD 10 does the right thing while FreeBSD 11 and 12 do not with respect to iswalnum() of code points in the range 0xFDD0..0xFDFF). Also move a few functions from common.* to wutil.* because they are wide char specific and really belong in the latter module. Fixes #3050
This commit is contained in:
parent
01fa31f313
commit
92dd6de73c
4 changed files with 104 additions and 41 deletions
|
@ -428,26 +428,6 @@ void append_format(wcstring &str, const wchar_t *format, ...) {
|
|||
va_end(va);
|
||||
}
|
||||
|
||||
/// Scan a NUL-terminated wide string for the first character that may not appear in a variable
/// name. Only alphanumerics (per iswalnum()) and the underscore are accepted.
///
/// \return NULL when every character is acceptable (this includes the empty string), otherwise a
/// pointer to the first offending character inside \c str.
const wchar_t *wcsvarname(const wchar_t *str) {
    for (const wchar_t *cursor = str; *cursor != L'\0'; ++cursor) {
        if (!(iswalnum(*cursor) || *cursor == L'_')) {
            return cursor;
        }
    }
    return NULL;
}
|
||||
|
||||
const wchar_t *wcsvarname(const wcstring &str) { return wcsvarname(str.c_str()); }
|
||||
|
||||
/// Check a candidate function name. The only character rejected is the slash.
///
/// \return NULL if the name contains no L'/', otherwise a pointer to the first L'/' in it.
const wchar_t *wcsfuncname(const wcstring &str) {
    const wchar_t *name = str.c_str();
    return wcschr(name, L'/');
}
|
||||
|
||||
/// Tell whether a single character may appear in a variable name: an underscore or anything
/// iswalnum() classifies as alphanumeric.
bool wcsvarchr(wchar_t chr) {
    if (iswalnum(chr)) return true;
    return chr == L'_';
}
|
||||
|
||||
/// Convenience overload of fish_wcswidth() for a NUL-terminated wide string: measures the string
/// with wcslen() and forwards to the two-argument form.
int fish_wcswidth(const wchar_t *str) {
    const size_t len = wcslen(str);
    return fish_wcswidth(str, len);
}
|
||||
|
||||
/// Convenience overload of fish_wcswidth() for a wcstring: forwards the string's buffer and its
/// size() to the two-argument form.
int fish_wcswidth(const wcstring &str) {
    const wchar_t *chars = str.c_str();
    return fish_wcswidth(chars, str.size());
}
|
||||
|
||||
wchar_t *quote_end(const wchar_t *pos) {
|
||||
wchar_t c = *pos;
|
||||
|
||||
|
|
21
src/common.h
21
src/common.h
|
@ -617,27 +617,6 @@ wcstring vformat_string(const wchar_t *format, va_list va_orig);
|
|||
void append_format(wcstring &str, const wchar_t *format, ...);
|
||||
void append_formatv(wcstring &str, const wchar_t *format, va_list ap);
|
||||
|
||||
/// Test if the given string is a valid variable name.
|
||||
///
|
||||
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
|
||||
const wchar_t *wcsvarname(const wchar_t *str);
|
||||
const wchar_t *wcsvarname(const wcstring &str);
|
||||
|
||||
/// Test if the given string is a valid function name.
|
||||
///
|
||||
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
|
||||
const wchar_t *wcsfuncname(const wcstring &str);
|
||||
|
||||
/// Test if the given character is valid in a variable name.
|
||||
///
|
||||
/// \return true if this is a valid character, false otherwise.
|
||||
bool wcsvarchr(wchar_t chr);
|
||||
|
||||
/// Convenience variants on fish_wcswidth().
|
||||
///
|
||||
/// See fallback.h for the normal definitions.
|
||||
int fish_wcswidth(const wchar_t *str);
|
||||
int fish_wcswidth(const wcstring &str);
|
||||
|
||||
/// This function returns the end of the quoted substring beginning at \c in. The type of quoting
|
||||
/// character is determined by examining \c in. Returns 0 on error.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// Wide character equivalents of various standard unix functions.
|
||||
#define FISH_NO_ISW_WRAPPERS
|
||||
#include "config.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
@ -470,6 +471,84 @@ int wrename(const wcstring &old, const wcstring &newv) {
|
|||
return rename(old_narrow.c_str(), new_narrow.c_str());
|
||||
}
|
||||
|
||||
/// Return one if the code point is in the range we reserve for internal use.
|
||||
int fish_is_reserved_codepoint(wint_t wc) {
|
||||
if (RESERVED_CHAR_BASE <= wc && wc < RESERVED_CHAR_END) return 1;
|
||||
if (EXPAND_RESERVED_BASE <= wc && wc < EXPAND_RESERVED_END) return 1;
|
||||
if (WILDCARD_RESERVED_BASE <= wc && wc < WILDCARD_RESERVED_END) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Return one if the code point is in a Unicode private use area.
|
||||
int fish_is_pua(wint_t wc) {
|
||||
if (PUA1_START <= wc && wc < PUA1_END) return 1;
|
||||
if (PUA2_START <= wc && wc < PUA2_END) return 1;
|
||||
if (PUA3_START <= wc && wc < PUA3_END) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// We need this because there are too many implementations that don't return the proper answer for
|
||||
/// some code points. See issue #3050.
|
||||
int fish_iswalnum(wint_t wc) {
|
||||
if (fish_is_reserved_codepoint(wc)) return 0;
|
||||
if (fish_is_pua(wc)) return 0;
|
||||
return iswalnum(wc);
|
||||
}
|
||||
|
||||
/// We need this because there are too many implementations that don't return the proper answer for
|
||||
/// some code points. See issue #3050.
|
||||
int fish_iswalpha(wint_t wc) {
|
||||
if (fish_is_reserved_codepoint(wc)) return 0;
|
||||
if (fish_is_pua(wc)) return 0;
|
||||
return iswalpha(wc);
|
||||
}
|
||||
|
||||
/// We need this because there are too many implementations that don't return the proper answer for
|
||||
/// some code points. See issue #3050.
|
||||
int fish_iswgraph(wint_t wc) {
|
||||
if (fish_is_reserved_codepoint(wc)) return 0;
|
||||
if (fish_is_pua(wc)) return 1;
|
||||
return iswgraph(wc);
|
||||
}
|
||||
|
||||
/// Test if the given string is a valid variable name.
|
||||
///
|
||||
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
|
||||
const wchar_t *wcsvarname(const wchar_t *str) {
|
||||
while (*str) {
|
||||
if ((!fish_iswalnum(*str)) && (*str != L'_')) {
|
||||
return str;
|
||||
}
|
||||
str++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// Test if the given string is a valid variable name.
|
||||
///
|
||||
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
|
||||
const wchar_t *wcsvarname(const wcstring &str) { return wcsvarname(str.c_str()); }
|
||||
|
||||
/// Test if the given string is a valid function name.
|
||||
///
|
||||
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
|
||||
const wchar_t *wcsfuncname(const wcstring &str) { return wcschr(str.c_str(), L'/'); }
|
||||
|
||||
/// Test if the given string is valid in a variable name.
|
||||
///
|
||||
/// \return true if this is a valid name, false otherwise.
|
||||
bool wcsvarchr(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
|
||||
|
||||
/// Convenience variants on fish_wcwswidth().
|
||||
///
|
||||
/// See fallback.h for the normal definitions.
|
||||
int fish_wcswidth(const wchar_t *str) { return fish_wcswidth(str, wcslen(str)); }
|
||||
|
||||
/// Convenience variants on fish_wcwswidth().
|
||||
///
|
||||
/// See fallback.h for the normal definitions.
|
||||
int fish_wcswidth(const wcstring &str) { return fish_wcswidth(str.c_str(), str.size()); }
|
||||
|
||||
file_id_t file_id_t::file_id_from_stat(const struct stat *buf) {
|
||||
assert(buf != NULL);
|
||||
|
||||
|
|
25
src/wutil.h
25
src/wutil.h
|
@ -59,6 +59,31 @@ int wmkdir(const wcstring &dir, int mode);
|
|||
|
||||
int wrename(const wcstring &oldName, const wcstring &newName);
|
||||
|
||||
// Bounds of the three Unicode private use areas, consumed by fish_is_pua(). Each START/END pair
// is used as a half-open range: START is the first code point of the area and END is one past
// its last code point. PUA1 is the area in the Basic Multilingual Plane (U+E000..U+F8FF); PUA2
// and PUA3 are the supplementary private use areas (U+F0000..U+FFFFD and U+100000..U+10FFFD).
#define PUA1_START 0xE000
|
||||
#define PUA1_END 0xF900
|
||||
#define PUA2_START 0xF0000
|
||||
#define PUA2_END 0xFFFFE
|
||||
#define PUA3_START 0x100000
|
||||
#define PUA3_END 0x10FFFE
|
||||
|
||||
// Redirect iswalnum(), iswalpha() and iswgraph() to the fish_* wrappers, because too many
|
||||
// implementations don't return the proper answer for some code points. See issue #3050.
// The redirection is skipped when FISH_NO_ISW_WRAPPERS is already defined at this point.
|
||||
#ifndef FISH_NO_ISW_WRAPPERS
|
||||
#define iswalnum fish_iswalnum
|
||||
#define iswalpha fish_iswalpha
|
||||
#define iswgraph fish_iswgraph
|
||||
#endif
|
||||
int fish_iswalnum(wint_t wc);
|
||||
int fish_iswalpha(wint_t wc);
|
||||
int fish_iswgraph(wint_t wc);
|
||||
|
||||
const wchar_t *wcsvarname(const wchar_t *str);
|
||||
const wchar_t *wcsvarname(const wcstring &str);
|
||||
const wchar_t *wcsfuncname(const wcstring &str);
|
||||
bool wcsvarchr(wchar_t chr);
|
||||
int fish_wcswidth(const wchar_t *str);
|
||||
int fish_wcswidth(const wcstring &str);
|
||||
|
||||
/// Class for representing a file's inode. We use this to detect and avoid symlink loops, among
|
||||
/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux
|
||||
/// seems to aggressively re-use inodes, so it cannot determine if a file has been deleted (ABA
|
||||
|
|
Loading…
Reference in a new issue