mirror of
https://github.com/fish-shell/fish-shell
synced 2024-11-15 01:17:45 +00:00
Fully migrate to Rust escape string tests and code
Co-Authored-By: Mahmoud Al-Qudsi <mqudsi@neosmart.net>
This commit is contained in:
parent
0a4bcf7430
commit
595d593732
3 changed files with 49 additions and 291 deletions
|
@ -2375,16 +2375,31 @@ mod common_ffi {
|
||||||
type escape_string_style_t = crate::ffi::escape_string_style_t;
|
type escape_string_style_t = crate::ffi::escape_string_style_t;
|
||||||
}
|
}
|
||||||
extern "Rust" {
|
extern "Rust" {
|
||||||
fn rust_unescape_string(
|
#[cxx_name = "rust_unescape_string"]
|
||||||
|
fn unescape_string_ffi(
|
||||||
input: *const wchar_t,
|
input: *const wchar_t,
|
||||||
len: usize,
|
len: usize,
|
||||||
escape_special: u32,
|
escape_special: u32,
|
||||||
style: escape_string_style_t,
|
style: escape_string_style_t,
|
||||||
) -> UniquePtr<CxxWString>;
|
) -> UniquePtr<CxxWString>;
|
||||||
|
|
||||||
|
#[cxx_name = "rust_escape_string_script"]
|
||||||
|
fn escape_string_script_ffi(
|
||||||
|
input: *const wchar_t,
|
||||||
|
len: usize,
|
||||||
|
flags: u32,
|
||||||
|
) -> UniquePtr<CxxWString>;
|
||||||
|
|
||||||
|
#[cxx_name = "rust_escape_string_url"]
|
||||||
|
fn escape_string_url_ffi(input: *const wchar_t, len: usize) -> UniquePtr<CxxWString>;
|
||||||
|
|
||||||
|
#[cxx_name = "rust_escape_string_var"]
|
||||||
|
fn escape_string_var_ffi(input: *const wchar_t, len: usize) -> UniquePtr<CxxWString>;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rust_unescape_string(
|
fn unescape_string_ffi(
|
||||||
input: *const ffi::wchar_t,
|
input: *const ffi::wchar_t,
|
||||||
len: usize,
|
len: usize,
|
||||||
escape_special: u32,
|
escape_special: u32,
|
||||||
|
@ -2405,3 +2420,26 @@ fn rust_unescape_string(
|
||||||
None => UniquePtr::null(),
|
None => UniquePtr::null(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn escape_string_script_ffi(
|
||||||
|
input: *const ffi::wchar_t,
|
||||||
|
len: usize,
|
||||||
|
flags: u32,
|
||||||
|
) -> UniquePtr<CxxWString> {
|
||||||
|
let input = unsafe { slice::from_raw_parts(input, len) };
|
||||||
|
escape_string_script(
|
||||||
|
wstr::from_slice(input).unwrap(),
|
||||||
|
EscapeFlags::from_bits(flags).unwrap(),
|
||||||
|
)
|
||||||
|
.to_ffi()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn escape_string_var_ffi(input: *const ffi::wchar_t, len: usize) -> UniquePtr<CxxWString> {
|
||||||
|
let input = unsafe { slice::from_raw_parts(input, len) };
|
||||||
|
escape_string_var(wstr::from_slice(input).unwrap()).to_ffi()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn escape_string_url_ffi(input: *const ffi::wchar_t, len: usize) -> UniquePtr<CxxWString> {
|
||||||
|
let input = unsafe { slice::from_raw_parts(input, len) };
|
||||||
|
escape_string_url(wstr::from_slice(input).unwrap()).to_ffi()
|
||||||
|
}
|
||||||
|
|
223
src/common.cpp
223
src/common.cpp
|
@ -117,9 +117,6 @@ long convert_digit(wchar_t d, int base) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
/// Only the decimal digits and upper-case `A`-`F` are accepted; lower-case letters are not.
/// NUL is rejected explicitly: strchr() considers the string's terminator part of the string and
/// would otherwise report a match for `c == 0`.
static bool is_hex_digit(int c) {
    return c != '\0' && std::strchr("0123456789ABCDEF", c) != nullptr;
}
|
|
||||||
|
|
||||||
bool is_windows_subsystem_for_linux() {
|
bool is_windows_subsystem_for_linux() {
|
||||||
#if defined(WSL)
|
#if defined(WSL)
|
||||||
return true;
|
return true;
|
||||||
|
@ -723,51 +720,17 @@ wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize) {
|
||||||
|
|
||||||
/// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
|
/// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
|
||||||
static void escape_string_url(const wcstring &in, wcstring &out) {
|
static void escape_string_url(const wcstring &in, wcstring &out) {
|
||||||
const std::string narrow = wcs2string(in);
|
auto result = rust_escape_string_url(in.c_str(), in.size());
|
||||||
for (auto &c1 : narrow) {
|
if (result) {
|
||||||
// This silliness is so we get the correct result whether chars are signed or unsigned.
|
out = *result;
|
||||||
unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
|
|
||||||
if (!(c2 & 0x80) &&
|
|
||||||
(isalnum(c2) || c2 == '/' || c2 == '.' || c2 == '~' || c2 == '-' || c2 == '_')) {
|
|
||||||
// The above characters don't need to be encoded.
|
|
||||||
out.push_back(static_cast<wchar_t>(c2));
|
|
||||||
} else {
|
|
||||||
// All other chars need to have their UTF-8 representation encoded in hex.
|
|
||||||
wchar_t buf[4];
|
|
||||||
swprintf(buf, sizeof buf / sizeof buf[0], L"%%%02X", c2);
|
|
||||||
out.append(buf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
|
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
|
||||||
static void escape_string_var(const wcstring &in, wcstring &out) {
|
static void escape_string_var(const wcstring &in, wcstring &out) {
|
||||||
bool prev_was_hex_encoded = false;
|
auto result = rust_escape_string_var(in.c_str(), in.size());
|
||||||
const std::string narrow = wcs2string(in);
|
if (result) {
|
||||||
for (auto c1 : narrow) {
|
out = *result;
|
||||||
// This silliness is so we get the correct result whether chars are signed or unsigned.
|
|
||||||
unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
|
|
||||||
if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
|
|
||||||
// ASCII alphanumerics don't need to be encoded.
|
|
||||||
if (prev_was_hex_encoded) {
|
|
||||||
out.push_back(L'_');
|
|
||||||
prev_was_hex_encoded = false;
|
|
||||||
}
|
|
||||||
out.push_back(static_cast<wchar_t>(c2));
|
|
||||||
} else if (c2 == '_') {
|
|
||||||
// Underscores are encoded by doubling them.
|
|
||||||
out.append(L"__");
|
|
||||||
prev_was_hex_encoded = false;
|
|
||||||
} else {
|
|
||||||
// All other chars need to have their UTF-8 representation encoded in hex.
|
|
||||||
wchar_t buf[4];
|
|
||||||
swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
|
|
||||||
out.append(buf);
|
|
||||||
prev_was_hex_encoded = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (prev_was_hex_encoded) {
|
|
||||||
out.push_back(L'_');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -790,177 +753,9 @@ wcstring escape_string_for_double_quotes(wcstring in) {
|
||||||
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
|
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
|
||||||
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
|
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
|
||||||
escape_flags_t flags) {
|
escape_flags_t flags) {
|
||||||
const wchar_t *in = orig_in;
|
auto result = rust_escape_string_script(orig_in, in_len, flags);
|
||||||
const bool escape_printables = !(flags & ESCAPE_NO_PRINTABLES);
|
if (result) {
|
||||||
const bool no_quoted = static_cast<bool>(flags & ESCAPE_NO_QUOTED);
|
out = *result;
|
||||||
const bool no_tilde = static_cast<bool>(flags & ESCAPE_NO_TILDE);
|
|
||||||
const bool no_qmark = feature_test(feature_flag_t::qmark_noglob);
|
|
||||||
const bool symbolic = static_cast<bool>(flags & ESCAPE_SYMBOLIC) && (MB_CUR_MAX > 1);
|
|
||||||
assert((!symbolic || !escape_printables) && "symbolic implies escape-no-printables");
|
|
||||||
|
|
||||||
bool need_escape = false;
|
|
||||||
bool need_complex_escape = false;
|
|
||||||
|
|
||||||
if (!no_quoted && in_len == 0) {
|
|
||||||
out.assign(L"''");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < in_len; i++) {
|
|
||||||
if ((*in >= ENCODE_DIRECT_BASE) && (*in < ENCODE_DIRECT_BASE + 256)) {
|
|
||||||
int val = *in - ENCODE_DIRECT_BASE;
|
|
||||||
int tmp;
|
|
||||||
|
|
||||||
out += L'\\';
|
|
||||||
out += L'X';
|
|
||||||
|
|
||||||
tmp = val / 16;
|
|
||||||
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
|
||||||
|
|
||||||
tmp = val % 16;
|
|
||||||
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
wchar_t c = *in;
|
|
||||||
switch (c) {
|
|
||||||
case L'\t': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'␉';
|
|
||||||
else
|
|
||||||
out += L"\\t";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\n': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'';
|
|
||||||
else
|
|
||||||
out += L"\\n";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\b': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'␈';
|
|
||||||
else
|
|
||||||
out += L"\\b";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\r': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'␍';
|
|
||||||
else
|
|
||||||
out += L"\\r";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\x1B': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'␛';
|
|
||||||
else
|
|
||||||
out += L"\\e";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\x7F': {
|
|
||||||
if (symbolic)
|
|
||||||
out += L'␡';
|
|
||||||
else
|
|
||||||
out += L"\\x7f";
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case L'\\':
|
|
||||||
case L'\'': {
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
if (escape_printables || (c == L'\\' && !symbolic)) out += L'\\';
|
|
||||||
out += *in;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ANY_CHAR: {
|
|
||||||
// See #1614
|
|
||||||
out += L'?';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ANY_STRING: {
|
|
||||||
out += L'*';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ANY_STRING_RECURSIVE: {
|
|
||||||
out += L"**";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case L'&':
|
|
||||||
case L'$':
|
|
||||||
case L' ':
|
|
||||||
case L'#':
|
|
||||||
case L'<':
|
|
||||||
case L'>':
|
|
||||||
case L'(':
|
|
||||||
case L')':
|
|
||||||
case L'[':
|
|
||||||
case L']':
|
|
||||||
case L'{':
|
|
||||||
case L'}':
|
|
||||||
case L'?':
|
|
||||||
case L'*':
|
|
||||||
case L'|':
|
|
||||||
case L';':
|
|
||||||
case L'"':
|
|
||||||
case L'%':
|
|
||||||
case L'~': {
|
|
||||||
bool char_is_normal = (c == L'~' && no_tilde) || (c == L'?' && no_qmark);
|
|
||||||
if (!char_is_normal) {
|
|
||||||
need_escape = true;
|
|
||||||
if (escape_printables) out += L'\\';
|
|
||||||
}
|
|
||||||
out += *in;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default: {
|
|
||||||
if (*in >= 0 && *in < 32) {
|
|
||||||
need_escape = need_complex_escape = true;
|
|
||||||
|
|
||||||
if (symbolic) {
|
|
||||||
out += L'\u2400' + *in;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*in < 27 && *in != 0) {
|
|
||||||
out += L'\\';
|
|
||||||
out += L'c';
|
|
||||||
out += L'a' + *in - 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
int tmp = (*in) % 16;
|
|
||||||
out += L'\\';
|
|
||||||
out += L'x';
|
|
||||||
out += ((*in > 15) ? L'1' : L'0');
|
|
||||||
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
|
||||||
} else {
|
|
||||||
out += *in;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
in++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use quoted escaping if possible, since most people find it easier to read.
|
|
||||||
if (!no_quoted && need_escape && !need_complex_escape && escape_printables) {
|
|
||||||
wchar_t single_quote = L'\'';
|
|
||||||
out.clear();
|
|
||||||
out.reserve(2 + in_len);
|
|
||||||
out.push_back(single_quote);
|
|
||||||
out.append(orig_in, in_len);
|
|
||||||
out.push_back(single_quote);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -367,79 +367,6 @@ static void test_enum_array() {
|
||||||
do_test(es.at(test_enum::gamma) == "def");
|
do_test(es.at(test_enum::gamma) == "def");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test sane escapes.
|
|
||||||
static void test_unescape_sane() {
|
|
||||||
const struct test_t {
|
|
||||||
const wchar_t *input;
|
|
||||||
const wchar_t *expected;
|
|
||||||
} tests[] = {
|
|
||||||
{L"abcd", L"abcd"}, {L"'abcd'", L"abcd"},
|
|
||||||
{L"'abcd\\n'", L"abcd\\n"}, {L"\"abcd\\n\"", L"abcd\\n"},
|
|
||||||
{L"\"abcd\\n\"", L"abcd\\n"}, {L"\\143", L"c"},
|
|
||||||
{L"'\\143'", L"\\143"}, {L"\\n", L"\n"} // \n normally becomes newline
|
|
||||||
};
|
|
||||||
for (const auto &test : tests) {
|
|
||||||
auto output = unescape_string(test.input, UNESCAPE_DEFAULT);
|
|
||||||
if (!output) {
|
|
||||||
err(L"Failed to unescape '%ls'\n", test.input);
|
|
||||||
} else if (*output != test.expected) {
|
|
||||||
err(L"In unescaping '%ls', expected '%ls' but got '%ls'\n", test.input, test.expected,
|
|
||||||
output->c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for overflow.
|
|
||||||
if (unescape_string(L"echo \\UFFFFFF", UNESCAPE_DEFAULT)) {
|
|
||||||
err(L"Should not have been able to unescape \\UFFFFFF\n");
|
|
||||||
}
|
|
||||||
if (unescape_string(L"echo \\U110000", UNESCAPE_DEFAULT)) {
|
|
||||||
err(L"Should not have been able to unescape \\U110000\n");
|
|
||||||
}
|
|
||||||
#if WCHAR_MAX != 0xffff
|
|
||||||
// TODO: Make this work on MS Windows.
|
|
||||||
if (!unescape_string(L"echo \\U10FFFF", UNESCAPE_DEFAULT)) {
|
|
||||||
err(L"Should have been able to unescape \\U10FFFF\n");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Test the escaping/unescaping code by escaping/unescaping random strings and verifying that the
|
|
||||||
/// original string comes back.
|
|
||||||
static void test_escape_crazy() {
|
|
||||||
say(L"Testing escaping and unescaping");
|
|
||||||
wcstring random_string;
|
|
||||||
wcstring escaped_string;
|
|
||||||
for (size_t i = 0; i < ESCAPE_TEST_COUNT; i++) {
|
|
||||||
random_string.clear();
|
|
||||||
while (random() % ESCAPE_TEST_LENGTH) {
|
|
||||||
random_string.push_back((random() % ESCAPE_TEST_CHAR) + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
escaped_string = escape_string(random_string);
|
|
||||||
auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
|
|
||||||
|
|
||||||
if (!unescaped_string) {
|
|
||||||
err(L"Failed to unescape string <%ls>", escaped_string.c_str());
|
|
||||||
break;
|
|
||||||
} else if (*unescaped_string != random_string) {
|
|
||||||
err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
|
|
||||||
random_string.c_str(), unescaped_string->c_str());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
|
|
||||||
random_string = L"line 1\\n\nline 2";
|
|
||||||
escaped_string = escape_string(random_string, ESCAPE_NO_PRINTABLES | ESCAPE_NO_QUOTED);
|
|
||||||
auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
|
|
||||||
if (!unescaped_string) {
|
|
||||||
err(L"Failed to unescape string <%ls>", escaped_string.c_str());
|
|
||||||
} else if (*unescaped_string != random_string) {
|
|
||||||
err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
|
|
||||||
random_string.c_str(), unescaped_string->c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_format() {
|
static void test_format() {
|
||||||
say(L"Testing formatting functions");
|
say(L"Testing formatting functions");
|
||||||
struct {
|
struct {
|
||||||
|
@ -6216,8 +6143,6 @@ static const test_t s_tests[]{
|
||||||
{TEST_GROUP("new_parser_ad_hoc"), test_new_parser_ad_hoc},
|
{TEST_GROUP("new_parser_ad_hoc"), test_new_parser_ad_hoc},
|
||||||
{TEST_GROUP("new_parser_errors"), test_new_parser_errors},
|
{TEST_GROUP("new_parser_errors"), test_new_parser_errors},
|
||||||
{TEST_GROUP("error_messages"), test_error_messages},
|
{TEST_GROUP("error_messages"), test_error_messages},
|
||||||
{TEST_GROUP("escape"), test_unescape_sane},
|
|
||||||
{TEST_GROUP("escape"), test_escape_crazy},
|
|
||||||
{TEST_GROUP("format"), test_format},
|
{TEST_GROUP("format"), test_format},
|
||||||
{TEST_GROUP("convert"), test_convert},
|
{TEST_GROUP("convert"), test_convert},
|
||||||
{TEST_GROUP("convert"), test_convert_private_use},
|
{TEST_GROUP("convert"), test_convert_private_use},
|
||||||
|
|
Loading…
Reference in a new issue