Move the unit tests out of fish-rust/src/common.rs (removal half of the patch).

This part deletes the inline `mod tests` block and the eight
`crate::ffi_tests::add_test!` registrations from common.rs, and gives
`old_msg_locale` in `init_locale` (env_dispatch.rs) an explicit `CString`
type annotation. The deleted and context lines are the patch pre-image, so
they carry no added commentary.

diff --git a/fish-rust/src/common.rs b/fish-rust/src/common.rs
index b934660fa..816b212c0 100644
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
@@ -2064,308 +2064,6 @@ pub fn fputws(s: &wstr, fd: RawFd) {
     wwrite_to_fd(s, fd);
 }
 
-mod tests {
-    use super::*;
-    use crate::wchar::widestrs;
-    use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
-    use rand::random;
-
-    #[widestrs]
-    pub fn test_escape_string() {
-        let regex = |input| escape_string(input, EscapeStringStyle::Regex);
-
-        // plain text should not be needlessly escaped
-        assert_eq!(regex("hello world!"L), "hello world!"L);
-
-        // all the following are intended to be ultimately matched literally - even if they
-        // don't look like that's the intent - so we escape them.
-        assert_eq!(regex(".ext"L), "\\.ext"L);
-        assert_eq!(regex("{word}"L), "\\{word\\}"L);
-        assert_eq!(regex("hola-mundo"L), "hola\\-mundo"L);
-        assert_eq!(
-            regex("$17.42 is your total?"L),
-            "\\$17\\.42 is your total\\?"L
-        );
-        assert_eq!(
-            regex("not really escaped\\?"L),
-            "not really escaped\\\\\\?"L
-        );
-    }
-
-    #[widestrs]
-    pub fn test_unescape_sane() {
-        const TEST_CASES: &[(&wstr, &wstr)] = &[
-            ("abcd"L, "abcd"L),
-            ("'abcd'"L, "abcd"L),
-            ("'abcd\\n'"L, "abcd\\n"L),
-            ("\"abcd\\n\""L, "abcd\\n"L),
-            ("\"abcd\\n\""L, "abcd\\n"L),
-            ("\\143"L, "c"L),
-            ("'\\143'"L, "\\143"L),
-            ("\\n"L, "\n"L), // \n normally becomes newline
-        ];
-
-        for (input, expected) in TEST_CASES {
-            let Some(output) = unescape_string(input, UnescapeStringStyle::default()) else {
-                panic!("Failed to unescape string {input:?}");
-            };
-
-            assert_eq!(
-                output, *expected,
-                "In unescaping {input:?}, expected {expected:?} but got {output:?}\n"
-            );
-        }
-    }
-
-    #[widestrs]
-    pub fn test_escape_var() {
-        const TEST_CASES: &[(&wstr, &wstr)] = &[
-            (" a"L, "_20_a"L),
-            ("a B "L, "a_20_42_20_"L),
-            ("a b "L, "a_20_b_20_"L),
-            (" B"L, "_20_42_"L),
-            (" f"L, "_20_f"L),
-            (" 1"L, "_20_31_"L),
-            ("a\nghi_"L, "a_0A_ghi__"L),
-        ];
-
-        for (input, expected) in TEST_CASES {
-            let output = escape_string(input, EscapeStringStyle::Var);
-
-            assert_eq!(
-                output, *expected,
-                "In escaping {input:?} with style var, expected {expected:?} but got {output:?}\n"
-            );
-        }
-    }
-
-    #[widestrs]
-    pub fn test_escape_crazy() {
-        let mut random_string = WString::new();
-        let mut escaped_string;
-        for _ in 0..(ESCAPE_TEST_COUNT as u32) {
-            random_string.clear();
-            while random::<usize>() % ESCAPE_TEST_LENGTH != 0 {
-                random_string
-                    .push(char::from_u32((random::<u32>() % ESCAPE_TEST_CHAR as u32) + 1).unwrap());
-            }
-
-            for (escape_style, unescape_style) in [
-                (EscapeStringStyle::default(), UnescapeStringStyle::default()),
-                (EscapeStringStyle::Var, UnescapeStringStyle::Var),
-                (EscapeStringStyle::Url, UnescapeStringStyle::Url),
-            ] {
-                escaped_string = escape_string(&random_string, escape_style);
-                let Some(unescaped_string) = unescape_string(&escaped_string, unescape_style) else {
-                    let slice = escaped_string.as_char_slice();
-                    panic!("Failed to unescape string {slice:?} using style {unescape_style:?}");
-                };
-                assert_eq!(random_string, unescaped_string, "Escaped and then unescaped string {random_string:?}, but got back a different string {unescaped_string:?}. The intermediate escape looked like {escaped_string:?}. Using escape style {escape_style:?}");
-            }
-        }
-
-        // Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
-        random_string = "line 1\\n\nline 2"L.to_owned();
-        escaped_string = escape_string(
-            &random_string,
-            EscapeStringStyle::Script(EscapeFlags::NO_PRINTABLES | EscapeFlags::NO_QUOTED),
-        );
-        let Some(unescaped_string) = unescape_string(&escaped_string, UnescapeStringStyle::default()) else {
-            panic!("Failed to unescape string <{escaped_string}>");
-        };
-
-        assert_eq!(random_string, unescaped_string, "Escaped and then unescaped string '{random_string}', but got back a different string '{unescaped_string}'");
-    }
-
-    /// The number of tests to run.
-    const ESCAPE_TEST_COUNT: usize = 100_000;
-    /// The average length of strings to unescape.
-    const ESCAPE_TEST_LENGTH: usize = 100;
-    /// The highest character number of character to try and escape.
-    const ESCAPE_TEST_CHAR: usize = 4000;
-
-    /// Helper to convert a narrow string to a sequence of hex digits.
-    fn str2hex(input: &[u8]) -> String {
-        let mut output = "".to_string();
-        for byte in input {
-            output += &format!("0x{:2X} ", *byte);
-        }
-        output
-    }
-
-    /// Test wide/narrow conversion by creating random strings and verifying that the original
-    /// string comes back through double conversion.
-    pub fn test_convert() {
-        for _ in 0..ESCAPE_TEST_COUNT {
-            let mut origin: Vec<u8> = vec![];
-            while (random::<usize>() % ESCAPE_TEST_LENGTH) != 0 {
-                let byte = random();
-                origin.push(byte);
-            }
-
-            let w = str2wcstring(&origin[..]);
-            let n = wcs2string(&w);
-            assert_eq!(
-                origin,
-                n,
-                "Conversion cycle of string:\n{:4} chars: {}\n\
-                produced different string:\n\
-                {:4} chars: {}",
-                origin.len(),
-                &str2hex(&origin),
-                n.len(),
-                &str2hex(&n)
-            );
-        }
-    }
-
-    /// Verify that ASCII narrow->wide conversions are correct.
-    pub fn test_convert_ascii() {
-        let mut s = vec![b'\0'; 4096];
-        for (i, c) in s.iter_mut().enumerate() {
-            *c = u8::try_from(i % 10).unwrap() + b'0';
-        }
-
-        // Test a variety of alignments.
-        for left in 0..16 {
-            for right in 0..16 {
-                let len = s.len() - left - right;
-                let input = &s[left..left + len];
-                let wide = str2wcstring(input);
-                let narrow = wcs2string(&wide);
-                assert_eq!(narrow, input);
-            }
-        }
-
-        // Put some non-ASCII bytes in and ensure it all still works.
-        for i in 0..s.len() {
-            let saved = s[i];
-            s[i] = 0xF7;
-            assert_eq!(wcs2string(&str2wcstring(&s)), s);
-            s[i] = saved;
-        }
-    }
-
-    /// fish uses the private-use range to encode bytes that could not be decoded using the
-    /// user's locale. If the input could be decoded, but decoded to private-use codepoints,
-    /// then fish should also use the direct encoding for those bytes. Verify that characters
-    /// in the private use area are correctly round-tripped. See #7723.
-    pub fn test_convert_private_use() {
-        for c in ENCODE_DIRECT_BASE..ENCODE_DIRECT_END {
-            // Encode the char via the locale. Do not use fish functions which interpret these
-            // specially.
-            let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
-            let mut state = zero_mbstate();
-            let len = unsafe {
-                wcrtomb(
-                    std::ptr::addr_of_mut!(converted[0]).cast(),
-                    c as libc::wchar_t,
-                    &mut state,
-                )
-            };
-            if len == 0_usize.wrapping_sub(1) {
-                // Could not be encoded in this locale.
-                continue;
-            }
-            let s = &converted[..len];
-
-            // Ask fish to decode this via str2wcstring.
-            // str2wcstring should notice that the decoded form collides with its private use
-            // and encode it directly.
-            let ws = str2wcstring(s);
-
-            // Each byte should be encoded directly, and round tripping should work.
-            assert_eq!(ws.len(), s.len());
-            assert_eq!(wcs2string(&ws), s);
-        }
-    }
-
-    #[test]
-    fn test_scoped_push() {
-        use super::scoped_push;
-        struct Context {
-            value: i32,
-        }
-
-        let mut value = 0;
-        let mut ctx = Context { value };
-        {
-            let mut ctx = scoped_push(&mut ctx, |ctx| &mut ctx.value, value + 1);
-            value = ctx.value;
-            assert_eq!(value, 1);
-            {
-                let mut ctx = scoped_push(&mut ctx, |ctx| &mut ctx.value, value + 1);
-                assert_eq!(ctx.value, 2);
-                ctx.value = 5;
-                assert_eq!(ctx.value, 5);
-            }
-            assert_eq!(ctx.value, 1);
-        }
-        assert_eq!(ctx.value, 0);
-    }
-
-    #[test]
-    fn test_scope_guard() {
-        use super::ScopeGuard;
-        let relaxed = std::sync::atomic::Ordering::Relaxed;
-        let counter = std::sync::atomic::AtomicUsize::new(0);
-        {
-            let guard = ScopeGuard::new(123, |arg| {
-                assert_eq!(*arg, 123);
-                counter.fetch_add(1, relaxed);
-            });
-            assert_eq!(counter.load(relaxed), 0);
-            std::mem::drop(guard);
-            assert_eq!(counter.load(relaxed), 1);
-        }
-        // commit also invokes the callback.
-        {
-            let guard = ScopeGuard::new(123, |arg| {
-                assert_eq!(*arg, 123);
-                counter.fetch_add(1, relaxed);
-            });
-            assert_eq!(counter.load(relaxed), 1);
-            let val = ScopeGuard::commit(guard);
-            assert_eq!(counter.load(relaxed), 2);
-            assert_eq!(val, 123);
-        }
-    }
-
-    #[test]
-    fn test_scope_guard_consume() {
-        // The following pattern works.
-        use super::{scoped_push, ScopeGuarding};
-        struct Storage {
-            value: &'static str,
-        }
-        let obj = Storage { value: "nu" };
-        assert_eq!(obj.value, "nu");
-        let obj = scoped_push(obj, |obj| &mut obj.value, "mu");
-        assert_eq!(obj.value, "mu");
-        let obj = scoped_push(obj, |obj| &mut obj.value, "mu2");
-        assert_eq!(obj.value, "mu2");
-        let obj = ScopeGuarding::commit(obj);
-        assert_eq!(obj.value, "mu");
-        let obj = ScopeGuarding::commit(obj);
-        assert_eq!(obj.value, "nu");
-    }
-
-    pub fn test_assert_is_locked() {
-        let lock = std::sync::Mutex::new(());
-        let _guard = lock.lock().unwrap();
-        assert_is_locked!(&lock);
-    }
-}
-
-crate::ffi_tests::add_test!("escape_string", tests::test_escape_string);
-crate::ffi_tests::add_test!("escape_string", tests::test_escape_crazy);
-crate::ffi_tests::add_test!("escape_string", tests::test_unescape_sane);
-crate::ffi_tests::add_test!("escape_string", tests::test_escape_var);
-crate::ffi_tests::add_test!("escape_string", tests::test_convert);
-crate::ffi_tests::add_test!("escape_string", tests::test_convert_ascii);
-crate::ffi_tests::add_test!("escape_string", tests::test_convert_private_use);
-crate::ffi_tests::add_test!("assert_is_locked", tests::test_assert_is_locked);
-
 #[cxx::bridge]
 mod common_ffi {
     extern "C++" {
diff --git a/fish-rust/src/env_dispatch.rs b/fish-rust/src/env_dispatch.rs
index 3e116c771..17c3ccba7 100644
--- a/fish-rust/src/env_dispatch.rs
+++ b/fish-rust/src/env_dispatch.rs
@@ -692,7 +692,7 @@ fn init_locale(vars: &EnvStack) {
         "C.UTF-8", "en_US.UTF-8", "en_GB.UTF-8", "de_DE.UTF-8", "C.utf8", "UTF-8",
     ];
 
-    let old_msg_locale = unsafe {
+    let old_msg_locale: CString = unsafe {
         let old = libc::setlocale(libc::LC_MESSAGES, std::ptr::null());
         // We have to make a copy because the subsequent setlocale() call to change the locale will
         // invalidate the pointer from this setlocale() call.
diff --git a/fish-rust/src/tests/common.rs b/fish-rust/src/tests/common.rs
new file mode 100644
index 000000000..004fda668
--- /dev/null
+++ b/fish-rust/src/tests/common.rs
@@ -0,0 +1,82 @@
+//! Unit tests for `scoped_push`, `ScopeGuard`, `ScopeGuarding` and the `assert_is_locked!` macro.
+
+#[allow(unused_imports)]
+use crate::common::{scoped_push, ScopeGuard, ScopeGuarding};
+
+/// Nested `scoped_push` guards override `ctx.value` and restore the previous value when dropped.
+#[test]
+fn test_scoped_push() {
+    struct Context {
+        value: i32,
+    }
+
+    let mut value = 0;
+    let mut ctx = Context { value };
+    {
+        let mut ctx = scoped_push(&mut ctx, |ctx| &mut ctx.value, value + 1);
+        value = ctx.value;
+        assert_eq!(value, 1);
+        {
+            let mut ctx = scoped_push(&mut ctx, |ctx| &mut ctx.value, value + 1);
+            assert_eq!(ctx.value, 2);
+            ctx.value = 5;
+            assert_eq!(ctx.value, 5);
+        }
+        assert_eq!(ctx.value, 1);
+    }
+    assert_eq!(ctx.value, 0);
+}
+
+/// A `ScopeGuard` runs its callback once, on drop or on `commit`, which also returns the value.
+#[test]
+fn test_scope_guard() {
+    let relaxed = std::sync::atomic::Ordering::Relaxed;
+    let counter = std::sync::atomic::AtomicUsize::new(0);
+    {
+        let guard = ScopeGuard::new(123, |arg| {
+            assert_eq!(*arg, 123);
+            counter.fetch_add(1, relaxed);
+        });
+        assert_eq!(counter.load(relaxed), 0);
+        std::mem::drop(guard);
+        assert_eq!(counter.load(relaxed), 1);
+    }
+    // commit also invokes the callback.
+    {
+        let guard = ScopeGuard::new(123, |arg| {
+            assert_eq!(*arg, 123);
+            counter.fetch_add(1, relaxed);
+        });
+        assert_eq!(counter.load(relaxed), 1);
+        let val = ScopeGuard::commit(guard);
+        assert_eq!(counter.load(relaxed), 2);
+        assert_eq!(val, 123);
+    }
+}
+
+/// Guards that own their context can be stacked and then unwound with `ScopeGuarding::commit`.
+#[test]
+fn test_scope_guard_consume() {
+    // The following pattern works.
+    struct Storage {
+        value: &'static str,
+    }
+    let obj = Storage { value: "nu" };
+    assert_eq!(obj.value, "nu");
+    let obj = scoped_push(obj, |obj| &mut obj.value, "mu");
+    assert_eq!(obj.value, "mu");
+    let obj = scoped_push(obj, |obj| &mut obj.value, "mu2");
+    assert_eq!(obj.value, "mu2");
+    let obj = ScopeGuarding::commit(obj);
+    assert_eq!(obj.value, "mu");
+    let obj = ScopeGuarding::commit(obj);
+    assert_eq!(obj.value, "nu");
+}
+
+/// `assert_is_locked!` accepts a mutex whose guard is currently held.
+#[test]
+fn test_assert_is_locked() {
+    let lock = std::sync::Mutex::new(());
+    let _guard = lock.lock().unwrap();
+    assert_is_locked!(&lock);
+}
diff --git a/fish-rust/src/tests/mod.rs b/fish-rust/src/tests/mod.rs
index 46bb9838d..3310e7ead 100644
--- a/fish-rust/src/tests/mod.rs
+++ b/fish-rust/src/tests/mod.rs
@@ -1 +1,3 @@
+mod common;
 mod fd_monitor;
+mod string_escape;
diff --git a/fish-rust/src/tests/string_escape.rs b/fish-rust/src/tests/string_escape.rs
new file mode 100644
index 000000000..d44038212
--- /dev/null
+++ b/fish-rust/src/tests/string_escape.rs
@@ -0,0 +1,253 @@
+//! Tests for `escape_string`/`unescape_string` and for `str2wcstring`/`wcs2string` round trips.
+
+#![allow(unused_imports)]
+use crate::common::{
+    escape_string, str2wcstring, unescape_string, wcs2string, EscapeFlags, EscapeStringStyle,
+    UnescapeStringStyle, ENCODE_DIRECT_BASE, ENCODE_DIRECT_END,
+};
+use crate::wchar::{widestrs, wstr, WString};
+use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
+use rand::random;
+
+/// wcs2string is locale-dependent, so ensure we have a multibyte locale
+/// before using it in a test.
+/// This is only needed for the variable escape function.
+///
+/// Panics if none of the candidate locales provides a multibyte encoding.
+fn setlocale() {
+    #[rustfmt::skip]
+    const UTF8_LOCALES: &[&str] = &[
+        "C.UTF-8", "en_US.UTF-8", "en_GB.UTF-8", "de_DE.UTF-8", "C.utf8", "UTF-8",
+    ];
+    for locale in UTF8_LOCALES {
+        let locale = std::ffi::CString::new(locale.to_owned()).unwrap();
+        // SAFETY: `locale` is a valid NUL-terminated C string that lives until after the call.
+        unsafe { libc::setlocale(libc::LC_CTYPE, locale.as_ptr()) };
+        if crate::compat::MB_CUR_MAX() > 1 {
+            return;
+        }
+    }
+    panic!("No UTF-8 locale found");
+}
+
+/// Regex-style escaping leaves plain text alone and backslash-escapes regex metacharacters.
+#[widestrs]
+#[test]
+fn test_escape_string() {
+    let regex = |input| escape_string(input, EscapeStringStyle::Regex);
+
+    // plain text should not be needlessly escaped
+    assert_eq!(regex("hello world!"L), "hello world!"L);
+
+    // all the following are intended to be ultimately matched literally - even if they
+    // don't look like that's the intent - so we escape them.
+    assert_eq!(regex(".ext"L), "\\.ext"L);
+    assert_eq!(regex("{word}"L), "\\{word\\}"L);
+    assert_eq!(regex("hola-mundo"L), "hola\\-mundo"L);
+    assert_eq!(
+        regex("$17.42 is your total?"L),
+        "\\$17\\.42 is your total\\?"L
+    );
+    assert_eq!(
+        regex("not really escaped\\?"L),
+        "not really escaped\\\\\\?"L
+    );
+}
+
+/// Default-style unescaping: quotes are stripped; `\n` and octal escapes decode only unquoted.
+#[widestrs]
+#[test]
+pub fn test_unescape_sane() {
+    const TEST_CASES: &[(&wstr, &wstr)] = &[
+        ("abcd"L, "abcd"L),
+        ("'abcd'"L, "abcd"L),
+        ("'abcd\\n'"L, "abcd\\n"L),
+        ("\"abcd\\n\""L, "abcd\\n"L),
+        ("\"abcd\\n\""L, "abcd\\n"L),
+        ("\\143"L, "c"L),
+        ("'\\143'"L, "\\143"L),
+        ("\\n"L, "\n"L), // \n normally becomes newline
+    ];
+
+    for (input, expected) in TEST_CASES {
+        let Some(output) = unescape_string(input, UnescapeStringStyle::default()) else {
+            panic!("Failed to unescape string {input:?}");
+        };
+
+        assert_eq!(
+            output, *expected,
+            "In unescaping {input:?}, expected {expected:?} but got {output:?}\n"
+        );
+    }
+}
+
+/// Var-style escaping produces the expected `_XX_` hex encodings for a fixed set of inputs.
+#[widestrs]
+#[test]
+fn test_escape_var() {
+    const TEST_CASES: &[(&wstr, &wstr)] = &[
+        (" a"L, "_20_a"L),
+        ("a B "L, "a_20_42_20_"L),
+        ("a b "L, "a_20_b_20_"L),
+        (" B"L, "_20_42_"L),
+        (" f"L, "_20_f"L),
+        (" 1"L, "_20_31_"L),
+        ("a\nghi_"L, "a_0A_ghi__"L),
+    ];
+
+    for (input, expected) in TEST_CASES {
+        let output = escape_string(input, EscapeStringStyle::Var);
+
+        assert_eq!(
+            output, *expected,
+            "In escaping {input:?} with style var, expected {expected:?} but got {output:?}\n"
+        );
+    }
+}
+
+/// Random strings survive an escape/unescape round trip in the default, var and url styles.
+#[widestrs]
+#[test]
+fn test_escape_crazy() {
+    setlocale();
+    let mut random_string = WString::new();
+    let mut escaped_string;
+    for _ in 0..(ESCAPE_TEST_COUNT as u32) {
+        random_string.clear();
+        while random::<usize>() % ESCAPE_TEST_LENGTH != 0 {
+            random_string
+                .push(char::from_u32((random::<u32>() % ESCAPE_TEST_CHAR as u32) + 1).unwrap());
+        }
+
+        for (escape_style, unescape_style) in [
+            (EscapeStringStyle::default(), UnescapeStringStyle::default()),
+            (EscapeStringStyle::Var, UnescapeStringStyle::Var),
+            (EscapeStringStyle::Url, UnescapeStringStyle::Url),
+        ] {
+            escaped_string = escape_string(&random_string, escape_style);
+            let Some(unescaped_string) = unescape_string(&escaped_string, unescape_style) else {
+                let slice = escaped_string.as_char_slice();
+                panic!("Failed to unescape string {slice:?} using style {unescape_style:?}");
+            };
+            assert_eq!(random_string, unescaped_string, "Escaped and then unescaped string {random_string:?}, but got back a different string {unescaped_string:?}. The intermediate escape looked like {escaped_string:?}. Using escape style {escape_style:?}");
+        }
+    }
+
+    // Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
+    random_string = "line 1\\n\nline 2"L.to_owned();
+    escaped_string = escape_string(
+        &random_string,
+        EscapeStringStyle::Script(EscapeFlags::NO_PRINTABLES | EscapeFlags::NO_QUOTED),
+    );
+    let Some(unescaped_string) = unescape_string(&escaped_string, UnescapeStringStyle::default()) else {
+        panic!("Failed to unescape string <{escaped_string}>");
+    };
+
+    assert_eq!(random_string, unescaped_string, "Escaped and then unescaped string '{random_string}', but got back a different string '{unescaped_string}'");
+}
+
+/// The number of tests to run.
+const ESCAPE_TEST_COUNT: usize = 100_000;
+/// The average length of strings to unescape.
+const ESCAPE_TEST_LENGTH: usize = 100;
+/// The highest character number of character to try and escape.
+const ESCAPE_TEST_CHAR: usize = 4000;
+
+/// Helper to convert a narrow string to a sequence of zero-padded hex bytes (`0x0A 0xFF ...`).
+fn str2hex(input: &[u8]) -> String {
+    let mut output = "".to_string();
+    for byte in input {
+        output += &format!("0x{:02X} ", *byte);
+    }
+    output
+}
+
+/// Test wide/narrow conversion by creating random strings and verifying that the original
+/// string comes back through double conversion.
+#[test]
+fn test_convert() {
+    for _ in 0..ESCAPE_TEST_COUNT {
+        let mut origin: Vec<u8> = vec![];
+        while (random::<usize>() % ESCAPE_TEST_LENGTH) != 0 {
+            let byte = random();
+            origin.push(byte);
+        }
+
+        let w = str2wcstring(&origin[..]);
+        let n = wcs2string(&w);
+        assert_eq!(
+            origin,
+            n,
+            "Conversion cycle of string:\n{:4} chars: {}\n\
+            produced different string:\n\
+            {:4} chars: {}",
+            origin.len(),
+            &str2hex(&origin),
+            n.len(),
+            &str2hex(&n)
+        );
+    }
+}
+
+/// Verify that ASCII narrow->wide conversions are correct.
+#[test]
+pub fn test_convert_ascii() {
+    let mut s = vec![b'\0'; 4096];
+    for (i, c) in s.iter_mut().enumerate() {
+        *c = u8::try_from(i % 10).unwrap() + b'0';
+    }
+
+    // Test a variety of alignments.
+    for left in 0..16 {
+        for right in 0..16 {
+            let len = s.len() - left - right;
+            let input = &s[left..left + len];
+            let wide = str2wcstring(input);
+            let narrow = wcs2string(&wide);
+            assert_eq!(narrow, input);
+        }
+    }
+
+    // Put some non-ASCII bytes in and ensure it all still works.
+    for i in 0..s.len() {
+        let saved = s[i];
+        s[i] = 0xF7;
+        assert_eq!(wcs2string(&str2wcstring(&s)), s);
+        s[i] = saved;
+    }
+}
+
+/// fish uses the private-use range to encode bytes that could not be decoded using the
+/// user's locale. If the input could be decoded, but decoded to private-use codepoints,
+/// then fish should also use the direct encoding for those bytes. Verify that characters
+/// in the private use area are correctly round-tripped. See #7723.
+#[test]
+fn test_convert_private_use() {
+    for c in ENCODE_DIRECT_BASE..ENCODE_DIRECT_END {
+        // Encode the char via the locale. Do not use fish functions which interpret these
+        // specially.
+        let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
+        let mut state = zero_mbstate();
+        let len = unsafe {
+            wcrtomb(
+                std::ptr::addr_of_mut!(converted[0]).cast(),
+                c as libc::wchar_t,
+                &mut state,
+            )
+        };
+        if len == 0_usize.wrapping_sub(1) {
+            // Could not be encoded in this locale.
+            continue;
+        }
+        let s = &converted[..len];
+
+        // Ask fish to decode this via str2wcstring.
+        // str2wcstring should notice that the decoded form collides with its private use
+        // and encode it directly.
+        let ws = str2wcstring(s);
+
+        // Each byte should be encoded directly, and round tripping should work.
+        assert_eq!(ws.len(), s.len());
+        assert_eq!(wcs2string(&ws), s);
+    }
+}