2023-03-19 15:54:07 +00:00
|
|
|
use std::{iter, slice};
|
|
|
|
|
2023-12-02 15:26:45 +00:00
|
|
|
use crate::{
|
|
|
|
common::subslice_position,
|
|
|
|
wchar::{wstr, WString},
|
2024-01-17 06:41:47 +00:00
|
|
|
L,
|
2023-12-02 15:26:45 +00:00
|
|
|
};
|
2023-01-14 22:56:24 +00:00
|
|
|
use widestring::utfstr::CharsUtf32;
|
|
|
|
|
2023-03-19 03:11:18 +00:00
|
|
|
/// Helpers to convert things to widestring.
|
|
|
|
/// This is like std::string::ToString.
|
|
|
|
pub trait ToWString {
|
|
|
|
fn to_wstring(&self) -> WString;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn to_wstring_impl(mut val: u64, neg: bool) -> WString {
|
|
|
|
// 20 digits max in u64: 18446744073709551616.
|
|
|
|
let mut digits = [0; 24];
|
|
|
|
let mut ndigits = 0;
|
|
|
|
while val > 0 {
|
|
|
|
digits[ndigits] = (val % 10) as u8;
|
|
|
|
val /= 10;
|
|
|
|
ndigits += 1;
|
|
|
|
}
|
|
|
|
if ndigits == 0 {
|
|
|
|
digits[0] = 0;
|
|
|
|
ndigits = 1;
|
|
|
|
}
|
|
|
|
let mut result = WString::with_capacity(ndigits + neg as usize);
|
|
|
|
if neg {
|
|
|
|
result.push('-');
|
|
|
|
}
|
|
|
|
for i in (0..ndigits).rev() {
|
|
|
|
result.push((digits[i] + b'0') as char);
|
|
|
|
}
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement to_wstring() for signed types.
|
|
|
|
macro_rules! impl_to_wstring_signed {
|
2023-03-20 07:15:21 +00:00
|
|
|
($($t:ty), *) => {
|
|
|
|
$(
|
2023-03-19 03:11:18 +00:00
|
|
|
impl ToWString for $t {
|
|
|
|
fn to_wstring(&self) -> WString {
|
|
|
|
let val = *self as i64;
|
|
|
|
to_wstring_impl(val.unsigned_abs(), val < 0)
|
|
|
|
}
|
|
|
|
}
|
2023-03-20 07:15:21 +00:00
|
|
|
)*
|
2023-03-19 03:11:18 +00:00
|
|
|
};
|
|
|
|
}
|
2023-03-20 07:15:21 +00:00
|
|
|
impl_to_wstring_signed!(i8, i16, i32, i64, isize);
|
2023-03-19 03:11:18 +00:00
|
|
|
|
|
|
|
/// Implement to_wstring() for unsigned types.
|
|
|
|
macro_rules! impl_to_wstring_unsigned {
|
2023-03-20 07:15:21 +00:00
|
|
|
($($t:ty), *) => {
|
|
|
|
$(
|
2023-03-19 03:11:18 +00:00
|
|
|
impl ToWString for $t {
|
|
|
|
fn to_wstring(&self) -> WString {
|
|
|
|
to_wstring_impl(*self as u64, false)
|
|
|
|
}
|
|
|
|
}
|
2023-03-20 07:15:21 +00:00
|
|
|
)*
|
2023-03-19 03:11:18 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2023-12-22 11:27:01 +00:00
|
|
|
impl_to_wstring_unsigned!(u8, u16, u32, u64, u128, usize);
|
2023-03-19 03:11:18 +00:00
|
|
|
|
|
|
|
/// Check integer formatting against literals and against `to_string()`.
#[test]
fn test_to_wstring() {
    assert_eq!(0_u64.to_wstring(), "0");
    assert_eq!(1_u64.to_wstring(), "1");
    assert_eq!(0_i64.to_wstring(), "0");
    assert_eq!(1_i64.to_wstring(), "1");
    assert_eq!((-1_i64).to_wstring(), "-1");
    assert_eq!((-5_i64).to_wstring(), "-5");

    // Walk the powers of -3 (alternating sign, growing magnitude) until the
    // next one would overflow an i64.
    for val in std::iter::successors(Some(1_i64), |v| v.checked_mul(-3)) {
        assert_eq!(val.to_wstring(), val.to_string());
    }

    // Extremes of the 64-bit types.
    assert_eq!(u64::MAX.to_wstring(), "18446744073709551615");
    assert_eq!(i64::MIN.to_wstring(), "-9223372036854775808");
    assert_eq!(i64::MAX.to_wstring(), "9223372036854775807");
}
|
|
|
|
|
2023-03-06 03:52:11 +00:00
|
|
|
/// Anything that can be turned into a cloneable, double-ended iterator of chars.
///
/// Implemented in this file for `char`, `&str`, `&[char]`, `&wstr`, `&WString`,
/// and for the `str`/`wstr` char iterators themselves.
pub trait IntoCharIter {
    /// The concrete iterator type produced by [`IntoCharIter::chars`].
    type Iter: DoubleEndedIterator<Item = char> + Clone;

    /// Consume `self` and return the iterator over its chars.
    fn chars(self) -> Self::Iter;
}
|
|
|
|
|
2023-03-06 03:52:11 +00:00
|
|
|
impl IntoCharIter for char {
|
2023-01-14 22:56:24 +00:00
|
|
|
type Iter = std::iter::Once<char>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
std::iter::once(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 03:52:11 +00:00
|
|
|
impl<'a> IntoCharIter for &'a str {
|
2023-01-14 22:56:24 +00:00
|
|
|
type Iter = std::str::Chars<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
str::chars(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-19 15:54:07 +00:00
|
|
|
impl<'a> IntoCharIter for &'a [char] {
|
|
|
|
type Iter = iter::Copied<slice::Iter<'a, char>>;
|
|
|
|
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self.iter().copied()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 03:52:11 +00:00
|
|
|
impl<'a> IntoCharIter for &'a wstr {
|
2023-01-14 22:56:24 +00:00
|
|
|
type Iter = CharsUtf32<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
wstr::chars(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 03:52:11 +00:00
|
|
|
impl<'a> IntoCharIter for &'a WString {
|
2023-01-14 22:56:24 +00:00
|
|
|
type Iter = CharsUtf32<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
2023-02-04 23:45:25 +00:00
|
|
|
wstr::chars(self)
|
2023-01-14 22:56:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 03:52:12 +00:00
|
|
|
// Also support `str.chars()` itself.
|
|
|
|
impl<'a> IntoCharIter for std::str::Chars<'a> {
|
|
|
|
type Iter = Self;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Also support `wstr.chars()` itself.
|
|
|
|
impl<'a> IntoCharIter for CharsUtf32<'a> {
|
|
|
|
type Iter = Self;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-06 19:58:10 +00:00
|
|
|
/// Report whether the items of `prefix` match the leading items of `contents`.
///
/// An empty `prefix` always matches. A `prefix` longer than `contents` never
/// does, because `contents` runs out before every prefix item has been matched.
fn iter_prefixes_iter<Prefix, Contents>(mut prefix: Prefix, mut contents: Contents) -> bool
where
    Prefix: Iterator,
    Contents: Iterator,
    Prefix::Item: PartialEq<Contents::Item>,
{
    // `all` stops at the first prefix item that has no equal counterpart.
    prefix.all(|expected| match contents.next() {
        Some(actual) => expected == actual,
        None => false,
    })
}
|
|
|
|
|
2023-04-09 20:41:04 +00:00
|
|
|
/// Iterator that yields the pieces of a wide string separated by a single char.
pub struct WStrCharSplitIter<'a> {
    /// The separator char; it is never part of a yielded piece.
    split: char,
    /// The input not yet yielded, or `None` once the last piece has been returned.
    chars: Option<&'a [char]>,
}
|
|
|
|
|
|
|
|
impl<'a> Iterator for WStrCharSplitIter<'a> {
|
|
|
|
type Item = &'a wstr;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2023-04-24 02:33:10 +00:00
|
|
|
let chars = self.chars?;
|
|
|
|
if let Some(idx) = chars.iter().position(|c| *c == self.split) {
|
|
|
|
let (prefix, rest) = chars.split_at(idx);
|
|
|
|
self.chars = Some(&rest[1..]);
|
2023-04-09 20:41:04 +00:00
|
|
|
return Some(wstr::from_char_slice(prefix));
|
|
|
|
} else {
|
2023-04-24 02:33:10 +00:00
|
|
|
self.chars = None;
|
|
|
|
return Some(wstr::from_char_slice(chars));
|
2023-04-09 20:41:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-14 22:56:24 +00:00
|
|
|
/// Convenience functions for WString.
|
|
|
|
pub trait WExt {
|
|
|
|
/// Access the chars of a WString or wstr.
|
|
|
|
fn as_char_slice(&self) -> &[char];
|
|
|
|
|
2023-04-01 17:17:49 +00:00
|
|
|
/// Return a char slice from a *char index*.
|
|
|
|
/// This is different from Rust string slicing, which takes a byte index.
|
|
|
|
fn slice_from(&self, start: usize) -> &wstr {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
wstr::from_char_slice(&chars[start..])
|
|
|
|
}
|
|
|
|
|
2023-04-30 19:38:06 +00:00
|
|
|
/// Return a char slice up to a *char index*.
|
|
|
|
/// This is different from Rust string slicing, which takes a byte index.
|
|
|
|
fn slice_to(&self, end: usize) -> &wstr {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
wstr::from_char_slice(&chars[..end])
|
|
|
|
}
|
|
|
|
|
2023-04-24 02:33:10 +00:00
|
|
|
/// Return the number of chars.
|
|
|
|
/// This is different from Rust string len, which returns the number of bytes.
|
|
|
|
fn char_count(&self) -> usize {
|
|
|
|
self.as_char_slice().len()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the char at an index.
|
2023-01-14 22:56:24 +00:00
|
|
|
/// If the index is equal to the length, return '\0'.
|
|
|
|
/// If the index exceeds the length, then panic.
|
|
|
|
fn char_at(&self, index: usize) -> char {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
if index == chars.len() {
|
|
|
|
'\0'
|
|
|
|
} else {
|
|
|
|
chars[index]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-09 18:12:16 +00:00
|
|
|
/// Return the char at an index.
|
|
|
|
/// If the index is equal to the length, return '\0'.
|
|
|
|
/// If the index exceeds the length, return None.
|
|
|
|
fn try_char_at(&self, index: usize) -> Option<char> {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
match index {
|
|
|
|
_ if index == chars.len() => Some('\0'),
|
|
|
|
_ if index > chars.len() => None,
|
|
|
|
_ => Some(chars[index]),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-06 19:58:10 +00:00
|
|
|
/// Return an iterator over substrings, split by a given char.
|
2023-04-09 20:41:04 +00:00
|
|
|
/// The split char is not included in the substrings.
|
|
|
|
fn split(&self, c: char) -> WStrCharSplitIter {
|
|
|
|
WStrCharSplitIter {
|
|
|
|
split: c,
|
2023-04-24 02:33:10 +00:00
|
|
|
chars: Some(self.as_char_slice()),
|
2023-04-09 20:41:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-16 18:17:34 +00:00
|
|
|
/// Returns the index of the first match against the provided substring or `None`.
|
|
|
|
fn find(&self, search: impl AsRef<[char]>) -> Option<usize> {
|
2024-01-19 05:08:18 +00:00
|
|
|
subslice_position(self.as_char_slice(), search.as_ref())
|
2023-05-16 18:17:34 +00:00
|
|
|
}
|
|
|
|
|
2023-12-02 15:26:45 +00:00
|
|
|
/// Replaces all matches of a pattern with another string.
|
|
|
|
fn replace(&self, from: impl AsRef<[char]>, to: &wstr) -> WString {
|
|
|
|
let from = from.as_ref();
|
|
|
|
let mut s = self.as_char_slice().to_vec();
|
|
|
|
let mut offset = 0;
|
|
|
|
while let Some(relpos) = subslice_position(&s[offset..], from) {
|
|
|
|
offset += relpos;
|
|
|
|
s.splice(offset..(offset + from.len()), to.chars());
|
|
|
|
offset += to.len();
|
|
|
|
}
|
|
|
|
WString::from_chars(s)
|
|
|
|
}
|
|
|
|
|
2024-05-06 19:58:10 +00:00
|
|
|
/// Return the index of the first occurrence of the given char, or None.
|
2023-01-14 22:56:24 +00:00
|
|
|
fn find_char(&self, c: char) -> Option<usize> {
|
|
|
|
self.as_char_slice().iter().position(|&x| x == c)
|
|
|
|
}
|
|
|
|
|
2023-04-18 09:53:48 +00:00
|
|
|
fn contains(&self, c: char) -> bool {
|
|
|
|
self.as_char_slice().iter().any(|&x| x == c)
|
|
|
|
}
|
|
|
|
|
2024-05-06 19:58:10 +00:00
|
|
|
/// Return whether we start with a given Prefix.
|
2023-01-14 22:56:24 +00:00
|
|
|
/// The Prefix can be a char, a &str, a &wstr, or a &WString.
|
2023-03-06 03:52:11 +00:00
|
|
|
fn starts_with<Prefix: IntoCharIter>(&self, prefix: Prefix) -> bool {
|
2023-01-14 22:56:24 +00:00
|
|
|
iter_prefixes_iter(prefix.chars(), self.as_char_slice().iter().copied())
|
|
|
|
}
|
|
|
|
|
2024-04-20 08:49:13 +00:00
|
|
|
fn strip_prefix<Prefix: IntoCharIter>(&self, prefix: Prefix) -> Option<&wstr> {
|
2024-03-30 15:10:12 +00:00
|
|
|
let iter = prefix.chars();
|
|
|
|
let prefix_len = iter.clone().count();
|
2024-04-20 08:49:13 +00:00
|
|
|
iter_prefixes_iter(iter, self.as_char_slice().iter().copied())
|
|
|
|
.then(|| self.slice_from(prefix_len))
|
2024-03-30 15:10:12 +00:00
|
|
|
}
|
|
|
|
|
2024-05-06 19:58:10 +00:00
|
|
|
/// Return whether we end with a given Suffix.
|
2023-01-14 22:56:24 +00:00
|
|
|
/// The Suffix can be a char, a &str, a &wstr, or a &WString.
|
2023-03-06 03:52:11 +00:00
|
|
|
fn ends_with<Suffix: IntoCharIter>(&self, suffix: Suffix) -> bool {
|
2023-01-14 22:56:24 +00:00
|
|
|
iter_prefixes_iter(
|
|
|
|
suffix.chars().rev(),
|
|
|
|
self.as_char_slice().iter().copied().rev(),
|
|
|
|
)
|
|
|
|
}
|
2024-01-17 06:41:47 +00:00
|
|
|
|
|
|
|
fn trim_matches(&self, pat: char) -> &wstr {
|
|
|
|
let slice = self.as_char_slice();
|
|
|
|
let leading_count = slice.chars().take_while(|&c| c == pat).count();
|
|
|
|
let trailing_count = slice.chars().rev().take_while(|&c| c == pat).count();
|
|
|
|
if leading_count == slice.len() {
|
|
|
|
return L!("");
|
|
|
|
}
|
|
|
|
let slice = self.slice_from(leading_count);
|
|
|
|
slice.slice_to(slice.len() - trailing_count)
|
|
|
|
}
|
2023-01-14 22:56:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// An owned wide string exposes the chars of its borrowed `wstr` view.
impl WExt for WString {
    fn as_char_slice(&self) -> &[char] {
        let borrowed = self.as_utfstr();
        borrowed.as_char_slice()
    }
}
|
|
|
|
|
|
|
|
impl WExt for wstr {
|
|
|
|
fn as_char_slice(&self) -> &[char] {
|
|
|
|
wstr::as_char_slice(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::wchar::L;

    /// find_char() reports the first matching index, or None.
    #[test]
    fn test_find_char() {
        assert_eq!(Some(0), L!("abc").find_char('a'));
        assert_eq!(Some(1), L!("abc").find_char('b'));
        assert_eq!(None, L!("abc").find_char('X'));
        assert_eq!(None, L!("").find_char('X'));
    }

    /// starts_with() accepts every kind of prefix argument.
    #[test]
    fn test_prefix() {
        assert!(L!("").starts_with(L!("")));
        assert!(L!("abc").starts_with(L!("")));
        assert!(L!("abc").starts_with('a'));
        assert!(L!("abc").starts_with("ab"));
        assert!(L!("abc").starts_with(L!("ab")));
        assert!(L!("abc").starts_with(&WString::from_str("abc")));
    }

    /// ends_with() accepts every kind of suffix argument.
    #[test]
    fn test_suffix() {
        assert!(L!("").ends_with(L!("")));
        assert!(L!("abc").ends_with(L!("")));
        assert!(L!("abc").ends_with('c'));
        assert!(L!("abc").ends_with("bc"));
        assert!(L!("abc").ends_with(L!("bc")));
        assert!(L!("abc").ends_with(&WString::from_str("abc")));
    }

    /// split() keeps empty pieces, including a trailing one.
    #[test]
    fn test_split() {
        fn do_split(s: &wstr, c: char) -> Vec<&wstr> {
            s.split(c).collect()
        }
        assert_eq!(do_split(L!(""), 'b'), &[""]);
        assert_eq!(do_split(L!("abc"), 'b'), &["a", "c"]);
        assert_eq!(do_split(L!("xxb"), 'x'), &["", "", "b"]);
        assert_eq!(do_split(L!("bxxxb"), 'x'), &["b", "", "", "b"]);
        assert_eq!(do_split(L!(""), 'x'), &[""]);
        assert_eq!(do_split(L!("foo,bar,baz"), ','), &["foo", "bar", "baz"]);
        assert_eq!(do_split(L!("foobar"), ','), &["foobar"]);
        assert_eq!(do_split(L!("1,2,3,4,5"), ','), &["1", "2", "3", "4", "5"]);
        assert_eq!(
            do_split(L!("1,2,3,4,5,"), ','),
            &["1", "2", "3", "4", "5", ""]
        );
        assert_eq!(
            do_split(L!("Hello\nworld\nRust"), '\n'),
            &["Hello", "world", "Rust"]
        );
    }

    #[test]
    fn find_prefix() {
        let needle = L!("hello");
        let haystack = L!("hello world");
        assert_eq!(haystack.find(needle), Some(0));
    }

    #[test]
    fn find_one() {
        let needle = L!("ello");
        let haystack = L!("hello world");
        assert_eq!(haystack.find(needle), Some(1));
    }

    #[test]
    fn find_suffix() {
        let needle = L!("world");
        let haystack = L!("hello world");
        assert_eq!(haystack.find(needle), Some(6));
    }

    #[test]
    fn find_none() {
        let needle = L!("worldz");
        let haystack = L!("hello world");
        assert_eq!(haystack.find(needle), None);
    }

    #[test]
    fn find_none_larger() {
        // Notice that `haystack` and `needle` are reversed.
        let haystack = L!("world");
        let needle = L!("hello world");
        assert_eq!(haystack.find(needle), None);
    }

    #[test]
    fn find_none_case_mismatch() {
        let haystack = L!("wOrld");
        let needle = L!("hello world");
        assert_eq!(haystack.find(needle), None);
    }

    /// trim_matches() strips the pattern char from both ends only.
    #[test]
    fn test_trim_matches() {
        assert_eq!(L!("|foo|").trim_matches('|'), L!("foo"));
        assert_eq!(L!("<foo|").trim_matches('|'), L!("<foo"));
        assert_eq!(L!("|foo>").trim_matches('|'), L!("foo>"));
        // Repeated and interior pattern chars.
        assert_eq!(L!("||foo|||").trim_matches('|'), L!("foo"));
        assert_eq!(L!("|a|b|").trim_matches('|'), L!("a|b"));
        // Nothing to trim.
        assert_eq!(L!("foo").trim_matches('|'), L!("foo"));
        // Degenerate inputs: empty, and made up entirely of the pattern char.
        assert_eq!(L!("").trim_matches('|'), L!(""));
        assert_eq!(L!("|").trim_matches('|'), L!(""));
        assert_eq!(L!("||").trim_matches('|'), L!(""));
    }

    /// try_char_at() reads one past the end as NUL and anything further as None.
    #[test]
    fn test_try_char_at() {
        assert_eq!(L!("ab").try_char_at(0), Some('a'));
        assert_eq!(L!("ab").try_char_at(1), Some('b'));
        assert_eq!(L!("ab").try_char_at(2), Some('\0'));
        assert_eq!(L!("ab").try_char_at(3), None);
        assert_eq!(L!("").try_char_at(0), Some('\0'));
        assert_eq!(L!("").try_char_at(1), None);
    }
}
|