Tweak the behavior of wstr::split to better match C++

Prior to this change, wstr::split had two weird behaviors:

1. Splitting an empty string would yield nothing, rather than a single
   empty string.
2. Splitting a string whose last character is the separator would not
   yield a trailing empty string.

For example, L!("x:y:").split(':') would return ["x", "y"] instead of
["x", "y", ""], which is what the C++ version returns.

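Fix both: splitting an empty string now yields a single empty string, and
a trailing separator now yields a trailing empty string.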
This commit is contained in:
ridiculousfish 2023-04-23 19:33:10 -07:00
parent de8288634a
commit fa39113bc6

View file

@ -161,23 +161,21 @@ where
/// Iterator type for splitting a wide string on a char. /// Iterator type for splitting a wide string on a char.
pub struct WStrCharSplitIter<'a> { pub struct WStrCharSplitIter<'a> {
split: char, split: char,
chars: &'a [char], chars: Option<&'a [char]>,
} }
impl<'a> Iterator for WStrCharSplitIter<'a> { impl<'a> Iterator for WStrCharSplitIter<'a> {
type Item = &'a wstr; type Item = &'a wstr;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.chars.is_empty() { let chars = self.chars?;
return None; if let Some(idx) = chars.iter().position(|c| *c == self.split) {
} else if let Some(idx) = self.chars.iter().position(|c| *c == self.split) { let (prefix, rest) = chars.split_at(idx);
let (prefix, rest) = self.chars.split_at(idx); self.chars = Some(&rest[1..]);
self.chars = &rest[1..];
return Some(wstr::from_char_slice(prefix)); return Some(wstr::from_char_slice(prefix));
} else { } else {
let res = self.chars; self.chars = None;
self.chars = &[]; return Some(wstr::from_char_slice(chars));
return Some(wstr::from_char_slice(res));
} }
} }
} }
@ -194,7 +192,13 @@ pub trait WExt {
wstr::from_char_slice(&chars[start..]) wstr::from_char_slice(&chars[start..])
} }
/// \return the char at an index. /// Return the number of chars.
/// This is different from Rust string len, which returns the number of bytes.
fn char_count(&self) -> usize {
self.as_char_slice().len()
}
/// Return the char at an index.
/// If the index is equal to the length, return '\0'. /// If the index is equal to the length, return '\0'.
/// If the index exceeds the length, then panic. /// If the index exceeds the length, then panic.
fn char_at(&self, index: usize) -> char { fn char_at(&self, index: usize) -> char {
@ -208,12 +212,10 @@ pub trait WExt {
/// \return an iterator over substrings, split by a given char. /// \return an iterator over substrings, split by a given char.
/// The split char is not included in the substrings. /// The split char is not included in the substrings.
/// If the string is empty, the iterator will return no strings.
/// Note this differs from std::slice::split, which return a single empty item.
fn split(&self, c: char) -> WStrCharSplitIter { fn split(&self, c: char) -> WStrCharSplitIter {
WStrCharSplitIter { WStrCharSplitIter {
split: c, split: c,
chars: self.as_char_slice(), chars: Some(self.as_char_slice()),
} }
} }
@ -292,13 +294,18 @@ mod tests {
fn do_split(s: &wstr, c: char) -> Vec<&wstr> { fn do_split(s: &wstr, c: char) -> Vec<&wstr> {
s.split(c).collect() s.split(c).collect()
} }
assert_eq!(do_split(L!(""), 'b'), &[""]);
assert_eq!(do_split(L!("abc"), 'b'), &["a", "c"]); assert_eq!(do_split(L!("abc"), 'b'), &["a", "c"]);
assert_eq!(do_split(L!("xxb"), 'x'), &["", "", "b"]); assert_eq!(do_split(L!("xxb"), 'x'), &["", "", "b"]);
assert_eq!(do_split(L!("bxxxb"), 'x'), &["b", "", "", "b"]); assert_eq!(do_split(L!("bxxxb"), 'x'), &["b", "", "", "b"]);
assert_eq!(do_split(L!(""), 'x'), &[] as &[&str]); assert_eq!(do_split(L!(""), 'x'), &[""]);
assert_eq!(do_split(L!("foo,bar,baz"), ','), &["foo", "bar", "baz"]); assert_eq!(do_split(L!("foo,bar,baz"), ','), &["foo", "bar", "baz"]);
assert_eq!(do_split(L!("foobar"), ','), &["foobar"]); assert_eq!(do_split(L!("foobar"), ','), &["foobar"]);
assert_eq!(do_split(L!("1,2,3,4,5"), ','), &["1", "2", "3", "4", "5"]); assert_eq!(do_split(L!("1,2,3,4,5"), ','), &["1", "2", "3", "4", "5"]);
assert_eq!(
do_split(L!("1,2,3,4,5,"), ','),
&["1", "2", "3", "4", "5", ""]
);
assert_eq!( assert_eq!(
do_split(L!("Hello\nworld\nRust"), '\n'), do_split(L!("Hello\nworld\nRust"), '\n'),
&["Hello", "world", "Rust"] &["Hello", "world", "Rust"]