Fix output signature of split chars/words (#9739)

# Description
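`split chars` and `split words` did *not* vectorize over list input in the
strict sense: they performed a flat map, so the pieces of every input string
were merged into a single flat list. Now they return one list per input
string, so the output keeps the structure of the input. For example: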

```
["foo" "bar"] | split chars
```

## Before 

```
╭───┬───╮
│ 0 │ f │
│ 1 │ o │
│ 2 │ o │
│ 3 │ b │
│ 4 │ a │
│ 5 │ r │
╰───┴───╯
```

## After
```
╭───┬───────────╮
│ 0 │ ╭───┬───╮ │
│   │ │ 0 │ f │ │
│   │ │ 1 │ o │ │
│   │ │ 2 │ o │ │
│   │ ╰───┴───╯ │
│ 1 │ ╭───┬───╮ │
│   │ │ 0 │ b │ │
│   │ │ 1 │ a │ │
│   │ │ 2 │ r │ │
│   │ ╰───┴───╯ │
╰───┴───────────╯
```
This commit is contained in:
Stefan Holderbach 2023-07-24 00:06:41 +02:00 committed by GitHub
parent 17f8ad7210
commit 2aeb77bd3e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 35 deletions

View file

@ -16,14 +16,20 @@ impl Command for SubCommand {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("split chars") Signature::build("split chars")
.input_output_types(vec![(Type::String, Type::List(Box::new(Type::String)))]) .input_output_types(vec![
(Type::String, Type::List(Box::new(Type::String))),
(
Type::List(Box::new(Type::String)),
Type::List(Box::new(Type::List(Box::new(Type::String)))),
),
])
.allow_variants_without_examples(true)
.switch("grapheme-clusters", "split on grapheme clusters", Some('g')) .switch("grapheme-clusters", "split on grapheme clusters", Some('g'))
.switch( .switch(
"code-points", "code-points",
"split on code points (default; splits combined characters)", "split on code points (default; splits combined characters)",
Some('c'), Some('c'),
) )
.vectorizes_over_list(true)
.category(Category::Strings) .category(Category::Strings)
} }
@ -63,6 +69,26 @@ impl Command for SubCommand {
span: Span::test_data(), span: Span::test_data(),
}), }),
}, },
Example {
description: "Split multiple strings into lists of characters",
example: "['hello', 'world'] | split chars",
result: Some(Value::test_list(vec![
Value::test_list(vec![
Value::test_string("h"),
Value::test_string("e"),
Value::test_string("l"),
Value::test_string("l"),
Value::test_string("o"),
]),
Value::test_list(vec![
Value::test_string("w"),
Value::test_string("o"),
Value::test_string("r"),
Value::test_string("l"),
Value::test_string("d"),
]),
])),
},
] ]
} }
@ -85,42 +111,45 @@ fn split_chars(
let span = call.head; let span = call.head;
let graphemes = grapheme_flags(call)?; let graphemes = grapheme_flags(call)?;
input.flat_map( input.map(
move |x| split_chars_helper(&x, span, graphemes), move |x| split_chars_helper(&x, span, graphemes),
engine_state.ctrlc.clone(), engine_state.ctrlc.clone(),
) )
} }
fn split_chars_helper(v: &Value, name: Span, graphemes: bool) -> Vec<Value> { fn split_chars_helper(v: &Value, name: Span, graphemes: bool) -> Value {
match v.span() { match v.span() {
Ok(v_span) => { Ok(v_span) => {
if let Ok(s) = v.as_string() { if let Ok(s) = v.as_string() {
if graphemes { Value::List {
s.graphemes(true) vals: if graphemes {
.collect::<Vec<_>>() s.graphemes(true)
.into_iter() .collect::<Vec<_>>()
.map(move |x| Value::string(x, v_span)) .into_iter()
.collect() .map(move |x| Value::string(x, v_span))
} else { .collect()
s.chars() } else {
.collect::<Vec<_>>() s.chars()
.into_iter() .collect::<Vec<_>>()
.map(move |x| Value::string(x, v_span)) .into_iter()
.collect() .map(move |x| Value::string(x, v_span))
.collect()
},
span: v_span,
} }
} else { } else {
vec![Value::Error { Value::Error {
error: Box::new(ShellError::PipelineMismatch { error: Box::new(ShellError::PipelineMismatch {
exp_input_type: "string".into(), exp_input_type: "string".into(),
dst_span: name, dst_span: name,
src_span: v_span, src_span: v_span,
}), }),
}] }
} }
} }
Err(error) => vec![Value::Error { Err(error) => Value::Error {
error: Box::new(error), error: Box::new(error),
}], },
} }
} }

View file

@ -18,8 +18,14 @@ impl Command for SubCommand {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("split words") Signature::build("split words")
.input_output_types(vec![(Type::String, Type::List(Box::new(Type::String)))]) .input_output_types(vec![
.vectorizes_over_list(true) (Type::String, Type::List(Box::new(Type::String))),
(
Type::List(Box::new(Type::String)),
Type::List(Box::new(Type::List(Box::new(Type::String))))
),
])
.allow_variants_without_examples(true)
.category(Category::Strings) .category(Category::Strings)
// .switch( // .switch(
// "ignore-hyphenated", // "ignore-hyphenated",
@ -133,18 +139,13 @@ fn split_words(
} }
let graphemes = grapheme_flags(call)?; let graphemes = grapheme_flags(call)?;
input.flat_map( input.map(
move |x| split_words_helper(&x, word_length, span, graphemes), move |x| split_words_helper(&x, word_length, span, graphemes),
engine_state.ctrlc.clone(), engine_state.ctrlc.clone(),
) )
} }
fn split_words_helper( fn split_words_helper(v: &Value, word_length: Option<usize>, span: Span, graphemes: bool) -> Value {
v: &Value,
word_length: Option<usize>,
span: Span,
graphemes: bool,
) -> Vec<Value> {
// There are some options here with this regex. // There are some options here with this regex.
// [^A-Za-z\'] = do not match uppercase or lowercase letters or apostrophes // [^A-Za-z\'] = do not match uppercase or lowercase letters or apostrophes
// [^[:alpha:]\'] = do not match any uppercase or lowercase letters or apostrophes // [^[:alpha:]\'] = do not match any uppercase or lowercase letters or apostrophes
@ -160,7 +161,7 @@ fn split_words_helper(
// let words: Vec<&str> = s.split_whitespace().collect(); // let words: Vec<&str> = s.split_whitespace().collect();
let replaced_string = regex_replace.replace_all(&s, " ").to_string(); let replaced_string = regex_replace.replace_all(&s, " ").to_string();
replaced_string let words = replaced_string
.split(' ') .split(' ')
.filter_map(|s| { .filter_map(|s| {
if s.trim() != "" { if s.trim() != "" {
@ -182,20 +183,24 @@ fn split_words_helper(
None None
} }
}) })
.collect::<Vec<Value>>() .collect::<Vec<Value>>();
Value::List {
vals: words,
span: v_span,
}
} else { } else {
vec![Value::Error { Value::Error {
error: Box::new(ShellError::PipelineMismatch { error: Box::new(ShellError::PipelineMismatch {
exp_input_type: "string".into(), exp_input_type: "string".into(),
dst_span: span, dst_span: span,
src_span: v_span, src_span: v_span,
}), }),
}] }
} }
} }
Err(error) => vec![Value::Error { Err(error) => Value::Error {
error: Box::new(error), error: Box::new(error),
}], },
} }
} }