Fix output signature of split chars/words (#9739)

# Description
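`split chars` and `split words` did *not* vectorize over list input in the
strict sense: they performed a flat map, so the pieces of every input string
were merged into one flat list. Now each input string produces its own list,
so a list of strings yields a list of lists.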

```
["foo" "bar"] | split chars
```

## Before

```
╭───┬───╮
│ 0 │ f │
│ 1 │ o │
│ 2 │ o │
│ 3 │ b │
│ 4 │ a │
│ 5 │ r │
╰───┴───╯
```

## After
```
╭───┬───────────╮
│ 0 │ ╭───┬───╮ │
│   │ │ 0 │ f │ │
│   │ │ 1 │ o │ │
│   │ │ 2 │ o │ │
│   │ ╰───┴───╯ │
│ 1 │ ╭───┬───╮ │
│   │ │ 0 │ b │ │
│   │ │ 1 │ a │ │
│   │ │ 2 │ r │ │
│   │ ╰───┴───╯ │
╰───┴───────────╯
```
This commit is contained in:
Stefan Holderbach 2023-07-24 00:06:41 +02:00 committed by GitHub
parent 17f8ad7210
commit 2aeb77bd3e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 35 deletions

View file

@ -16,14 +16,20 @@ impl Command for SubCommand {
fn signature(&self) -> Signature {
Signature::build("split chars")
.input_output_types(vec![(Type::String, Type::List(Box::new(Type::String)))])
.input_output_types(vec![
(Type::String, Type::List(Box::new(Type::String))),
(
Type::List(Box::new(Type::String)),
Type::List(Box::new(Type::List(Box::new(Type::String)))),
),
])
.allow_variants_without_examples(true)
.switch("grapheme-clusters", "split on grapheme clusters", Some('g'))
.switch(
"code-points",
"split on code points (default; splits combined characters)",
Some('c'),
)
.vectorizes_over_list(true)
.category(Category::Strings)
}
@ -63,6 +69,26 @@ impl Command for SubCommand {
span: Span::test_data(),
}),
},
Example {
description: "Split multiple strings into lists of characters",
example: "['hello', 'world'] | split chars",
result: Some(Value::test_list(vec![
Value::test_list(vec![
Value::test_string("h"),
Value::test_string("e"),
Value::test_string("l"),
Value::test_string("l"),
Value::test_string("o"),
]),
Value::test_list(vec![
Value::test_string("w"),
Value::test_string("o"),
Value::test_string("r"),
Value::test_string("l"),
Value::test_string("d"),
]),
])),
},
]
}
@ -85,42 +111,45 @@ fn split_chars(
let span = call.head;
let graphemes = grapheme_flags(call)?;
input.flat_map(
input.map(
move |x| split_chars_helper(&x, span, graphemes),
engine_state.ctrlc.clone(),
)
}
fn split_chars_helper(v: &Value, name: Span, graphemes: bool) -> Vec<Value> {
fn split_chars_helper(v: &Value, name: Span, graphemes: bool) -> Value {
match v.span() {
Ok(v_span) => {
if let Ok(s) = v.as_string() {
if graphemes {
s.graphemes(true)
.collect::<Vec<_>>()
.into_iter()
.map(move |x| Value::string(x, v_span))
.collect()
} else {
s.chars()
.collect::<Vec<_>>()
.into_iter()
.map(move |x| Value::string(x, v_span))
.collect()
Value::List {
vals: if graphemes {
s.graphemes(true)
.collect::<Vec<_>>()
.into_iter()
.map(move |x| Value::string(x, v_span))
.collect()
} else {
s.chars()
.collect::<Vec<_>>()
.into_iter()
.map(move |x| Value::string(x, v_span))
.collect()
},
span: v_span,
}
} else {
vec![Value::Error {
Value::Error {
error: Box::new(ShellError::PipelineMismatch {
exp_input_type: "string".into(),
dst_span: name,
src_span: v_span,
}),
}]
}
}
}
Err(error) => vec![Value::Error {
Err(error) => Value::Error {
error: Box::new(error),
}],
},
}
}

View file

@ -18,8 +18,14 @@ impl Command for SubCommand {
fn signature(&self) -> Signature {
Signature::build("split words")
.input_output_types(vec![(Type::String, Type::List(Box::new(Type::String)))])
.vectorizes_over_list(true)
.input_output_types(vec![
(Type::String, Type::List(Box::new(Type::String))),
(
Type::List(Box::new(Type::String)),
Type::List(Box::new(Type::List(Box::new(Type::String))))
),
])
.allow_variants_without_examples(true)
.category(Category::Strings)
// .switch(
// "ignore-hyphenated",
@ -133,18 +139,13 @@ fn split_words(
}
let graphemes = grapheme_flags(call)?;
input.flat_map(
input.map(
move |x| split_words_helper(&x, word_length, span, graphemes),
engine_state.ctrlc.clone(),
)
}
fn split_words_helper(
v: &Value,
word_length: Option<usize>,
span: Span,
graphemes: bool,
) -> Vec<Value> {
fn split_words_helper(v: &Value, word_length: Option<usize>, span: Span, graphemes: bool) -> Value {
// There are some options here with this regex.
// [^A-Za-z\'] = do not match uppercase or lowercase letters or apostrophes
// [^[:alpha:]\'] = do not match any uppercase or lowercase letters or apostrophes
@ -160,7 +161,7 @@ fn split_words_helper(
// let words: Vec<&str> = s.split_whitespace().collect();
let replaced_string = regex_replace.replace_all(&s, " ").to_string();
replaced_string
let words = replaced_string
.split(' ')
.filter_map(|s| {
if s.trim() != "" {
@ -182,20 +183,24 @@ fn split_words_helper(
None
}
})
.collect::<Vec<Value>>()
.collect::<Vec<Value>>();
Value::List {
vals: words,
span: v_span,
}
} else {
vec![Value::Error {
Value::Error {
error: Box::new(ShellError::PipelineMismatch {
exp_input_type: "string".into(),
dst_span: span,
src_span: v_span,
}),
}]
}
}
}
Err(error) => vec![Value::Error {
Err(error) => Value::Error {
error: Box::new(error),
}],
},
}
}