Provide the ability to split strings in columns via polars str-split (#14723)

# Description
Adds the ability to split the values of a string column on a separator.
The resulting column has type `list<str>`.

```nushell
> ❯ : [[a]; ["one,two,three"]] | polars into-df | polars select (polars col a | polars str-split ",") | polars collect
╭───┬───────────────╮
│ # │       a       │
├───┼───────────────┤
│ 0 │ ╭───┬───────╮ │
│   │ │ 0 │ one   │ │
│   │ │ 1 │ two   │ │
│   │ │ 2 │ three │ │
│   │ ╰───┴───────╯ │
╰───┴───────────────╯

> ❯ : [[a]; ["one,two,three"]] | polars into-df | polars select (polars col a | polars str-split ",") | polars schema
╭───┬───────────╮
│ a │ list<str> │
╰───┴───────────╯
```



# User-Facing Changes
- Introduces a new command, `polars str-split`.
This commit is contained in:
Jack Wright 2025-01-02 13:03:24 -08:00 committed by GitHub
parent 0d3f76ddef
commit df3892f323
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 94 additions and 0 deletions

View file

@ -5,6 +5,7 @@ mod replace_all;
mod str_join;
mod str_lengths;
mod str_slice;
mod str_split;
mod to_lowercase;
mod to_uppercase;
@ -27,6 +28,7 @@ pub(crate) fn string_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlug
Box::new(Contains),
Box::new(Replace),
Box::new(ReplaceAll),
Box::new(str_split::StrSplit),
Box::new(StrJoin),
Box::new(StrLengths),
Box::new(StrSlice),

View file

@ -0,0 +1,92 @@
use crate::{
values::{CustomValueSupport, NuDataFrame, NuExpression},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, Spanned, SyntaxShape, Type,
Value,
};
use polars::df;
/// Plugin command `polars str-split`.
///
/// Carries no state of its own; all behavior lives in the `PluginCommand`
/// implementation for this type, which splits a string expression by a
/// separator expression.
#[derive(Clone)]
pub struct StrSplit;
/// Registers [`StrSplit`] as the `polars str-split` plugin command.
impl PluginCommand for StrSplit {
    type Plugin = PolarsPlugin;

    /// The name the command is invoked by.
    fn name(&self) -> &str {
        "polars str-split"
    }

    /// One-line help text for the command.
    fn description(&self) -> &str {
        "Split the string by a substring. The resulting dtype is list<str>."
    }

    /// Declares one required positional argument (the separator) and an
    /// expression-in / expression-out type signature.
    fn signature(&self) -> Signature {
        // Input and output share the same custom type, so build it in one place.
        let expression_type = || Type::Custom("expression".into());

        Signature::build(self.name())
            .required("expr", SyntaxShape::Any, "Separator expression")
            .input_output_types(vec![(expression_type(), expression_type())])
            .category(Category::Custom("dataframe".into()))
    }

    /// A single example whose expected result is a one-column dataframe
    /// holding the three split values as separate rows.
    fn examples(&self) -> Vec<Example> {
        let exploded = df!(
            "a" => ["one", "two", "three"]
        )
        .expect("Should be able to create a dataframe");
        let expected = NuDataFrame::from(exploded).into_value(Span::test_data());

        vec![Example {
            description: "Split the string by comma, then create a new row for each string",
            // The continuation lines below are part of the raw string literal,
            // so their leading whitespace is left exactly as it is.
            example: r#"[[a]; ["one,two,three"]] | polars into-df
| polars select (polars col a | polars str-split "," | polars explode)
| polars collect"#,
            result: Some(expected),
        }]
    }

    /// Builds a new expression that splits the piped-in string expression by
    /// the separator given as the first positional argument.
    ///
    /// # Errors
    /// Fails if the positional argument is missing, if either the argument or
    /// the pipeline input cannot be converted into a `NuExpression`, or if the
    /// resulting expression cannot be turned back into pipeline data.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, LabeledError> {
        // Only the value of the argument is needed; its span is not used.
        let Spanned {
            item: separator_value,
            ..
        } = call.req::<Spanned<Value>>(0)?;
        let separator = NuExpression::try_from_value(plugin, &separator_value)?;

        let target = NuExpression::try_from_pipeline(plugin, input, call.head)?;

        let split: NuExpression = target
            .into_polars()
            .str()
            .split(separator.into_polars())
            .into();

        // `?` performs the same error conversion into `LabeledError`.
        Ok(split.to_pipeline_data(plugin, engine, call.head)?)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Hands the command to the shared plugin test helper, which is expected
    /// to exercise the entries returned by `examples()`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = StrSplit;
        test_polars_plugin_command(&command)
    }
}