mirror of
https://github.com/nushell/nushell
synced 2025-01-26 11:55:20 +00:00
Provide the ability to split strings in columns via polars str-split
(#14723)
# Description

Provides the ability to split string columns. This will change the column type to `list<str>`.

```nushell
> ❯ : [[a]; ["one,two,three"]] | polars into-df | polars select (polars col a | polars str-split ",") | polars collect
╭───┬───────────────╮
│ # │       a       │
├───┼───────────────┤
│ 0 │ ╭───┬───────╮ │
│   │ │ 0 │ one   │ │
│   │ │ 1 │ two   │ │
│   │ │ 2 │ three │ │
│   │ ╰───┴───────╯ │
╰───┴───────────────╯

> ❯ : [[a]; ["one,two,three"]] | polars into-df | polars select (polars col a | polars str-split ",") | polars schema
╭───┬───────────╮
│ a │ list<str> │
╰───┴───────────╯
```

# User-Facing Changes

- Introduces new command `polars str-split`
This commit is contained in:
parent
0d3f76ddef
commit
df3892f323
2 changed files with 94 additions and 0 deletions
|
@ -5,6 +5,7 @@ mod replace_all;
|
|||
mod str_join;
|
||||
mod str_lengths;
|
||||
mod str_slice;
|
||||
mod str_split;
|
||||
mod to_lowercase;
|
||||
mod to_uppercase;
|
||||
|
||||
|
@ -27,6 +28,7 @@ pub(crate) fn string_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlug
|
|||
Box::new(Contains),
|
||||
Box::new(Replace),
|
||||
Box::new(ReplaceAll),
|
||||
Box::new(str_split::StrSplit),
|
||||
Box::new(StrJoin),
|
||||
Box::new(StrLengths),
|
||||
Box::new(StrSlice),
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
use crate::{
|
||||
values::{CustomValueSupport, NuDataFrame, NuExpression},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, Spanned, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::df;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StrSplit;
|
||||
|
||||
impl PluginCommand for StrSplit {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars str-split"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Split the string by a substring. The resulting dtype is list<str>."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("expr", SyntaxShape::Any, "Separator expression")
|
||||
.input_output_types(vec![(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Split the string by comma, then create a new row for each string",
|
||||
example: r#"[[a]; ["one,two,three"]] | polars into-df
|
||||
| polars select (polars col a | polars str-split "," | polars explode)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => ["one", "two", "three"]
|
||||
)
|
||||
.expect("Should be able to create a dataframe"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let separator = call.req::<Spanned<Value>>(0).and_then(|sep| {
|
||||
let sep_expr = NuExpression::try_from_value(plugin, &sep.item)?;
|
||||
Ok(Spanned {
|
||||
item: sep_expr,
|
||||
span: sep.span,
|
||||
})
|
||||
})?;
|
||||
|
||||
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
|
||||
let res: NuExpression = expr
|
||||
.into_polars()
|
||||
.str()
|
||||
.split(separator.item.into_polars())
|
||||
.into();
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Hands `StrSplit` to the shared `test_polars_plugin_command` helper and
    /// returns its result (presumably this exercises the command's declared
    /// examples; confirm against the helper's definition).
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&StrSplit)
    }
}
|
Loading…
Reference in a new issue