mirror of
https://github.com/nushell/nushell
synced 2025-01-13 05:38:57 +00:00
tweak polars join for better cross joins (#14586)
# Description

closes #14585

This PR tries to make `polars join --cross` work better.

Example taken from https://docs.pola.rs/user-guide/transformations/joins/#cartesian-product

### Before

```nushell
❯ let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df
❯ let players = [[name, cash]; [Alice, 78] [Bob, 135]] | polars into-df
❯ $players | polars into-lazy | polars select (polars col name) | polars join --cross $tokens | polars collect
Error: nu::parser::missing_positional

  × Missing required positional argument.
   ╭─[entry #3:1:92]
 1 │ $players | polars into-lazy | polars select (polars col name) | polars join --cross $tokens
   ╰────
  help: Usage: polars join {flags} <other> <left_on> <right_on> . Use `--help` for more information.
```

### After

```nushell
❯ let players = [[name, cash]; [Alice, 78] [Bob, 135]] | polars into-df
❯ let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df
❯ $players | polars into-lazy | polars select (polars col name) | polars join --cross $tokens | polars collect
╭─#─┬─name──┬─monopoly_token─╮
│ 0 │ Alice │ hat            │
│ 1 │ Alice │ shoe           │
│ 2 │ Alice │ boat           │
│ 3 │ Bob   │ hat            │
│ 4 │ Bob   │ shoe           │
│ 5 │ Bob   │ boat           │
╰─#─┴─name──┴─monopoly_token─╯
```

Other examples

```nushell
❯ 1..3 | polars into-df | polars join --cross (4..6 | polars into-df)
╭─#─┬─0─┬─0_x─╮
│ 0 │ 1 │   4 │
│ 1 │ 1 │   5 │
│ 2 │ 1 │   6 │
│ 3 │ 2 │   4 │
│ 4 │ 2 │   5 │
│ 5 │ 2 │   6 │
│ 6 │ 3 │   4 │
│ 7 │ 3 │   5 │
│ 8 │ 3 │   6 │
╰─#─┴─0─┴─0_x─╯

❯ 1..3 | each {|x| {x: $x}} | polars into-df | polars join --cross (4..6 | each {|y| {y: $y}} | polars into-df) x y
╭─#─┬─x─┬─y─╮
│ 0 │ 1 │ 4 │
│ 1 │ 1 │ 5 │
│ 2 │ 1 │ 6 │
│ 3 │ 2 │ 4 │
│ 4 │ 2 │ 5 │
│ 5 │ 2 │ 6 │
│ 6 │ 3 │ 4 │
│ 7 │ 3 │ 5 │
│ 8 │ 3 │ 6 │
╰─#─┴─x─┴─y─╯
```

/cc @ayax79

# User-Facing Changes

<!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. -->

# Tests + Formatting

<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu  # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting

<!--
If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
parent
219b44a04f
commit
baf86dfb0e
1 changed file with 76 additions and 20 deletions
|
@ -27,8 +27,8 @@ impl PluginCommand for LazyJoin {
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build(self.name())
|
Signature::build(self.name())
|
||||||
.required("other", SyntaxShape::Any, "LazyFrame to join with")
|
.required("other", SyntaxShape::Any, "LazyFrame to join with")
|
||||||
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
|
.optional("left_on", SyntaxShape::Any, "Left column(s) to join on")
|
||||||
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
|
.optional("right_on", SyntaxShape::Any, "Right column(s) to join on")
|
||||||
.switch(
|
.switch(
|
||||||
"inner",
|
"inner",
|
||||||
"inner joining between lazyframes (default)",
|
"inner joining between lazyframes (default)",
|
||||||
|
@ -54,8 +54,8 @@ impl PluginCommand for LazyJoin {
|
||||||
vec![
|
vec![
|
||||||
Example {
|
Example {
|
||||||
description: "Join two lazy dataframes",
|
description: "Join two lazy dataframes",
|
||||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
|
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy)
|
||||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy)
|
||||||
$df_a | polars join $df_b a foo | polars collect"#,
|
$df_a | polars join $df_b a foo | polars collect"#,
|
||||||
result: Some(
|
result: Some(
|
||||||
NuDataFrame::try_from_columns(
|
NuDataFrame::try_from_columns(
|
||||||
|
@ -114,8 +114,8 @@ impl PluginCommand for LazyJoin {
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Join one eager dataframe with a lazy dataframe",
|
description: "Join one eager dataframe with a lazy dataframe",
|
||||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
|
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df)
|
||||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy)
|
||||||
$df_a | polars join $df_b a foo"#,
|
$df_a | polars join $df_b a foo"#,
|
||||||
result: Some(
|
result: Some(
|
||||||
NuDataFrame::try_from_columns(
|
NuDataFrame::try_from_columns(
|
||||||
|
@ -172,6 +172,43 @@ impl PluginCommand for LazyJoin {
|
||||||
.into_value(Span::test_data()),
|
.into_value(Span::test_data()),
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "Join one eager dataframe with another using a cross join",
|
||||||
|
example: r#"let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df
|
||||||
|
let players = [[name, cash]; [Alice, 78] [Bob, 135]] | polars into-df
|
||||||
|
$players | polars select (polars col name) | polars join --cross $tokens | polars collect"#,
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::try_from_columns(
|
||||||
|
vec![
|
||||||
|
Column::new(
|
||||||
|
"name".to_string(),
|
||||||
|
vec![
|
||||||
|
Value::test_string("Alice"),
|
||||||
|
Value::test_string("Alice"),
|
||||||
|
Value::test_string("Alice"),
|
||||||
|
Value::test_string("Bob"),
|
||||||
|
Value::test_string("Bob"),
|
||||||
|
Value::test_string("Bob"),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
Column::new(
|
||||||
|
"monopoly_token".to_string(),
|
||||||
|
vec![
|
||||||
|
Value::test_string("hat"),
|
||||||
|
Value::test_string("shoe"),
|
||||||
|
Value::test_string("boat"),
|
||||||
|
Value::test_string("hat"),
|
||||||
|
Value::test_string("shoe"),
|
||||||
|
Value::test_string("boat"),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.expect("simple df for test should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,11 +237,21 @@ impl PluginCommand for LazyJoin {
|
||||||
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
|
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
|
||||||
let other = other.to_polars();
|
let other = other.to_polars();
|
||||||
|
|
||||||
let left_on: Value = call.req(1)?;
|
let left_on_opt: Option<Value> = call.opt(1)?;
|
||||||
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
|
let left_on = match left_on_opt {
|
||||||
|
Some(left_on_value) if left || left_on_opt.is_some() => {
|
||||||
|
NuExpression::extract_exprs(plugin, left_on_value)?
|
||||||
|
}
|
||||||
|
_ => vec![],
|
||||||
|
};
|
||||||
|
|
||||||
let right_on: Value = call.req(2)?;
|
let right_on_opt: Option<Value> = call.opt(2)?;
|
||||||
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
|
let right_on = match right_on_opt {
|
||||||
|
Some(right_on_value) if full || right_on_opt.is_some() => {
|
||||||
|
NuExpression::extract_exprs(plugin, right_on_value)?
|
||||||
|
}
|
||||||
|
_ => vec![],
|
||||||
|
};
|
||||||
|
|
||||||
if left_on.len() != right_on.len() {
|
if left_on.len() != right_on.len() {
|
||||||
let right_on: Value = call.req(2)?;
|
let right_on: Value = call.req(2)?;
|
||||||
|
@ -232,16 +279,25 @@ impl PluginCommand for LazyJoin {
|
||||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
||||||
let from_eager = lazy.from_eager;
|
let from_eager = lazy.from_eager;
|
||||||
let lazy = lazy.to_polars();
|
let lazy = lazy.to_polars();
|
||||||
|
let lazy = if cross {
|
||||||
let lazy = lazy
|
lazy.join_builder()
|
||||||
.join_builder()
|
.with(other)
|
||||||
|
.left_on(vec![])
|
||||||
|
.right_on(vec![])
|
||||||
|
.how(how)
|
||||||
|
.force_parallel(true)
|
||||||
|
.suffix(suffix)
|
||||||
|
.finish()
|
||||||
|
} else {
|
||||||
|
lazy.join_builder()
|
||||||
.with(other)
|
.with(other)
|
||||||
.left_on(left_on)
|
.left_on(left_on)
|
||||||
.right_on(right_on)
|
.right_on(right_on)
|
||||||
.how(how)
|
.how(how)
|
||||||
.force_parallel(true)
|
.force_parallel(true)
|
||||||
.suffix(suffix)
|
.suffix(suffix)
|
||||||
.finish();
|
.finish()
|
||||||
|
};
|
||||||
|
|
||||||
let lazy = NuLazyFrame::new(from_eager, lazy);
|
let lazy = NuLazyFrame::new(from_eager, lazy);
|
||||||
lazy.to_pipeline_data(plugin, engine, call.head)
|
lazy.to_pipeline_data(plugin, engine, call.head)
|
||||||
|
|
Loading…
Reference in a new issue