Added flag --coalesce-columns to allow columns to be coalesced on full joins (#14578)

- fixes #14572

# Description
This allows columns to be coalesced on full joins with `polars join`,
providing functionality similar to the old `--outer` join behavior.

# User-Facing Changes
- Provides a new flag `--coalesce-columns` on the `polars join` command
This commit is contained in:
Jack Wright 2024-12-17 09:55:42 -08:00 committed by GitHub
parent cc4da104e0
commit 981a000ee8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -8,7 +8,10 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value, Value,
}; };
use polars::prelude::{Expr, JoinType}; use polars::{
df,
prelude::{Expr, JoinCoalesce, JoinType},
};
#[derive(Clone)] #[derive(Clone)]
pub struct LazyJoin; pub struct LazyJoin;
@ -37,6 +40,7 @@ impl PluginCommand for LazyJoin {
.switch("left", "left join between lazyframes", Some('l')) .switch("left", "left join between lazyframes", Some('l'))
.switch("full", "full join between lazyframes", Some('f')) .switch("full", "full join between lazyframes", Some('f'))
.switch("cross", "cross join between lazyframes", Some('c')) .switch("cross", "cross join between lazyframes", Some('c'))
.switch("coalesce-columns", "Sets the join coalesce strategy to colesce columns. Most useful when used with --full, which will not otherwise coalesce.", None)
.named( .named(
"suffix", "suffix",
SyntaxShape::String, SyntaxShape::String,
@ -172,6 +176,24 @@ impl PluginCommand for LazyJoin {
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}, },
Example {
description: "Perform a full join of two dataframes and coalesce columns",
example: r#"let table1 = [[A B]; ["common" "common"] ["table1" "only"]] | polars into-df
let table2 = [[A C]; ["common" "common"] ["table2" "only"]] | polars into-df
$table1 | polars join -f $table2 --coalesce-columns A A"#,
result: Some(
NuDataFrame::new(
false,
df!(
"A" => [Some("common"), Some("table2"), Some("table1")],
"B" => [Some("common"), None, Some("only")],
"C" => [Some("common"), Some("only"), None]
)
.expect("Should have created a DataFrame"),
)
.into_value(Span::test_data()),
),
},
Example { Example {
description: "Join one eager dataframe with another using a cross join", description: "Join one eager dataframe with another using a cross join",
example: r#"let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df example: r#"let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df
@ -279,9 +301,17 @@ impl PluginCommand for LazyJoin {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let from_eager = lazy.from_eager; let from_eager = lazy.from_eager;
let lazy = lazy.to_polars(); let lazy = lazy.to_polars();
let coalesce = if call.has_flag("coalesce-columns")? {
JoinCoalesce::CoalesceColumns
} else {
JoinCoalesce::default()
};
let lazy = if cross { let lazy = if cross {
lazy.join_builder() lazy.join_builder()
.with(other) .with(other)
.coalesce(coalesce)
.left_on(vec![]) .left_on(vec![])
.right_on(vec![]) .right_on(vec![])
.how(how) .how(how)
@ -291,6 +321,7 @@ impl PluginCommand for LazyJoin {
} else { } else {
lazy.join_builder() lazy.join_builder()
.with(other) .with(other)
.coalesce(coalesce)
.left_on(left_on) .left_on(left_on)
.right_on(right_on) .right_on(right_on)
.how(how) .how(how)