From 981a000ee81b71692ac5d24ad7095a6460dabd94 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Tue, 17 Dec 2024 09:55:42 -0800 Subject: [PATCH] Added flag --coalesce-columns to allow columns to be coalesced on full joins (#14578) - fixes #14572 # Description This allows columns to be coalesced on full joins with `polars join`, providing functionality similar to the old `--outer` join behavior. # User-Facing Changes - Provides a new flag `--coalesce-columns` on the `polars join` command --- .../src/dataframe/command/data/join.rs | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/join.rs b/crates/nu_plugin_polars/src/dataframe/command/data/join.rs index b1c13fef00..bd140b5c2a 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/join.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/join.rs @@ -8,7 +8,10 @@ use nu_protocol::{ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::{Expr, JoinType}; +use polars::{ + df, + prelude::{Expr, JoinCoalesce, JoinType}, +}; #[derive(Clone)] pub struct LazyJoin; @@ -37,6 +40,7 @@ impl PluginCommand for LazyJoin { .switch("left", "left join between lazyframes", Some('l')) .switch("full", "full join between lazyframes", Some('f')) .switch("cross", "cross join between lazyframes", Some('c')) + .switch("coalesce-columns", "Sets the join coalesce strategy to colesce columns. 
Most useful when used with --full, which will not otherwise coalesce.", None) .named( "suffix", SyntaxShape::String, @@ -172,6 +176,24 @@ impl PluginCommand for LazyJoin { .into_value(Span::test_data()), ), }, + Example { + description: "Perform a full join of two dataframes and coalesce columns", + example: r#"let table1 = [[A B]; ["common" "common"] ["table1" "only"]] | polars into-df + let table2 = [[A C]; ["common" "common"] ["table2" "only"]] | polars into-df + $table1 | polars join -f $table2 --coalesce-columns A A"#, + result: Some( + NuDataFrame::new( + false, + df!( + "A" => [Some("common"), Some("table2"), Some("table1")], + "B" => [Some("common"), None, Some("only")], + "C" => [Some("common"), Some("only"), None] + ) + .expect("Should have created a DataFrame"), + ) + .into_value(Span::test_data()), + ), + }, Example { description: "Join one eager dataframe with another using a cross join", example: r#"let tokens = [[monopoly_token]; [hat] [shoe] [boat]] | polars into-df @@ -279,9 +301,17 @@ impl PluginCommand for LazyJoin { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let from_eager = lazy.from_eager; let lazy = lazy.to_polars(); + + let coalesce = if call.has_flag("coalesce-columns")? { + JoinCoalesce::CoalesceColumns + } else { + JoinCoalesce::default() + }; + let lazy = if cross { lazy.join_builder() .with(other) + .coalesce(coalesce) .left_on(vec![]) .right_on(vec![]) .how(how) @@ -291,6 +321,7 @@ impl PluginCommand for LazyJoin { } else { lazy.join_builder() .with(other) + .coalesce(coalesce) .left_on(left_on) .right_on(right_on) .how(how)