From ec3e0e593dd86a760374d4a69f005715214c8b20 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 7 Aug 2024 04:36:52 -0700 Subject: [PATCH] `polars first` and `polars last` will now handle lazy frames natively (#13555) # Description Prior this pull request `polars first` and `polars last` would collect a lazy frame into an eager frame before performing operations. Now `polars first` will to a `LazyFrame::limit` and `polars last` will perform a `LazyFrame::tail`. This is really useful in working with very large datasets. --- .../src/dataframe/eager/first.rs | 38 +++++++++++++----- .../src/dataframe/eager/last.rs | 39 ++++++++++++++----- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/eager/first.rs b/crates/nu_plugin_polars/src/dataframe/eager/first.rs index a35ea7dd16..b874c257cb 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/first.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/first.rs @@ -1,5 +1,5 @@ use crate::{ - values::{Column, CustomValueSupport, NuLazyFrame}, + values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject}, PolarsPlugin, }; @@ -98,20 +98,25 @@ impl PluginCommand for FirstDF { input: PipelineData, ) -> Result { let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; - command(plugin, engine, call, df).map_err(|e| e.into()) - } else { - let expr = NuExpression::try_from_value(plugin, &value)?; - let expr: NuExpression = expr.into_polars().first().into(); + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => { + command_eager(plugin, engine, call, df).map_err(|e| e.into()) + } + PolarsPluginObject::NuLazyFrame(lazy) => { + command_lazy(plugin, engine, call, lazy).map_err(|e| e.into()) + } + _ => { + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = expr.into_polars().first().into(); - expr.to_pipeline_data(plugin, engine, call.head) - .map_err(LabeledError::from) + expr.to_pipeline_data(plugin, engine, call.head) + .map_err(LabeledError::from) + } } } } -fn command( +fn command_eager( plugin: &PolarsPlugin, engine: &EngineInterface, call: &EvaluatedCall, @@ -126,6 +131,19 @@ fn command( res.to_pipeline_data(plugin, engine, call.head) } +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let rows: Option = call.opt(0)?; + let rows = rows.unwrap_or(1); + + let res: NuLazyFrame = lazy.to_polars().limit(rows).into(); + res.to_pipeline_data(plugin, engine, call.head) +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/nu_plugin_polars/src/dataframe/eager/last.rs b/crates/nu_plugin_polars/src/dataframe/eager/last.rs index 23b44eb473..316d346085 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/last.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/last.rs @@ -1,5 +1,5 @@ use crate::{ - values::{Column, CustomValueSupport, NuLazyFrame}, + values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject}, PolarsPlugin, }; @@ -73,20 +73,25 @@ impl PluginCommand for LastDF { input: PipelineData, ) -> Result { let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; - command(plugin, engine, call, df).map_err(|e| e.into()) - } else { - let expr = NuExpression::try_from_value(plugin, &value)?; - let expr: NuExpression = expr.into_polars().last().into(); + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => { + command_eager(plugin, engine, call, df).map_err(|e| e.into()) + } + PolarsPluginObject::NuLazyFrame(lazy) => { + command_lazy(plugin, engine, call, lazy).map_err(|e| e.into()) + } + _ => { + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = expr.into_polars().last().into(); - expr.to_pipeline_data(plugin, engine, call.head) - .map_err(LabeledError::from) + expr.to_pipeline_data(plugin, engine, call.head) + .map_err(LabeledError::from) + } } } } -fn command( +fn command_eager( plugin: &PolarsPlugin, engine: &EngineInterface, call: &EvaluatedCall, @@ -100,6 +105,20 @@ fn command( res.to_pipeline_data(plugin, engine, call.head) } +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let rows: Option = call.opt(0)?; + let rows = rows.unwrap_or(DEFAULT_ROWS as u32); + + let res: NuLazyFrame = lazy.to_polars().tail(rows).into(); + + res.to_pipeline_data(plugin, engine, call.head) +} + #[cfg(test)] mod test { use crate::test::test_polars_plugin_command;