polars first and polars last will now handle lazy frames natively (#13555)

# Description
Prior to this pull request, `polars first` and `polars last` would collect a
lazy frame into an eager frame before performing operations. Now `polars
first` will perform a `LazyFrame::limit` and `polars last` will perform a
`LazyFrame::tail`. This is really useful when working with very large
datasets.
This commit is contained in:
Jack Wright 2024-08-07 04:36:52 -07:00 committed by GitHub
parent ff09c7964e
commit ec3e0e593d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 20 deletions

View file

@ -1,5 +1,5 @@
use crate::{
values::{Column, CustomValueSupport, NuLazyFrame},
values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject},
PolarsPlugin,
};
@ -98,20 +98,25 @@ impl PluginCommand for FirstDF {
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().first().into();
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
command_eager(plugin, engine, call, df).map_err(|e| e.into())
}
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
}
_ => {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().first().into();
expr.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
expr.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
}
}
}
}
fn command(
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
@ -126,6 +131,19 @@ fn command(
res.to_pipeline_data(plugin, engine, call.head)
}
/// Handles `polars first` for a lazy frame input.
///
/// Reads the optional positional argument at index 0 as the row count
/// (falling back to 1 when it is absent), applies `limit` to the underlying
/// polars lazy frame, and returns the result wrapped as a `NuLazyFrame`.
///
/// # Errors
/// Propagates any `ShellError` raised while reading the argument or while
/// converting the result into pipeline data.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
    // A missing row-count argument means "just the first row".
    let row_count = call.opt::<u32>(0)?.unwrap_or(1);
    let limited: NuLazyFrame = lazy.to_polars().limit(row_count).into();
    limited.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;

View file

@ -1,5 +1,5 @@
use crate::{
values::{Column, CustomValueSupport, NuLazyFrame},
values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject},
PolarsPlugin,
};
@ -73,20 +73,25 @@ impl PluginCommand for LastDF {
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().last().into();
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
command_eager(plugin, engine, call, df).map_err(|e| e.into())
}
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
}
_ => {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().last().into();
expr.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
expr.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
}
}
}
}
fn command(
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
@ -100,6 +105,20 @@ fn command(
res.to_pipeline_data(plugin, engine, call.head)
}
/// Handles `polars last` for a lazy frame input.
///
/// Reads the optional positional argument at index 0 as the row count
/// (falling back to `DEFAULT_ROWS` when it is absent), applies `tail` to the
/// underlying polars lazy frame, and returns the result wrapped as a
/// `NuLazyFrame`.
///
/// # Errors
/// Propagates any `ShellError` raised while reading the argument or while
/// converting the result into pipeline data.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
    let requested = call.opt::<u32>(0)?;
    // No explicit count given: use the command's default number of rows.
    let row_count = match requested {
        Some(n) => n,
        None => DEFAULT_ROWS as u32,
    };
    let trailing: NuLazyFrame = lazy.to_polars().tail(row_count).into();
    trailing.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;