Ensure that lazy frames created via `polars into-lazy` are not converted back to eager dataframes later in the pipeline. (#12525)

# Description
@maxim-uvarov discovered the following error:
```
> [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a]
Error:   × Error using as series
   ╭─[entry #1:1:68]
 1 │ [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a]
   ·                                                                    ──────┬──────
   ·                                                                          ╰── dataframe has more than one column
   ╰────
 ```
 
During investigation, I discovered that the root cause was the lazy frame being incorrectly converted back to an eager dataframe. To keep this from happening, I now explicitly mark the lazy frame produced by `polars into-lazy` as not having come from an eager frame (`from_eager = false`). As a result, the conversion logic no longer attempts to convert it back to an eager dataframe later in the pipeline.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2024-04-15 16:29:42 -07:00 committed by GitHub
parent 078ba5aabe
commit 5f818eaefe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 36 additions and 7 deletions

View file

@ -52,10 +52,33 @@ impl PluginCommand for ToLazyFrame {
.transpose()?; .transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?; let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df); let mut lazy = NuLazyFrame::from_dataframe(df);
// We don't want this converted back to an eager dataframe at some point
lazy.from_eager = false;
Ok(PipelineData::Value( Ok(PipelineData::Value(
lazy.cache(plugin, engine, call.head)?.into_value(call.head), lazy.cache(plugin, engine, call.head)?.into_value(call.head),
None, None,
)) ))
} }
} }
// Regression tests for the `polars into-lazy` command.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use nu_plugin_test_support::PluginTest;
use nu_protocol::{ShellError, Span};
use super::*;
/// Verifies that the value produced by `polars into-lazy` is a lazy frame
/// whose `from_eager` flag is `false`.
///
/// Returns any `ShellError` raised while setting up the plugin test harness,
/// evaluating the pipeline, or reading the lazy frame back from the value.
#[test]
fn test_to_lazy() -> Result<(), ShellError> {
// The plugin is wrapped in an `Arc` so the same instance can be handed to
// the test harness and also used below to look up the resulting lazy frame.
let plugin: Arc<PolarsPlugin> = PolarsPlugin::new_test_mode().into();
let mut plugin_test = PluginTest::new("polars", Arc::clone(&plugin))?;
// Build a small two-column table and pipe it through the command under test.
let pipeline_data = plugin_test.eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?;
let value = pipeline_data.into_value(Span::test_data());
// Recover the lazy frame from the returned value via the shared plugin instance.
let df = NuLazyFrame::try_from_value(&plugin, &value)?;
// The flag must be cleared for frames created by `polars into-lazy`.
assert!(!df.from_eager);
Ok(())
}
}

View file

@ -64,7 +64,7 @@ impl NuLazyFrame {
help: None, help: None,
inner: vec![], inner: vec![],
}) })
.map(|df| NuDataFrame::new(!self.from_eager, df)) .map(|df| NuDataFrame::new(false, df))
} }
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self

View file

@ -182,12 +182,18 @@ pub mod test {
use nu_plugin_test_support::PluginTest; use nu_plugin_test_support::PluginTest;
use nu_protocol::{ShellError, Span}; use nu_protocol::{ShellError, Span};
pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> { impl PolarsPlugin {
let plugin = PolarsPlugin { /// Creates a new polars plugin in test mode
disable_cache_drop: true, pub fn new_test_mode() -> Self {
..PolarsPlugin::default() PolarsPlugin {
}; disable_cache_drop: true,
..PolarsPlugin::default()
}
}
}
pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> {
let plugin = PolarsPlugin::new_test_mode();
let examples = command.examples(); let examples = command.examples();
// we need to cache values in the examples // we need to cache values in the examples