Dataframe new commands (#3425)

* Folder for dataframe commands

* New commands for dataframe
This commit is contained in:
Fernando Herrera 2021-05-15 08:24:11 +01:00 committed by GitHub
parent be2f66397b
commit c0cc9ce7cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 115 additions and 57 deletions

View file

@ -187,7 +187,7 @@ pub(crate) mod touch;
pub(crate) use all::Command as All; pub(crate) use all::Command as All;
pub(crate) use any::Command as Any; pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")] #[cfg(feature = "dataframe")]
pub(crate) use dataframe::Dataframe; pub(crate) use dataframe::{Dataframe, DataframeList, DataframeLoad};
pub(crate) use enter::Enter; pub(crate) use enter::Enter;
pub(crate) use every::Every; pub(crate) use every::Every;
pub(crate) use exec::Exec; pub(crate) use exec::Exec;

View file

@ -0,0 +1,38 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, UntaggedValue};
/// The bare `dataframe` command: turns the pipeline input into a dataframe value.
pub struct Command;

impl WholeStreamCommand for Command {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "dataframe"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Creates a dataframe from pipelined Table or List "
    }

    /// The command takes no positional or named arguments.
    fn signature(&self) -> Signature {
        Signature::build("dataframe")
    }

    /// Builds a `NuDataFrame` from the input stream and emits it as a single
    /// `UntaggedValue::Dataframe` value tagged with the command's name tag.
    ///
    /// # Errors
    /// Propagates any error from evaluating the arguments or from
    /// `NuDataFrame::try_from_iter`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        // Grab the tag before `args` is consumed by evaluation.
        let name_tag = args.call_info.name_tag.clone();
        let evaluated = args.evaluate_once()?;

        let frame = NuDataFrame::try_from_iter(evaluated.input, &name_tag)?;
        let value = UntaggedValue::Dataframe(frame).into_value(&name_tag);

        Ok(InputStream::one(value).to_output_stream())
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        let from_table = Example {
            description: "Takes an input stream and converts it to a dataframe",
            example: "echo [[a b];[1 2] [3 4]] | dataframe",
            result: None,
        };

        vec![from_table]
    }
}

View file

@ -0,0 +1,53 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
/// The `dataframe list` command: reports the dataframes found among the
/// variables of the current scope.
pub struct Dataframe;

impl WholeStreamCommand for Dataframe {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "dataframe list"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Lists stored dataframes"
    }

    /// The command takes no positional or named arguments.
    fn signature(&self) -> Signature {
        Signature::build("dataframe list")
    }

    /// Walks the scope variables and emits one row per dataframe value with
    /// the columns `name`, `file`, `rows` and `columns`.
    ///
    /// Dataframe values that carry no backing polars frame are skipped
    /// instead of being unwrapped, so a single empty value cannot panic the
    /// whole command.
    ///
    /// # Errors
    /// Propagates any error from evaluating the arguments.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        let args = args.evaluate_once()?;

        let mut dataframes: Vec<Value> = Vec::new();
        for (name, value) in args.context.scope.get_vars() {
            if let UntaggedValue::Dataframe(df) = value.value {
                // Without a backing frame there are no dimensions to report;
                // skip the entry rather than panic on `unwrap()`.
                if let Some(polars_df) = df.dataframe {
                    let mut data = TaggedDictBuilder::new(value.tag);

                    data.insert_value("name", name);
                    data.insert_value("file", df.name);
                    data.insert_value("rows", polars_df.height().to_string());
                    data.insert_value("columns", polars_df.width().to_string());

                    dataframes.push(data.into_value());
                }
            }
        }

        Ok(OutputStream::from_stream(dataframes.into_iter()))
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Lists loaded dataframes in current scope",
            example: "dataframe list",
            result: None,
        }]
    }
}

View file

@ -3,82 +3,47 @@ use std::path::PathBuf;
use crate::prelude::*; use crate::prelude::*;
use nu_engine::WholeStreamCommand; use nu_engine::WholeStreamCommand;
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue}; use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged; use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader}; use polars::prelude::{CsvReader, SerReader};
pub struct Dataframe; pub struct Dataframe;
#[derive(Deserialize)]
pub struct OpenArgs {
file: Tagged<PathBuf>,
}
impl WholeStreamCommand for Dataframe { impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str { fn name(&self) -> &str {
"dataframe" "dataframe load"
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Creates a dataframe from a csv file" "Loads dataframe form csv or parquet file"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("dataframe").named( Signature::build("dataframe load").required(
"file", "file",
SyntaxShape::FilePath, SyntaxShape::FilePath,
"the file path to load values from", "the file path to load values from",
Some('f'),
) )
} }
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> { fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
load_dataframe(args) create_from_file(args)
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![ vec![Example {
Example { description: "Takes a file name and creates a dataframe",
description: "Takes a file name and creates a dataframe", example: "dataframe load test.csv",
example: "dataframe -f test.csv", result: None,
result: None, }]
},
Example {
description: "Takes an input stream and converts it to a dataframe",
example: "echo [[a b];[1 2] [3 4]] | dataframe",
result: None,
},
]
} }
} }
// Creates a dataframe from either a file or a table.
// If both options are found, then an error is returned to the user.
// The InputStream can have a table and a dictionary as input variable.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The file has priority over stream input
if let Some(NamedValue::Value(_, _)) = args
.call_info()
.args
.named
.as_ref()
.map(|named| named.named.get("file"))
.flatten()
{
return create_from_file(args);
}
create_from_input(args)
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> { fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone(); let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
// Parsing the arguments that the function uses let file: Tagged<PathBuf> = args.req(0)?;
let (OpenArgs { file }, _) = args.process()?;
// Needs more detail and arguments while loading the dataframe // Needs more detail and arguments while loading the dataframe
// Options: // Options:
@ -128,12 +93,3 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(init.to_output_stream()) Ok(init.to_output_stream())
} }
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
Ok(init.to_output_stream())
}

View file

@ -0,0 +1,7 @@
pub mod command;
pub mod list;
pub mod load;
pub use command::Command as Dataframe;
pub use list::Dataframe as DataframeList;
pub use load::Dataframe as DataframeLoad;

View file

@ -253,6 +253,10 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(TermSize), whole_stream_command(TermSize),
#[cfg(feature = "dataframe")] #[cfg(feature = "dataframe")]
whole_stream_command(Dataframe), whole_stream_command(Dataframe),
#[cfg(feature = "dataframe")]
whole_stream_command(DataframeLoad),
#[cfg(feature = "dataframe")]
whole_stream_command(DataframeList),
]); ]);
#[cfg(feature = "clipboard-cli")] #[cfg(feature = "clipboard-cli")]