Dataframe new commands (#3425)

* Folder for dataframe commands

* New commands for dataframe
This commit is contained in:
Fernando Herrera 2021-05-15 08:24:11 +01:00 committed by GitHub
parent be2f66397b
commit c0cc9ce7cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 115 additions and 57 deletions

View file

@ -187,7 +187,7 @@ pub(crate) mod touch;
pub(crate) use all::Command as All;
pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")]
pub(crate) use dataframe::Dataframe;
pub(crate) use dataframe::{Dataframe, DataframeList, DataframeLoad};
pub(crate) use enter::Enter;
pub(crate) use every::Every;
pub(crate) use exec::Exec;

View file

@ -0,0 +1,38 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, UntaggedValue};
/// The `dataframe` command: collects the piped-in values into a single dataframe value.
pub struct Command;

impl WholeStreamCommand for Command {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe"
    }

    /// One-line help text shown for the command.
    fn usage(&self) -> &str {
        "Creates a dataframe from pipelined Table or List"
    }

    /// The command declares no arguments; its data comes from the input stream.
    fn signature(&self) -> Signature {
        Signature::build("dataframe")
    }

    /// Builds a `NuDataFrame` from the input stream and emits it as the only output value.
    ///
    /// # Errors
    /// Propagates any error returned by argument evaluation or by
    /// `NuDataFrame::try_from_iter`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        // Taken before `args` is shadowed by its evaluated form below.
        let tag = args.call_info.name_tag.clone();
        let args = args.evaluate_once()?;

        let df = NuDataFrame::try_from_iter(args.input, &tag)?;
        let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));

        Ok(init.to_output_stream())
    }

    /// Usage examples displayed in the command's help.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Takes an input stream and converts it to a dataframe",
            example: "echo [[a b];[1 2] [3 4]] | dataframe",
            result: None,
        }]
    }
}

View file

@ -0,0 +1,53 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
pub struct Dataframe;
impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str {
"dataframe list"
}
fn usage(&self) -> &str {
"Lists stored dataframes"
}
fn signature(&self) -> Signature {
Signature::build("dataframe list")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
let args = args.evaluate_once()?;
let mut dataframes: Vec<Value> = Vec::new();
for (name, value) in args.context.scope.get_vars() {
if let UntaggedValue::Dataframe(df) = value.value {
let mut data = TaggedDictBuilder::new(value.tag);
let polars_df = df.dataframe.unwrap();
let rows = polars_df.height();
let cols = polars_df.width();
data.insert_value("name", name);
data.insert_value("file", df.name);
data.insert_value("rows", format!("{}", rows));
data.insert_value("columns", format!("{}", cols));
dataframes.push(data.into_value());
}
}
Ok(OutputStream::from_stream(dataframes.into_iter()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Lists loaded dataframes in current scope",
example: "dataframe list",
result: None,
}]
}
}

View file

@ -3,82 +3,47 @@ use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader};
pub struct Dataframe;
#[derive(Deserialize)]
pub struct OpenArgs {
file: Tagged<PathBuf>,
}
impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str {
"dataframe"
"dataframe load"
}
fn usage(&self) -> &str {
"Creates a dataframe from a csv file"
"Loads dataframe from csv or parquet file"
}
fn signature(&self) -> Signature {
Signature::build("dataframe").named(
Signature::build("dataframe load").required(
"file",
SyntaxShape::FilePath,
"the file path to load values from",
Some('f'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
load_dataframe(args)
create_from_file(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe -f test.csv",
result: None,
},
Example {
description: "Takes an input stream and converts it to a dataframe",
example: "echo [[a b];[1 2] [3 4]] | dataframe",
result: None,
},
]
vec![Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe load test.csv",
result: None,
}]
}
}
// Creates a dataframe from either a file or a table.
// If both options are found, then an error is returned to the user.
// The InputStream can have a table and a dictionary as input variable.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The file has priority over stream input
if let Some(NamedValue::Value(_, _)) = args
.call_info()
.args
.named
.as_ref()
.map(|named| named.named.get("file"))
.flatten()
{
return create_from_file(args);
}
create_from_input(args)
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone();
// Parsing the arguments that the function uses
let (OpenArgs { file }, _) = args.process()?;
let args = args.evaluate_once()?;
let file: Tagged<PathBuf> = args.req(0)?;
// Needs more detail and arguments while loading the dataframe
// Options:
@ -128,12 +93,3 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(init.to_output_stream())
}
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
Ok(init.to_output_stream())
}

View file

@ -0,0 +1,7 @@
// Submodules whose command types are re-exported below.
pub mod command;
pub mod list;
pub mod load;
// `list` and `load` both name their type `Dataframe`, so every re-export is
// given a distinct alias to let callers import all three side by side.
pub use command::Command as Dataframe;
pub use list::Dataframe as DataframeList;
pub use load::Dataframe as DataframeLoad;

View file

@ -253,6 +253,10 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(TermSize),
#[cfg(feature = "dataframe")]
whole_stream_command(Dataframe),
#[cfg(feature = "dataframe")]
whole_stream_command(DataframeLoad),
#[cfg(feature = "dataframe")]
whole_stream_command(DataframeList),
]);
#[cfg(feature = "clipboard-cli")]