mirror of
https://github.com/nushell/nushell
synced 2025-01-15 14:44:14 +00:00
append dataframes (#3839)
This commit is contained in:
parent
111477aa74
commit
d54d7cc431
7 changed files with 277 additions and 37 deletions
138
crates/nu-command/src/commands/dataframe/append.rs
Normal file
138
crates/nu-command/src/commands/dataframe/append.rs
Normal file
|
@ -0,0 +1,138 @@
|
|||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Axis, Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
|
||||
/// Unit struct implementing the `dataframe append` command.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe append"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[DataFrame] Appends a new dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe append")
|
||||
.required_named(
|
||||
"other",
|
||||
SyntaxShape::Any,
|
||||
"dataframe to be appended",
|
||||
Some('o'),
|
||||
)
|
||||
.required_named(
|
||||
"axis",
|
||||
SyntaxShape::String,
|
||||
"row or col axis orientation",
|
||||
Some('a'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Appends a dataframe as new columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dataframe to-df);
|
||||
$a | dataframe append -o $a -a row"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
Column::new(
|
||||
"a_x".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b_x".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Appends a dataframe merging at the end of columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dataframe to-df);
|
||||
$a | dataframe append -o $a -a col"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let other: Value = args.req_named("other")?;
|
||||
let axis: Tagged<String> = args.req_named("axis")?;
|
||||
|
||||
let axis = Axis::try_from_str(axis.item.as_str(), &axis.tag.span)?;
|
||||
|
||||
let df_other = match other.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only append a dataframe to a dataframe",
|
||||
other.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let df_new = df.append_df(&df_other, axis, &tag.span)?;
|
||||
Ok(OutputStream::one(df_new.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands the command to `crate::examples::test_dataframe` and returns its result.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame)
    }
}
|
|
@ -1,4 +1,5 @@
|
|||
pub mod aggregate;
|
||||
pub mod append;
|
||||
pub mod column;
|
||||
pub mod command;
|
||||
pub mod drop;
|
||||
|
@ -31,6 +32,7 @@ pub mod where_;
|
|||
pub mod with_column;
|
||||
|
||||
pub use aggregate::DataFrame as DataFrameAggregate;
|
||||
pub use append::DataFrame as DataFrameAppend;
|
||||
pub use column::DataFrame as DataFrameColumn;
|
||||
pub use command::Command as DataFrame;
|
||||
pub use drop::DataFrame as DataFrameDrop;
|
||||
|
|
|
@ -25,9 +25,9 @@ pub use conversions::*;
|
|||
pub use core_commands::*;
|
||||
#[cfg(feature = "dataframe")]
|
||||
pub use dataframe::{
|
||||
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax,
|
||||
DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn,
|
||||
DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameAppend,
|
||||
DataFrameArgMax, DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique,
|
||||
DataFrameColumn, DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst,
|
||||
DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull,
|
||||
DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt,
|
||||
|
|
|
@ -328,6 +328,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
|||
whole_stream_command(DataFrameToUppercase),
|
||||
whole_stream_command(DataFrameStringSlice),
|
||||
whole_stream_command(DataFrameConcatenate),
|
||||
whole_stream_command(DataFrameAppend),
|
||||
]);
|
||||
|
||||
#[cfg(feature = "clipboard-cli")]
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
use bigdecimal::BigDecimal;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::dataframe::NuDataFrame;
|
||||
use nu_protocol::dataframe::{Axis, NuDataFrame};
|
||||
use nu_protocol::hir::Operator;
|
||||
use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value};
|
||||
use nu_source::Span;
|
||||
use num_traits::ToPrimitive;
|
||||
|
||||
use polars::prelude::{
|
||||
BooleanType, ChunkCompare, ChunkedArray, DataFrame, DataType, Float64Type, Int64Type,
|
||||
IntoSeries, NumOpsDispatchChecked, PolarsError, Series,
|
||||
BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
|
||||
NumOpsDispatchChecked, PolarsError, Series,
|
||||
};
|
||||
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
|
||||
|
||||
|
@ -83,37 +83,14 @@ pub fn between_dataframes(
|
|||
operation_span: &Span,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
match operator {
|
||||
Operator::Plus => {
|
||||
let mut columns: Vec<&str> = Vec::new();
|
||||
|
||||
let new = lhs
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.chain(rhs.as_ref().get_columns().iter())
|
||||
.map(|s| {
|
||||
let name = if columns.contains(&s.name()) {
|
||||
format!("{}_{}", s.name(), "x")
|
||||
} else {
|
||||
columns.push(s.name());
|
||||
s.name().to_string()
|
||||
};
|
||||
|
||||
let mut series = s.clone();
|
||||
series.rename(name.as_str());
|
||||
series
|
||||
})
|
||||
.collect::<Vec<Series>>();
|
||||
|
||||
match DataFrame::new(new) {
|
||||
Ok(df) => Ok(NuDataFrame::dataframe_to_untagged(df)),
|
||||
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Appending error",
|
||||
format!("{}", e),
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
Operator::Plus => match lhs.append_df(rhs, Axis::Row, operation_span) {
|
||||
Ok(df) => Ok(df.into_untagged()),
|
||||
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Appending error",
|
||||
format!("{}", e),
|
||||
operation_span,
|
||||
))),
|
||||
},
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Incorrect datatype",
|
||||
"unable to use this datatype for this operation",
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
pub mod nu_dataframe;
|
||||
pub mod nu_groupby;
|
||||
pub mod operations;
|
||||
|
||||
pub use nu_dataframe::{Column, NuDataFrame};
|
||||
pub use nu_groupby::NuGroupBy;
|
||||
pub use operations::Axis;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
|
|
120
crates/nu-protocol/src/dataframe/operations.rs
Normal file
120
crates/nu-protocol/src/dataframe/operations.rs
Normal file
|
@ -0,0 +1,120 @@
|
|||
use nu_errors::ShellError;
|
||||
use nu_source::Span;
|
||||
use polars::prelude::{DataFrame, Series};
|
||||
|
||||
use super::NuDataFrame;
|
||||
|
||||
/// Orientation selector for `NuDataFrame::append_df`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Axis {
    /// Parsed from `"row"`. In `append_df` the columns of the other dataframe are
    /// added after the existing columns; a repeated column name gets the `_x` suffix.
    Row,
    /// Parsed from `"col"`. In `append_df` the values of the other dataframe are
    /// appended to the end of the columns with the same names.
    Column,
}
|
||||
|
||||
impl Axis {
|
||||
pub fn try_from_str(axis: &str, span: &Span) -> Result<Axis, ShellError> {
|
||||
match axis {
|
||||
"row" => Ok(Axis::Row),
|
||||
"col" => Ok(Axis::Column),
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Wrong axis",
|
||||
"The selected axis does not exist",
|
||||
span,
|
||||
"The only axis options are 'row' or 'col'",
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn append_df(
|
||||
&self,
|
||||
other: &NuDataFrame,
|
||||
axis: Axis,
|
||||
span: &Span,
|
||||
) -> Result<Self, ShellError> {
|
||||
match axis {
|
||||
Axis::Row => {
|
||||
let mut columns: Vec<&str> = Vec::new();
|
||||
|
||||
let new_cols = self
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.chain(other.as_ref().get_columns().iter())
|
||||
.map(|s| {
|
||||
let name = if columns.contains(&s.name()) {
|
||||
format!("{}_{}", s.name(), "x")
|
||||
} else {
|
||||
columns.push(s.name());
|
||||
s.name().to_string()
|
||||
};
|
||||
|
||||
let mut series = s.clone();
|
||||
series.rename(name.as_str());
|
||||
series
|
||||
})
|
||||
.collect::<Vec<Series>>();
|
||||
|
||||
let df_new = DataFrame::new(new_cols).map_err(|e| {
|
||||
ShellError::labeled_error("Appending error", format!("{}", e), span)
|
||||
})?;
|
||||
|
||||
Ok(NuDataFrame::new(df_new))
|
||||
}
|
||||
Axis::Column => {
|
||||
if self.as_ref().width() != other.as_ref().width() {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Appending error",
|
||||
"Dataframes with different number of columns",
|
||||
span,
|
||||
));
|
||||
}
|
||||
|
||||
if !self
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.all(|col| other.as_ref().get_column_names().contains(col))
|
||||
{
|
||||
return Err(ShellError::labeled_error(
|
||||
"Appending error",
|
||||
"Dataframes with different columns names",
|
||||
span,
|
||||
));
|
||||
}
|
||||
|
||||
let new_cols = self
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let other_col = other
|
||||
.as_ref()
|
||||
.column(s.name())
|
||||
.expect("Already checked that dataframes have same columns");
|
||||
|
||||
let mut tmp = s.clone();
|
||||
let res = tmp.append(other_col);
|
||||
|
||||
match res {
|
||||
Ok(s) => Ok(s.clone()),
|
||||
Err(e) => Err({
|
||||
ShellError::labeled_error(
|
||||
"Appending error",
|
||||
format!("Unable to append dataframes: {}", e),
|
||||
span,
|
||||
)
|
||||
}),
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<Series>, ShellError>>()?;
|
||||
|
||||
let df_new = DataFrame::new(new_cols).map_err(|e| {
|
||||
ShellError::labeled_error("Appending error", format!("{}", e), span)
|
||||
})?;
|
||||
|
||||
Ok(NuDataFrame::new(df_new))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue