Clean column names (#3678)

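* Type in command description (each command's usage string is prefixed with the input type it accepts, e.g. `[DataFrame]`, `[Series]`, `[GroupBy]`)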

* filter name change

* Clean column name

* Clippy error and updated polars version

* Lint correction in file
This commit is contained in:
Fernando Herrera 2021-06-25 08:09:41 +01:00 committed by GitHub
parent 93b5f3f421
commit 596062ccab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
48 changed files with 120 additions and 72 deletions

10
Cargo.lock generated
View file

@ -4666,7 +4666,7 @@ dependencies = [
[[package]]
name = "polars"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
dependencies = [
"polars-core",
"polars-io",
@ -4676,7 +4676,7 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
dependencies = [
"arrow",
"num 0.4.0",
@ -4686,7 +4686,7 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
dependencies = [
"ahash",
"anyhow",
@ -4712,7 +4712,7 @@ dependencies = [
[[package]]
name = "polars-io"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
dependencies = [
"ahash",
"anyhow",
@ -4735,7 +4735,7 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
dependencies = [
"ahash",
"itertools",

View file

@ -100,7 +100,7 @@ zip = { version="0.5.9", optional=true }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in"]

View file

@ -84,7 +84,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Performs an aggregation operation on a dataframe or groupby object"
"[DataFrame, GroupBy, Series] Performs an aggregation operation on a dataframe, groupby or series object"
}
fn signature(&self) -> Signature {
@ -101,6 +101,11 @@ impl WholeStreamCommand for DataFrame {
"quantile value for quantile operation",
Some('q'),
)
.switch(
"explicit",
"returns explicit names for groupby aggregations",
Some('e'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -159,7 +164,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
None => groupby,
};
let res = perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)?;
let res = perform_groupby_aggregation(
groupby,
op,
&operation.tag,
&agg_span,
args.has_flag("explicit"),
)?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
@ -197,8 +208,9 @@ fn perform_groupby_aggregation(
operation: Operation,
operation_tag: &Tag,
agg_span: &Span,
explicit: bool,
) -> Result<polars::prelude::DataFrame, ShellError> {
match operation {
let mut res = match operation {
Operation::Mean => groupby.mean(),
Operation::Sum => groupby.sum(),
Operation::Min => groupby.min(),
@ -219,7 +231,42 @@ fn perform_groupby_aggregation(
};
parse_polars_error::<&str>(&e, span, None)
})
})?;
if !explicit {
let col_names = res
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
for col in col_names {
let from = match operation {
Operation::Mean => "_mean",
Operation::Sum => "_sum",
Operation::Min => "_min",
Operation::Max => "_max",
Operation::First => "_first",
Operation::Last => "_last",
Operation::Nunique => "_n_unique",
Operation::Quantile(_) => "_quantile",
Operation::Median => "_median",
Operation::Var => "_agg_var",
Operation::Std => "_agg_std",
Operation::Count => "_count",
};
let new_col = match col.find(from) {
Some(index) => &col[..index],
None => &col[..],
};
res.rename(col.as_str(), new_col)
.expect("Column is always there. Looping with known names");
}
}
Ok(res)
}
fn perform_dataframe_aggregation(
@ -266,7 +313,7 @@ fn perform_series_aggregation(
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("mean", value);
data.insert_value(series.name(), value);
Ok(data.into_value())
}
@ -282,7 +329,7 @@ fn perform_series_aggregation(
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("median", value);
data.insert_value(series.name(), value);
Ok(data.into_value())
}
@ -319,7 +366,7 @@ fn perform_series_aggregation(
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("sum", value);
data.insert_value(series.name(), value);
Ok(data.into_value())
}
@ -356,7 +403,7 @@ fn perform_series_aggregation(
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("max", value);
data.insert_value(series.name(), value);
Ok(data.into_value())
}
@ -393,7 +440,7 @@ fn perform_series_aggregation(
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("min", value);
data.insert_value(series.name(), value);
Ok(data.into_value())
}

View file

@ -17,7 +17,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns the selected column as Series"
"[DataFrame] Returns the selected column as Series"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates a new dataframe by dropping the selected columns"
"[DataFrame] Creates a new dataframe by dropping the selected columns"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Drops duplicate values in dataframe"
"[DataFrame] Drops duplicate values in dataframe"
}
fn signature(&self) -> Signature {

View file

@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Drops null values in dataframe"
"[DataFrame, Series] Drops null values in dataframe"
}
fn signature(&self) -> Signature {
@ -37,7 +37,7 @@ impl WholeStreamCommand for DataFrame {
description: "drop null values in dataframe",
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df);
let res = ($df.b / $df.b);
let df = ($df | dataframe with-column $res as res);
let df = ($df | dataframe with-column $res --name res);
$df | dataframe drop-nulls
"#,
result: None,

View file

@ -11,7 +11,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Show dataframe data types"
"[DataFrame] Show dataframe data types"
}
fn signature(&self) -> Signature {

View file

@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates a new dataframe with dummy variables"
"[DataFrame] Creates a new dataframe with dummy variables"
}
fn signature(&self) -> Signature {

View file

@ -11,17 +11,19 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe filter"
"dataframe filter-with"
}
fn usage(&self) -> &str {
"Filters dataframe using a mask as reference"
"[DataFrame] Filters dataframe using a mask as reference"
}
fn signature(&self) -> Signature {
Signature::build("dataframe filter")
.required("with", SyntaxShape::String, "the word 'with'")
.required("mask", SyntaxShape::Any, "boolean mask used to filter data")
Signature::build("dataframe filter-with").required(
"mask",
SyntaxShape::Any,
"boolean mask used to filter data",
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -33,13 +35,13 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([$true $false] | dataframe to-series);
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter with $mask"#,
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
result: None,
},
Example {
description: "Filter dataframe by creating a mask from operation",
example: r#"let mask = (([5 6] | dataframe to-series) > 5);
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter with $mask"#,
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
result: None,
},
]
@ -48,7 +50,7 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(1)?;
let value: Value = args.req(0)?;
let series_span = value.tag.span;
let series = match value.value {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates dataframe with the selected columns"
"[DataFrame] Creates dataframe with the selected columns"
}
fn signature(&self) -> Signature {

View file

@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates a groupby object that can be used for other aggregations"
"[DataFrame] Creates a groupby object that can be used for other aggregations"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates new dataframe with head rows"
"[DataFrame] Creates new dataframe with head rows"
}
fn signature(&self) -> Signature {

View file

@ -20,7 +20,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Joins a dataframe using columns as reference"
"[DataFrame] Joins a dataframe using columns as reference"
}
fn signature(&self) -> Signature {

View file

@ -208,7 +208,7 @@ fn from_csv(args: CommandArgs) -> Result<polars::prelude::DataFrame, ShellError>
};
match csv_reader.finish() {
Ok(csv_reader) => Ok(csv_reader),
Ok(df) => Ok(df),
Err(e) => Err(parse_polars_error::<&str>(&e, &file.tag.span, None)),
}
}

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Unpivot a DataFrame from wide to long format"
"[DataFrame] Unpivot a DataFrame from wide to long format"
}
fn signature(&self) -> Signature {

View file

@ -46,7 +46,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Performs a pivot operation on a groupby object"
"[GroupBy] Performs a pivot operation on a groupby object"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Create sample dataframe"
"[DataFrame] Create sample dataframe"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates a new dataframe with the selected columns"
"[DataFrame] Creates a new dataframe with the selected columns"
}
fn signature(&self) -> Signature {

View file

@ -11,7 +11,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns true if all values are false"
"[Series] Returns true if all values are false"
}
fn signature(&self) -> Signature {

View file

@ -11,7 +11,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns true if all values are true"
"[Series] Returns true if all values are true"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Return index for max value in series"
"[Series] Return index for max value in series"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Return index for min value in series"
"[Series] Return index for min value in series"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns indexes for a sorted series"
"[Series] Returns indexes for a sorted series"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns indexes where values are true"
"[Series] Returns indexes where values are true"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns indexes for unique values"
"[Series] Returns indexes for unique values"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates mask indicating duplicated values"
"[Series] Creates mask indicating duplicated values"
}
fn signature(&self) -> Signature {

View file

@ -15,7 +15,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Checks if elements from a series are contained in right series"
"[Series] Checks if elements from a series are contained in right series"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates mask where value is not null"
"[Series] Creates mask where value is not null"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates mask where value is null"
"[Series] Creates mask where value is null"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates mask indicating unique values"
"[Series] Creates mask indicating unique values"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Counts null values"
"[Series] Counts null values"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Counts unique value"
"[Series] Counts unique value"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Renames a series"
"[Series] Renames a series"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Sets value where given mask is true"
"[Series] Sets value where given mask is true"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Shifts the values by a given period"
"[Series] Shifts the values by a given period"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns unique values from a series"
"[Series] Returns unique values from a series"
}
fn signature(&self) -> Signature {

View file

@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Returns a dataframe with the counts for unique values in series"
"[Series] Returns a dataframe with the counts for unique values in series"
}
fn signature(&self) -> Signature {

View file

@ -13,7 +13,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Converts a section of the dataframe to a Table or List value"
"[DataFrame] Converts a section of the dataframe to a Table or List value"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates new dataframe from a slice of rows"
"[DataFrame] Creates new dataframe from a slice of rows"
}
fn signature(&self) -> Signature {

View file

@ -15,7 +15,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates new sorted dataframe or series"
"[DataFrame, Series] Creates new sorted dataframe or series"
}
fn signature(&self) -> Signature {

View file

@ -12,7 +12,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates new dataframe with tail rows"
"[DataFrame] Creates new dataframe with tail rows"
}
fn signature(&self) -> Signature {

View file

@ -22,7 +22,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Saves dataframe to csv file"
"[DataFrame] Saves dataframe to csv file"
}
fn signature(&self) -> Signature {

View file

@ -20,7 +20,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Saves dataframe to parquet file"
"[DataFrame] Saves dataframe to parquet file"
}
fn signature(&self) -> Signature {

View file

@ -26,7 +26,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Filter dataframe to match the condition"
"[DataFrame] Filter dataframe to match the condition"
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {

View file

@ -16,14 +16,13 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Adds a series to the dataframe"
"[DataFrame] Adds a series to the dataframe"
}
fn signature(&self) -> Signature {
Signature::build("dataframe with-column")
.required("series", SyntaxShape::Any, "series to be added")
.required("as", SyntaxShape::String, "the word 'as'")
.required("name", SyntaxShape::String, "column name")
.required_named("name", SyntaxShape::String, "column name", Some('n'))
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -34,7 +33,7 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Adds a series to the dataframe",
example:
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-series) as c",
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-series) --name c",
result: None,
}]
}
@ -43,7 +42,7 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let name: Tagged<String> = args.req(2)?;
let name: Tagged<String> = args.req_named("name")?;
let mut series = match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),

View file

@ -39,7 +39,7 @@ nu-ansi-term = { version="0.33.1", path="../nu-ansi-term" }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
optional = true
features = ["strings", "checked_arithmetic"]

View file

@ -32,7 +32,7 @@ toml = "0.5.8"
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
optional = true
features = ["serde", "rows"]