diff --git a/Cargo.toml b/Cargo.toml index cd6be5d9fa..16b8c85863 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,6 +119,10 @@ path = "src/plugins/inc.rs" name = "nu_plugin_sum" path = "src/plugins/sum.rs" +[[bin]] +name = "nu_plugin_average" +path = "src/plugins/average.rs" + [[bin]] name = "nu_plugin_embed" path = "src/plugins/embed.rs" diff --git a/docs/commands/average.md b/docs/commands/average.md new file mode 100644 index 0000000000..d4095e518f --- /dev/null +++ b/docs/commands/average.md @@ -0,0 +1,45 @@ +# average +This command allows you to calculate the average of values in a column. + +## Examples +To get the average of the file sizes in a directory, simply pipe the size column from the ls command to the average command. + +```shell +> ls | get size | average +━━━━━━━━━ + +━━━━━━━━━ +2282.727272727273 +━━━━━━━━━ +``` + +```shell +> pwd | split-row / | size | get chars | average +━━━━━━━━━ + +━━━━━━━━━ +5.250000000000000 +━━━━━━━━━ +``` + +Note that average only works for integer and byte values. If the shell doesn't recognize the values in a column as one of those types, it will return an error. +One way to solve this is to convert each row to an integer when possible and then pipe the result to `average` + +```shell +> open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | average +error: Unrecognized type in stream: Primitive(String("2509000000")) +- shell:1:0 +1 | open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | average + | ^^^^ source +``` + +```shell +> open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | str --to-int | average +━━━━━━━━━━━━━━━━━━━ + +─────────────────── + 3239404444.000000 +━━━━━━━━━━━━━━━━━━━ +``` + + diff --git a/docs/commands/sum.md b/docs/commands/sum.md index f5c59848dd..7482ca0c54 100644 --- a/docs/commands/sum.md +++ b/docs/commands/sum.md @@ -1,9 +1,7 @@ -# sum - -This command allows you to calculate the sum of values in a column. - -## Examples +# sum +This command allows you to calculate the sum of values in a column. +## Examples To get the sum of the file sizes in a directory, simply pipe the size column from the ls command to the sum command. ```shell @@ -15,21 +13,32 @@ To get the sum of the file sizes in a directory, simply pipe the size column fro ━━━━━━━━━ ``` -Note that sum only works for integer and byte values at the moment, and if the shell doesn't recognize the values in a column as one of those types, it will return an error. +To get the sum of the characters that make up your present working directory. +```shell +> pwd | split-row / | size | get chars | sum +━━━━━━━━━ + +━━━━━━━━━ +21 +━━━━━━━━━ +``` + +Note that sum only works for integer and byte values. If the shell doesn't recognize the values in a column as one of those types, it will return an error. +One way to solve this is to convert each row to an integer when possible and then pipe the result to `sum` ```shell -> open example.csv -━━━┯━━━━━━━━━┯━━━━━━━━┯━━━━━━━━━━ - # │ fruit │ amount │ quality -───┼─────────┼────────┼────────── - 0 │ apples │ 1 │ fresh - 1 │ bananas │ 2 │ old - 2 │ oranges │ 7 │ fresh - 3 │ kiwis │ 25 │ rotten -━━━┷━━━━━━━━━┷━━━━━━━━┷━━━━━━━━━━ +> open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | sum +error: Unrecognized type in stream: Primitive(String("2509000000")) +- shell:1:0 +1 | open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | sum + | ^^^^ source ``` ```shell -> open example.csv | get amount | sum -error: Unrecognized type in stream: Primitive(String("1")) +> open tests/fixtures/formats/caco3_plastics.csv | get tariff_item | str --to-int | sum +━━━━━━━━━━━━━ + +───────────── + 29154639996 +━━━━━━━━━━━━━ ``` diff --git a/src/plugins/average.rs b/src/plugins/average.rs new file mode 100644 index 0000000000..f78078450a --- /dev/null +++ b/src/plugins/average.rs @@ -0,0 +1,115 @@ +use nu::{ + serve_plugin, CallInfo, CoerceInto, Plugin, Primitive, ReturnSuccess, ReturnValue, ShellError, + Signature, Tagged, TaggedItem, Value, +}; + +#[derive(Debug)] +struct Average { + total: Option>, + count: u64, +} + +impl Average { + fn new() -> Average { + Average { + total: None, + count: 0, + } + } + + fn average(&mut self, value: Tagged) -> Result<(), ShellError> { + match value.item() { + Value::Primitive(Primitive::Nothing) => Ok(()), + Value::Primitive(Primitive::Int(i)) => match &self.total { + Some(Tagged { + item: Value::Primitive(Primitive::Int(j)), + tag, + }) => { + self.total = Some(Value::int(i + j).tagged(tag)); + self.count += 1; + Ok(()) + } + None => { + self.total = Some(value.clone()); + self.count += 1; + Ok(()) + } + _ => Err(ShellError::labeled_error( + "Could calculate average of non-integer or unrelated types", + "source", + value.tag, + )), + }, + Value::Primitive(Primitive::Bytes(b)) => match &self.total { + Some(Tagged { + item: Value::Primitive(Primitive::Bytes(j)), + tag, + }) => { + self.total = Some(Value::bytes(b + j).tagged(tag)); + self.count += 1; + Ok(()) + } + None => { + self.total = Some(value); + self.count += 1; + Ok(()) + } + _ => Err(ShellError::labeled_error( + "Could calculate average of non-integer or unrelated types", + "source", + value.tag, + )), + }, + x => Err(ShellError::labeled_error( + format!("Unrecognized type in stream: {:?}", x), + "source", + value.tag, + )), + } + } +} + +impl Plugin for Average { + fn config(&mut self) -> Result { + Ok(Signature::build("average") + .desc("Compute the average of a column of numerical values.") + .filter()) + } + + fn begin_filter(&mut self, _: CallInfo) -> Result, ShellError> { + Ok(vec![]) + } + + fn filter(&mut self, input: Tagged) -> Result, ShellError> { + self.average(input)?; + Ok(vec![]) + } + + fn end_filter(&mut self) -> Result, ShellError> { + match self.total { + None => Ok(vec![]), + Some(ref inner) => match inner.item() { + Value::Primitive(Primitive::Int(i)) => { + let total: u64 = i + .tagged(inner.tag.clone()) + .coerce_into("converting for average")?; + let avg = total as f64 / self.count as f64; + let primitive_value: Value = Primitive::from(avg).into(); + let tagged_value = primitive_value.tagged(inner.tag.clone()); + Ok(vec![ReturnSuccess::value(tagged_value)]) + } + Value::Primitive(Primitive::Bytes(bytes)) => { + let avg = *bytes as f64 / self.count as f64; + let primitive_value: Value = Primitive::from(avg).into(); + let tagged_value = primitive_value.tagged(inner.tag.clone()); + Ok(vec![ReturnSuccess::value(tagged_value)]) + } + _ => Ok(vec![]), + }, + } + } +} + +fn main() { + serve_plugin(&mut Average::new()); +} diff --git a/tests/filters_test.rs b/tests/filters_test.rs index f0d5dead61..1eb55448b7 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -579,6 +579,31 @@ fn can_sum() { assert_eq!(actual, "203") } +#[test] +fn can_average_numbers() { + let actual = nu!( + cwd: "tests/fixtures/formats", h::pipeline( + r#" + open sgml_description.json + | get glossary.GlossDiv.GlossList.GlossEntry.Sections + | average + | echo $it + "# + )); + + assert_eq!(actual, "101.5000000000000") +} + +#[test] +fn can_average_bytes() { + let actual = nu!( + cwd: "tests/fixtures/formats", + "ls | sort-by name | skip 1 | first 2 | get size | average | echo $it" + ); + + assert_eq!(actual, "1600.000000000000"); +} + #[test] fn can_filter_by_unit_size_comparison() { let actual = nu!(