Added a count column on the histogram command (#1853)

* Adding initial draft for the addition of the count column on the histogram command

* Update histogram documentation

* Add count column test to histogram command

* Fix error in histogram documentation
This commit is contained in:
Joseph T. Lyons 2020-05-20 02:02:36 -04:00 committed by GitHub
parent b22db39775
commit 3239e5055c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 44 deletions

View file

@ -107,6 +107,32 @@ pub fn histogram(
let column = (*column_name).clone();
let count_column_name = "count".to_string();
let count_shell_error = ShellError::labeled_error("Unable to load group count", "unabled to load group count", &name);
let mut count_values: Vec<u64> = Vec::new();
for table_entry in reduced.table_entries() {
match table_entry {
Value {
value: UntaggedValue::Table(list),
..
} => {
for i in list {
if let Ok(count) = i.value.clone().into_value(&name).as_u64() {
count_values.push(count);
} else {
yield Err(count_shell_error);
return;
}
}
}
_ => {
yield Err(count_shell_error);
return;
}
}
}
if let Value { value: UntaggedValue::Table(start), .. } = datasets.get(0).ok_or_else(|| ShellError::labeled_error("Unable to load dataset", "unabled to load dataset", &name))? {
for percentage in start.iter() {
@ -114,6 +140,8 @@ pub fn histogram(
let value: Tagged<String> = group_labels.get(idx).ok_or_else(|| ShellError::labeled_error("Unable to load group labels", "unabled to load group labels", &name))?.clone();
fact.insert_value(&column, UntaggedValue::string(value.item).into_value(value.tag));
fact.insert_untagged(&count_column_name, UntaggedValue::int(count_values[idx]));
if let Value { value: UntaggedValue::Primitive(Primitive::Int(ref num)), ref tag } = percentage.clone() {
let string = std::iter::repeat("*").take(num.to_i32().ok_or_else(|| ShellError::labeled_error("Expected a number", "expected a number", tag))? as usize).collect::<String>();
fact.insert_untagged(&frequency_column_name, UntaggedValue::string(string));

View file

@ -62,3 +62,22 @@ fn help() {
assert_eq!(help_long.out, help_command.out);
})
}
#[test]
fn count() {
    // Expected JSON after sorting by `count` and rejecting the `frequency` column.
    let expected = r#"[{"bit":"1","count":2},{"bit":"0","count":6}]"#;

    // The input holds eight records: two with `bit` = 1 and six with `bit` = 0.
    let outcome = nu!(
        cwd: ".", pipeline(
r#"
echo "[{"bit":1},{"bit":0},{"bit":0},{"bit":0},{"bit":0},{"bit":0},{"bit":0},{"bit":1}]"
| from json
| histogram bit
| sort-by count
| reject frequency
| to json
"#
    ));

    assert_eq!(outcome.out, expected);
}

View file

@ -17,64 +17,76 @@ Let's say we have this file `random_numbers.csv` which contains 50 random numbers
```shell
> open random_numbers.csv
open random_numbers2.csv
━━━━┯━━━━━━━━━━━━━━━━
────┬────────────────
# │ random numbers
────┼────────────────
0 │ 0
1 │ 5
2 │ 5
...
47 │ 0
48 │ 2
49 │ 4
━━━━┷━━━━━━━━━━━━━━━━
0 │ 1
1 │ 2
2 │ 2
...
47 │ 5
48 │ 5
49 │ 1
────┴────────────────
```
If we now want to see how often the different numbers were generated, we can use the `histogram` function:
```shell
> open random_numbers2.csv | histogram "random numbers"
━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# │ random numbers │ frequency
───┼────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 0 │ ****************************************************************************************************
1 │ 1 │ ******************************
2 │ 2 │ *************************************************************
3 │ 3 │ *********************************************************************
4 │ 4 │ *****************************************************
5 │ 5 │ *********************************************************************
━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
> open random_numbers.csv | histogram "random numbers"
───┬────────────────┬───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────
# │ random numbers │ count │ frequency
───┼────────────────┼───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 0 │ 1 │ ******
1 │ 1 │ 15 │ ****************************************************************************************************
2 │ 2 │ 10 │ ******************************************************************
3 │ 3 │ 7 │ **********************************************
4 │ 4 │ 9 │ ************************************************************
5 │ 5 │ 8 │ *****************************************************
───┴────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────
```
We can also set the name of the frequency column or sort the table:
```shell
> open random_numbers2.csv | histogram "random numbers" probability
━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# │ random numbers │ probability
───┼────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 0 │ ****************************************************************************************************
1 │ 1 │ ******************************
2 │ 2 │ *************************************************************
3 │ 3 │ *********************************************************************
4 │ 4 │ *****************************************************
5 │ 5 │ *********************************************************************
━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
> open random_numbers.csv | histogram "random numbers" probability
───┬────────────────┬───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────
# │ random numbers │ count │ probability
───┼────────────────┼───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 0 │ 1 │ ******
1 │ 1 │ 15 │ ****************************************************************************************************
2 │ 2 │ 10 │ ******************************************************************
3 │ 3 │ 7 │ **********************************************
4 │ 4 │ 9 │ ************************************************************
5 │ 5 │ 8 │ *****************************************************
───┴────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────
```
```shell
> open random_numbers2.csv | histogram "random numbers" probability | sort-by probability
━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# │ random numbers │ probability
───┼────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 1 │ ******************************
1 │ 4 │ *****************************************************
2 │ 2 │ *************************************************************
3 │ 3 │ *********************************************************************
4 │ 5 │ *********************************************************************
5 │ 0 │ ****************************************************************************************************
━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
> open random_numbers.csv | histogram "random numbers" probability | sort-by probability
───┬────────────────┬───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────
# │ random numbers │ count │ probability
───┼────────────────┼───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ 0 │ 1 │ ******
1 │ 3 │ 7 │ **********************************************
2 │ 5 │ 8 │ *****************************************************
3 │ 4 │ 9 │ ************************************************************
4 │ 2 │ 10 │ ******************************************************************
5 │ 1 │ 15 │ ****************************************************************************************************
───┴────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────
```
```
Of course, histogram operations are not restricted to analyzing numbers in files; you can also analyze your directories:
```shell
> ls -fa | histogram type | sort-by count
───┬─────────┬───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────
# │ type │ count │ frequency
───┼─────────┼───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────
0 │ Symlink │ 8 │ *****************
1 │ File │ 9 │ ********************
2 │ Dir │ 45 │ ****************************************************************************************************
───┴─────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────
```