mirror of https://github.com/nushell/nushell
synced 2024-12-26 04:53:09 +00:00

Add uniq command (#1132)

* start playing with ways to use the uniq command
* WIP
* Got uniq working, but still need to figure out args issue and add tests
* Add some tests for uniq
* fmt
* remove commented out code
* Add documentation and some additional tests showing uniq values and rows. Also removed args TODO
* add changes that didn't get committed
* whoops, I didn't save the docs correctly...
* fmt
* Add a test for uniq with nested json
* Add another test
* Fix unique-ness when json keys are out of order and make the test json more complicated

This commit is contained in:
parent dba82ac530
commit f37f29b441

13 changed files with 298 additions and 6 deletions
```diff
@@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 use std::time::SystemTime;
 
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
 pub enum UntaggedValue {
     Primitive(Primitive),
     Row(Dictionary),
@@ -182,7 +182,7 @@ impl UntaggedValue {
     }
 }
 
-#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Hash, Serialize, Deserialize)]
 pub struct Value {
     pub value: UntaggedValue,
     pub tag: Tag,
```
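Note that `#[derive(Hash)]` only compiles when every contained type implements `Hash` itself, which is why this commit threads `Hash` through the whole value hierarchy: `Value` holds an `UntaggedValue`, which in turn holds `Primitive`, `Dictionary`, and `Range` values. A minimal sketch of that requirement and what it buys, using a hypothetical toy enum rather than the real nushell types:

```rust
use std::collections::HashSet;

// Toy stand-in for UntaggedValue: #[derive(Hash)] on the enum works only
// because every field type used inside it (i64, String) implements Hash too.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum Toy {
    Nothing,
    Int(i64),
    Text(String),
}

fn main() {
    // With Hash and Eq available, values can be deduplicated by a hash-based set.
    let values = vec![Toy::Int(1), Toy::Int(1), Toy::Text("a".into()), Toy::Nothing];
    let unique: HashSet<Toy> = values.into_iter().collect();
    assert_eq!(unique.len(), 3);
}
```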
```diff
@@ -7,6 +7,7 @@ use indexmap::IndexMap;
 use nu_source::{b, DebugDocBuilder, PrettyDebug, Spanned, Tag};
 use serde::{Deserialize, Serialize};
 use std::cmp::{Ord, Ordering, PartialOrd};
+use std::hash::{Hash, Hasher};
 
 #[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq, Clone, Getters, new)]
 pub struct Dictionary {
@@ -14,6 +15,15 @@ pub struct Dictionary {
     pub entries: IndexMap<String, Value>,
 }
 
+impl Hash for Dictionary {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let mut entries = self.entries.clone();
+        entries.sort_keys();
+        entries.keys().collect::<Vec<&String>>().hash(state);
+        entries.values().collect::<Vec<&Value>>().hash(state);
+    }
+}
+
 impl PartialOrd for Dictionary {
     fn partial_cmp(&self, other: &Dictionary) -> Option<Ordering> {
         let this: Vec<&String> = self.entries.keys().collect();
```
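This manual `Hash` implementation is the heart of the "keys out of order" fix: `Dictionary` stores its entries in an insertion-ordered `IndexMap`, and `IndexMap` equality is already order-independent, so hashing the entries in insertion order would break the `Hash`/`Eq` contract for rows whose keys arrive in different orders. Sorting a clone of the entries first makes the hash order-insensitive. A minimal standalone sketch of the same idea, with a plain `IndexMap<String, i64>` standing in for `Dictionary`:

```rust
use indexmap::IndexMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Mirror of the `Hash for Dictionary` approach above: sort a clone of the
// entries by key, then feed the keys and values to the hasher.
fn order_insensitive_hash(map: &IndexMap<String, i64>) -> u64 {
    let mut entries = map.clone();
    entries.sort_keys();
    let mut state = DefaultHasher::new();
    entries.keys().collect::<Vec<_>>().hash(&mut state);
    entries.values().collect::<Vec<_>>().hash(&mut state);
    state.finish()
}

fn main() {
    let mut a = IndexMap::new();
    a.insert("a".to_string(), 1);
    a.insert("b".to_string(), 2);

    let mut b = IndexMap::new();
    b.insert("b".to_string(), 2); // same entries, different insertion order
    b.insert("a".to_string(), 1);

    // Without sort_keys() the hasher would see the entries in different orders.
    assert_eq!(order_insensitive_hash(&a), order_insensitive_hash(&b));
}
```

The clone-and-sort makes each hash O(n log n) in the number of columns, which is cheap for row-sized dictionaries.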
```diff
@@ -12,7 +12,7 @@ use num_traits::cast::{FromPrimitive, ToPrimitive};
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
-#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Deserialize, Serialize)]
+#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Deserialize, Serialize)]
 pub enum Primitive {
     Nothing,
     #[serde(with = "serde_bigint")]
```
```diff
@@ -3,7 +3,7 @@ use derive_new::new;
 use nu_source::{b, DebugDocBuilder, Spanned};
 use serde::{Deserialize, Serialize};
 
-#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize, Hash)]
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
 pub enum RangeInclusion {
     Inclusive,
     Exclusive,
@@ -25,7 +25,7 @@ impl RangeInclusion {
     }
 }
 
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize, new)]
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize, new)]
 pub struct Range {
     pub from: (Spanned<Primitive>, RangeInclusion),
     pub to: (Spanned<Primitive>, RangeInclusion),
```
docs/commands/uniq.rs (new file, 36 lines)

```diff
@@ -0,0 +1,36 @@
+# uniq
+
+Returns unique rows or values from a dataset.
+
+## Examples
+
+Given a file `test.csv`
+
+```
+first_name,last_name,rusty_at,type
+Andrés,Robalino,10/11/2013,A
+Andrés,Robalino,10/11/2013,A
+Jonathan,Turner,10/12/2013,B
+Yehuda,Katz,10/11/2013,A
+```
+
+```
+> open test.csv | uniq
+━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━
+ # │ first_name │ last_name │ rusty_at   │ type
+───┼────────────┼───────────┼────────────┼──────
+ 0 │ Andrés     │ Robalino  │ 10/11/2013 │ A
+ 1 │ Jonathan   │ Turner    │ 10/12/2013 │ B
+ 2 │ Yehuda     │ Katz      │ 10/11/2013 │ A
+━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━
+```
+
+```
+> open test.csv | get type | uniq
+━━━┯━━━━━━━━━
+ # │ <value>
+───┼─────────
+ 0 │ A
+ 1 │ B
+━━━┷━━━━━━━━━
+```
```
```diff
@@ -294,6 +294,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
             whole_stream_command(Default),
             whole_stream_command(SkipWhile),
             whole_stream_command(Range),
+            whole_stream_command(Uniq),
             // Table manipulation
             whole_stream_command(Wrap),
             whole_stream_command(Pivot),
```
```diff
@@ -90,6 +90,7 @@ pub(crate) mod to_tsv;
 pub(crate) mod to_url;
 pub(crate) mod to_yaml;
 pub(crate) mod trim;
+pub(crate) mod uniq;
 pub(crate) mod version;
 pub(crate) mod what;
 pub(crate) mod where_;
@@ -185,6 +186,7 @@ pub(crate) use to_tsv::ToTSV;
 pub(crate) use to_url::ToURL;
 pub(crate) use to_yaml::ToYAML;
 pub(crate) use trim::Trim;
+pub(crate) use uniq::Uniq;
 pub(crate) use version::Version;
 pub(crate) use what::What;
 pub(crate) use where_::Where;
```
src/commands/uniq.rs (new file, 48 lines)

```diff
@@ -0,0 +1,48 @@
+use crate::commands::WholeStreamCommand;
+use crate::context::CommandRegistry;
+use crate::prelude::*;
+use indexmap::set::IndexSet;
+use nu_errors::ShellError;
+use nu_protocol::{ReturnSuccess, Signature};
+
+#[derive(Deserialize)]
+struct UniqArgs {}
+
+pub struct Uniq;
+
+impl WholeStreamCommand for Uniq {
+    fn name(&self) -> &str {
+        "uniq"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("uniq")
+    }
+
+    fn usage(&self) -> &str {
+        "Return the unique rows"
+    }
+
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        args.process(registry, uniq)?.run()
+    }
+}
+
+fn uniq(
+    UniqArgs {}: UniqArgs,
+    RunnableContext { input, .. }: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let stream = async_stream! {
+        let uniq_values: IndexSet<_> = input.values.collect().await;
+
+        for item in uniq_values.iter().map(|row| ReturnSuccess::value(row.clone())) {
+            yield item;
+        }
+    };
+
+    Ok(stream.to_output_stream())
+}
```
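The whole deduplication happens in the `IndexSet` collect above: `indexmap::IndexSet` drops duplicates via `Hash`/`Eq` (hence the derives added across the protocol types) while remembering first-occurrence order, so `uniq` emits rows in the order they first appeared. A minimal sketch of that behavior, with toy strings standing in for the `Value` stream:

```rust
use indexmap::IndexSet;

// IndexSet dedups via Hash/Eq while preserving the order in which
// items were first seen, unlike std's HashSet.
fn main() {
    let input = vec!["A", "B", "A", "B", "C"];
    let uniq_values: IndexSet<&str> = input.into_iter().collect();

    assert_eq!(uniq_values.into_iter().collect::<Vec<_>>(), vec!["A", "B", "C"]);
}
```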
```diff
@@ -325,6 +325,10 @@ mod tests {
                 loc: fixtures().join("jonathan.xml"),
                 at: 0
             },
+            Res {
+                loc: fixtures().join("nested_uniq.json"),
+                at: 0
+            },
             Res {
                 loc: fixtures().join("sample.bson"),
                 at: 0
```
```diff
@@ -26,5 +26,6 @@ mod save;
 mod sort_by;
 mod split_by;
 mod split_column;
+mod uniq;
 mod where_;
 mod wrap;
```
tests/commands/uniq.rs (new file, 118 lines)

```diff
@@ -0,0 +1,118 @@
+use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
+use nu_test_support::playground::Playground;
+use nu_test_support::{nu, pipeline};
+
+#[test]
+fn uniq_rows() {
+    Playground::setup("uniq_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_at,type
+                Andrés,Robalino,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | uniq
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "3");
+    })
+}
+
+#[test]
+fn uniq_columns() {
+    Playground::setup("uniq_test_2", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_at,type
+                Andrés,Robalino,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | pick rusty_at type
+                | uniq
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "2");
+    })
+}
+
+#[test]
+fn uniq_values() {
+    Playground::setup("uniq_test_3", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_at,type
+                Andrés,Robalino,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+                Jonathan,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | get type
+                | uniq
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "2");
+    })
+}
+
+#[test]
+fn uniq_when_keys_out_of_order() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo '[{"a": "a", "b": [1,2,3]},{"b": [1,2,3], "a": "a"}]'
+            | from-json
+            | uniq
+            | count
+            | echo $it
+        "#
+    ));
+
+    assert_eq!(actual, "1");
+}
+
+#[test]
+fn uniq_nested_json_structures() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats",
+        "open nested_uniq.json | uniq | count | echo $it"
+    );
+
+    assert_eq!(actual, "3");
+}
```
The new `nested_uniq.json` fixture now sorts first in this size-ordered listing, so the expected file name changes:

```diff
@@ -7,7 +7,7 @@ fn filters_by_unit_size_comparison() {
         "ls | where size > 1kb | sort-by size | get name | first 1 | trim | echo $it"
     );
 
-    assert_eq!(actual, "cargo_sample.toml");
+    assert_eq!(actual, "nested_uniq.json");
 }
 
 #[test]
```
tests/fixtures/formats/nested_uniq.json (new file, 72 lines, vendored)

```diff
@@ -0,0 +1,72 @@
+[
+  {
+    "name": "this is duplicated",
+    "nesting": [
+      {
+        "a": "a",
+        "b": "b"
+      },
+      {
+        "c": "c",
+        "d": "d"
+      }
+    ],
+    "can_be_ordered_differently": {
+      "array": [1, 2, 3, 4, 5],
+      "something": { "else": "works" }
+    }
+  },
+  {
+    "can_be_ordered_differently": {
+      "something": { "else": "works" },
+      "array": [1, 2, 3, 4, 5]
+    },
+    "nesting": [
+      {
+        "b": "b",
+        "a": "a"
+      },
+      {
+        "d": "d",
+        "c": "c"
+      }
+    ],
+    "name": "this is duplicated"
+  },
+  {
+    "name": "this is unique",
+    "nesting": [
+      {
+        "a": "b",
+        "b": "a"
+      },
+      {
+        "c": "d",
+        "d": "c"
+      }
+    ],
+    "can_be_ordered_differently": {
+      "array": [],
+      "something": { "else": "does not work" }
+    }
+  },
+  {
+    "name": "this is unique",
+    "nesting": [
+      {
+        "a": "a",
+        "b": "b",
+        "c": "c"
+      },
+      {
+        "d": "d",
+        "e": "e",
+        "f": "f"
+      }
+    ],
+    "can_be_ordered_differently": {
+      "array": [],
+      "something": { "else": "works" }
+    }
+  }
+]
```
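The first two objects in this fixture are key-order permutations of each other at every nesting level, so with the sorted-key `Hash` for `Dictionary` they hash and compare equal and `uniq` collapses them into one row; that is why `uniq_nested_json_structures` expects a count of 3.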