Allow ColumnPaths when picking tables. (#1191)

This commit is contained in:
Andrés N. Robalino 2020-01-11 01:45:09 -05:00 committed by GitHub
parent 6d3a30772d
commit 60043df917
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 179 additions and 37 deletions

View file

@ -398,7 +398,19 @@ pub fn as_string(value: &Value) -> Result<String, ShellError> {
UntaggedValue::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)), UntaggedValue::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)),
UntaggedValue::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())), UntaggedValue::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())),
UntaggedValue::Primitive(Primitive::ColumnPath(path)) => { UntaggedValue::Primitive(Primitive::ColumnPath(path)) => {
Ok(path.iter().map(|member| member.display()).join(".")) let joined = path
.iter()
.map(|member| match &member.unspanned {
UnspannedPathMember::String(name) => name.to_string(),
UnspannedPathMember::Int(n) => format!("{}", n),
})
.join(".");
if joined.contains(' ') {
Ok(format!("\"{}\"", joined))
} else {
Ok(joined)
}
} }
// TODO: this should definitely be more general with better errors // TODO: this should definitely be more general with better errors

View file

@ -1,15 +1,18 @@
use crate::commands::WholeStreamCommand; use crate::commands::WholeStreamCommand;
use crate::context::CommandRegistry; use crate::context::CommandRegistry;
use crate::data::base::select_fields;
use crate::prelude::*; use crate::prelude::*;
use futures_util::pin_mut; use futures_util::pin_mut;
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_protocol::{Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, UntaggedValue}; use nu_protocol::{
use nu_source::Tagged; ColumnPath, PathMember, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape,
TaggedDictBuilder, UnspannedPathMember, UntaggedValue, Value,
};
use nu_source::span_for_spanned_list;
use nu_value_ext::{as_string, get_data_by_column_path};
#[derive(Deserialize)] #[derive(Deserialize)]
struct PickArgs { struct PickArgs {
rest: Vec<Tagged<String>>, rest: Vec<ColumnPath>,
} }
pub struct Pick; pub struct Pick;
@ -20,7 +23,10 @@ impl WholeStreamCommand for Pick {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("pick").rest(SyntaxShape::Any, "the columns to select from the table") Signature::build("pick").rest(
SyntaxShape::ColumnPath,
"the columns to select from the table",
)
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -37,7 +43,7 @@ impl WholeStreamCommand for Pick {
} }
fn pick( fn pick(
PickArgs { rest: fields }: PickArgs, PickArgs { rest: mut fields }: PickArgs,
RunnableContext { input, name, .. }: RunnableContext, RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
if fields.is_empty() { if fields.is_empty() {
@ -48,31 +54,110 @@ fn pick(
)); ));
} }
let fields: Vec<_> = fields.iter().map(|f| f.item.clone()).collect(); let member = fields.remove(0);
let member = vec![member];
let column_paths = vec![&member, &fields]
.into_iter()
.flatten()
.cloned()
.collect::<Vec<ColumnPath>>();
let stream = async_stream! { let stream = async_stream! {
let values = input.values; let values = input.values;
pin_mut!(values); pin_mut!(values);
let mut empty = true; let mut empty = true;
let mut bring_back: indexmap::IndexMap<String, Vec<Value>> = indexmap::IndexMap::new();
while let Some(value) = values.next().await { while let Some(value) = values.next().await {
let new_value = select_fields(&value, &fields, value.tag.clone()); for path in &column_paths {
let path_members_span = span_for_spanned_list(path.members().iter().map(|p| p.span));
if let UntaggedValue::Row(dict) = &new_value.value { let fetcher = get_data_by_column_path(&value, &path, Box::new(move |(obj_source, path_member_tried, error)| {
if dict if let PathMember { unspanned: UnspannedPathMember::String(column), .. } = path_member_tried {
.entries return ShellError::labeled_error_with_secondary(
.values() "No data to fetch.",
.any(|v| v.value != UntaggedValue::Primitive(Primitive::Nothing)) format!("Couldn't pick column \"{}\"", column),
{ path_member_tried.span,
empty = false; format!("How about exploring it with \"get\"? Check the input is appropiate originating from here"),
yield ReturnSuccess::value(new_value); obj_source.tag.span)
}
error
}));
let field = path.clone();
let key = as_string(&UntaggedValue::Primitive(Primitive::ColumnPath(field.clone())).into_untagged_value())?;
match fetcher {
Ok(results) => {
match results.value {
UntaggedValue::Table(records) => {
for x in records {
let mut out = TaggedDictBuilder::new(name.clone());
out.insert_untagged(&key, x.value.clone());
let group = bring_back.entry(key.clone()).or_insert(vec![]);
group.push(out.into_value());
}
},
x => {
let mut out = TaggedDictBuilder::new(name.clone());
out.insert_untagged(&key, x.clone());
let group = bring_back.entry(key.clone()).or_insert(vec![]);
group.push(out.into_value());
}
}
}
Err(reason) => {
// At the moment, we can't add switches, named flags
// and the like while already using .rest since it
// breaks the parser.
//
// We allow flexibility for now and skip the error
// if a given column isn't present.
let strict: Option<bool> = None;
if strict.is_some() {
yield Err(reason);
return;
}
bring_back.entry(key.clone()).or_insert(vec![]);
}
} }
} }
} }
if empty { let mut max = 0;
yield Err(ShellError::labeled_error("None of the columns were found in the input", "could not find columns given", name));
if let Some(max_column) = bring_back.values().max() {
max = max_column.len();
}
let keys = bring_back.keys().map(|x| x.clone()).collect::<Vec<String>>();
for mut current in 0..max {
let mut out = TaggedDictBuilder::new(name.clone());
for k in &keys {
let nothing = UntaggedValue::Primitive(Primitive::Nothing).into_untagged_value();
let subsets = bring_back.get(k);
match subsets {
Some(set) => {
match set.get(current) {
Some(row) => out.insert_untagged(k, row.get_data(k).borrow().clone()),
None => out.insert_untagged(k, nothing.clone()),
}
}
None => out.insert_untagged(k, nothing.clone()),
}
}
yield ReturnSuccess::value(out.into_value());
} }
}; };

View file

@ -86,14 +86,15 @@ impl std::convert::TryFrom<Option<&Value>> for Switch {
} }
} }
#[allow(unused)]
pub(crate) fn select_fields(obj: &Value, fields: &[String], tag: impl Into<Tag>) -> Value { pub(crate) fn select_fields(obj: &Value, fields: &[String], tag: impl Into<Tag>) -> Value {
let mut out = TaggedDictBuilder::new(tag); let mut out = TaggedDictBuilder::new(tag);
let descs = obj.data_descriptors(); let descs = obj.data_descriptors();
for field in fields { for column_name in fields {
match descs.iter().find(|d| *d == field) { match descs.iter().find(|d| *d == column_name) {
None => out.insert_untagged(field, UntaggedValue::nothing()), None => out.insert_untagged(column_name, UntaggedValue::nothing()),
Some(desc) => out.insert_value(desc.clone(), obj.get_data(desc).borrow().clone()), Some(desc) => out.insert_value(desc.clone(), obj.get_data(desc).borrow().clone()),
} }
} }

View file

@ -1,10 +1,10 @@
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed; use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground; use nu_test_support::playground::Playground;
use nu_test_support::{nu, nu_error, pipeline}; use nu_test_support::{nu, pipeline};
#[test] #[test]
fn columns() { fn regular_columns() {
Playground::setup("pick_by_test_1", |dirs, sandbox| { Playground::setup("pick_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed( sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv", "los_tres_caballeros.csv",
r#" r#"
@ -30,28 +30,72 @@ fn columns() {
}) })
} }
#[should_panic]
#[test] #[test]
fn errors_if_given_unknown_column_name_is_missing() { fn complex_nested_columns() {
Playground::setup("pick_test_2", |dirs, sandbox| { Playground::setup("pick_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed( sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv", "los_tres_caballeros.json",
r#" r#"
first_name,last_name,rusty_at,type {
Andrés,Robalino,10/11/2013,A "nu": {
Jonathan,Turner,10/12/2013,B "committers": [
Yehuda,Katz,10/11/2013,A {"name": "Andrés N. Robalino"},
{"name": "Jonathan Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"},
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#, "#,
)]); )]);
let actual = nu_error!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
open los_tres_caballeros.csv open los_tres_caballeros.json
| pick rrusty_at | pick nu.0xATYKARNU nu.committers.name nu.releases.version
| where $it."nu.releases.version" > "0.8"
| get "nu.releases.version"
| echo $it
"# "#
)); ));
assert!(actual.contains("Unknown column")); assert_eq!(actual, "0.9999999");
})
}
#[test]
fn allows_if_given_unknown_column_name_is_missing() {
Playground::setup("pick_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
r#"
first_name,last_name,rusty_at,type
Andrés,Robalino,10/11/2013,A
Jonathan,Turner,10/12/2013,B
Yehuda,Katz,10/11/2013,A
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.csv
| pick rrusty_at first_name
| count
| echo $it
"#
));
assert_eq!(actual, "3");
}) })
} }

View file

@ -50,7 +50,7 @@ fn uniq_values() {
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
open los_tres_caballeros.csv open los_tres_caballeros.csv
| pick get type | pick type
| uniq | uniq
| count | count
| echo $it | echo $it