Allow ColumnPaths when picking tables. (#1191)

This commit is contained in:
Andrés N. Robalino 2020-01-11 01:45:09 -05:00 committed by GitHub
parent 6d3a30772d
commit 60043df917
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 179 additions and 37 deletions

View file

@ -398,7 +398,19 @@ pub fn as_string(value: &Value) -> Result<String, ShellError> {
UntaggedValue::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)),
UntaggedValue::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())),
UntaggedValue::Primitive(Primitive::ColumnPath(path)) => {
Ok(path.iter().map(|member| member.display()).join("."))
let joined = path
.iter()
.map(|member| match &member.unspanned {
UnspannedPathMember::String(name) => name.to_string(),
UnspannedPathMember::Int(n) => format!("{}", n),
})
.join(".");
if joined.contains(' ') {
Ok(format!("\"{}\"", joined))
} else {
Ok(joined)
}
}
// TODO: this should definitely be more general with better errors

View file

@ -1,15 +1,18 @@
use crate::commands::WholeStreamCommand;
use crate::context::CommandRegistry;
use crate::data::base::select_fields;
use crate::prelude::*;
use futures_util::pin_mut;
use nu_errors::ShellError;
use nu_protocol::{Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use nu_protocol::{
ColumnPath, PathMember, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape,
TaggedDictBuilder, UnspannedPathMember, UntaggedValue, Value,
};
use nu_source::span_for_spanned_list;
use nu_value_ext::{as_string, get_data_by_column_path};
#[derive(Deserialize)]
struct PickArgs {
rest: Vec<Tagged<String>>,
rest: Vec<ColumnPath>,
}
pub struct Pick;
@ -20,7 +23,10 @@ impl WholeStreamCommand for Pick {
}
fn signature(&self) -> Signature {
Signature::build("pick").rest(SyntaxShape::Any, "the columns to select from the table")
Signature::build("pick").rest(
SyntaxShape::ColumnPath,
"the columns to select from the table",
)
}
fn usage(&self) -> &str {
@ -37,7 +43,7 @@ impl WholeStreamCommand for Pick {
}
fn pick(
PickArgs { rest: fields }: PickArgs,
PickArgs { rest: mut fields }: PickArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
if fields.is_empty() {
@ -48,31 +54,110 @@ fn pick(
));
}
let fields: Vec<_> = fields.iter().map(|f| f.item.clone()).collect();
let member = fields.remove(0);
let member = vec![member];
let column_paths = vec![&member, &fields]
.into_iter()
.flatten()
.cloned()
.collect::<Vec<ColumnPath>>();
let stream = async_stream! {
let values = input.values;
pin_mut!(values);
let mut empty = true;
let mut bring_back: indexmap::IndexMap<String, Vec<Value>> = indexmap::IndexMap::new();
while let Some(value) = values.next().await {
let new_value = select_fields(&value, &fields, value.tag.clone());
for path in &column_paths {
let path_members_span = span_for_spanned_list(path.members().iter().map(|p| p.span));
if let UntaggedValue::Row(dict) = &new_value.value {
if dict
.entries
.values()
.any(|v| v.value != UntaggedValue::Primitive(Primitive::Nothing))
{
empty = false;
yield ReturnSuccess::value(new_value);
let fetcher = get_data_by_column_path(&value, &path, Box::new(move |(obj_source, path_member_tried, error)| {
if let PathMember { unspanned: UnspannedPathMember::String(column), .. } = path_member_tried {
return ShellError::labeled_error_with_secondary(
"No data to fetch.",
format!("Couldn't pick column \"{}\"", column),
path_member_tried.span,
format!("How about exploring it with \"get\"? Check the input is appropiate originating from here"),
obj_source.tag.span)
}
error
}));
let field = path.clone();
let key = as_string(&UntaggedValue::Primitive(Primitive::ColumnPath(field.clone())).into_untagged_value())?;
match fetcher {
Ok(results) => {
match results.value {
UntaggedValue::Table(records) => {
for x in records {
let mut out = TaggedDictBuilder::new(name.clone());
out.insert_untagged(&key, x.value.clone());
let group = bring_back.entry(key.clone()).or_insert(vec![]);
group.push(out.into_value());
}
},
x => {
let mut out = TaggedDictBuilder::new(name.clone());
out.insert_untagged(&key, x.clone());
let group = bring_back.entry(key.clone()).or_insert(vec![]);
group.push(out.into_value());
}
}
}
Err(reason) => {
// At the moment, we can't add switches, named flags
// and the like while already using .rest since it
// breaks the parser.
//
// We allow flexibility for now and skip the error
// if a given column isn't present.
let strict: Option<bool> = None;
if strict.is_some() {
yield Err(reason);
return;
}
bring_back.entry(key.clone()).or_insert(vec![]);
}
}
}
}
if empty {
yield Err(ShellError::labeled_error("None of the columns were found in the input", "could not find columns given", name));
let mut max = 0;
if let Some(max_column) = bring_back.values().max() {
max = max_column.len();
}
let keys = bring_back.keys().map(|x| x.clone()).collect::<Vec<String>>();
for mut current in 0..max {
let mut out = TaggedDictBuilder::new(name.clone());
for k in &keys {
let nothing = UntaggedValue::Primitive(Primitive::Nothing).into_untagged_value();
let subsets = bring_back.get(k);
match subsets {
Some(set) => {
match set.get(current) {
Some(row) => out.insert_untagged(k, row.get_data(k).borrow().clone()),
None => out.insert_untagged(k, nothing.clone()),
}
}
None => out.insert_untagged(k, nothing.clone()),
}
}
yield ReturnSuccess::value(out.into_value());
}
};

View file

@ -86,14 +86,15 @@ impl std::convert::TryFrom<Option<&Value>> for Switch {
}
}
#[allow(unused)]
pub(crate) fn select_fields(obj: &Value, fields: &[String], tag: impl Into<Tag>) -> Value {
let mut out = TaggedDictBuilder::new(tag);
let descs = obj.data_descriptors();
for field in fields {
match descs.iter().find(|d| *d == field) {
None => out.insert_untagged(field, UntaggedValue::nothing()),
for column_name in fields {
match descs.iter().find(|d| *d == column_name) {
None => out.insert_untagged(column_name, UntaggedValue::nothing()),
Some(desc) => out.insert_value(desc.clone(), obj.get_data(desc).borrow().clone()),
}
}

View file

@ -1,10 +1,10 @@
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, nu_error, pipeline};
use nu_test_support::{nu, pipeline};
#[test]
fn columns() {
Playground::setup("pick_by_test_1", |dirs, sandbox| {
fn regular_columns() {
Playground::setup("pick_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
r#"
@ -30,28 +30,72 @@ fn columns() {
})
}
#[should_panic]
#[test]
fn errors_if_given_unknown_column_name_is_missing() {
fn complex_nested_columns() {
Playground::setup("pick_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
"los_tres_caballeros.json",
r#"
first_name,last_name,rusty_at,type
Andrés,Robalino,10/11/2013,A
Jonathan,Turner,10/12/2013,B
Yehuda,Katz,10/11/2013,A
{
"nu": {
"committers": [
{"name": "Andrés N. Robalino"},
{"name": "Jonathan Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"}
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#,
)]);
let actual = nu_error!(
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.csv
| pick rrusty_at
open los_tres_caballeros.json
| pick nu.0xATYKARNU nu.committers.name nu.releases.version
| where $it."nu.releases.version" > "0.8"
| get "nu.releases.version"
| echo $it
"#
));
assert!(actual.contains("Unknown column"));
assert_eq!(actual, "0.9999999");
})
}
#[test]
fn allows_if_given_unknown_column_name_is_missing() {
Playground::setup("pick_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
r#"
first_name,last_name,rusty_at,type
Andrés,Robalino,10/11/2013,A
Jonathan,Turner,10/12/2013,B
Yehuda,Katz,10/11/2013,A
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.csv
| pick rrusty_at first_name
| count
| echo $it
"#
));
assert_eq!(actual, "3");
})
}

View file

@ -50,7 +50,7 @@ fn uniq_values() {
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.csv
| pick get type
| pick type
| uniq
| count
| echo $it