Refactoring and more group-by flexibility.

This commit is contained in:
Andrés N. Robalino 2020-06-17 12:33:50 -05:00
parent 6914099e28
commit 778e497903
10 changed files with 266 additions and 189 deletions

View file

@ -49,8 +49,9 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel
let mut input = String::new();
let result = match reader.read_line(&mut input) {
Ok(count) => {
trace!("processing response ({} bytes)", count);
trace!("response: {}", input);
trace!(target: "nu::load", "plugin infrastructure -> config response");
trace!(target: "nu::load", "plugin infrastructure -> processing response ({} bytes)", count);
trace!(target: "nu::load", "plugin infrastructure -> response: {}", input);
let response = serde_json::from_str::<JsonRpc<Result<Signature, ShellError>>>(&input);
match response {
@ -58,13 +59,13 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel
Ok(params) => {
let fname = path.to_string_lossy();
trace!("processing {:?}", params);
trace!(target: "nu::load", "plugin infrastructure -> processing {:?}", params);
let name = params.name.clone();
let fname = fname.to_string();
if context.get_command(&name).is_some() {
trace!("plugin {:?} already loaded.", &name);
trace!(target: "nu::load", "plugin infrastructure -> {:?} already loaded.", &name);
} else if params.is_filter {
context.add_commands(vec![whole_stream_command(PluginCommand::new(
name, fname, params,
@ -79,7 +80,7 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel
Err(e) => Err(e),
},
Err(e) => {
trace!("incompatible plugin {:?}", input);
trace!(target: "nu::load", "plugin infrastructure -> incompatible {:?}", input);
Err(ShellError::untagged_runtime_error(format!(
"Error: {:?}",
e
@ -188,7 +189,7 @@ pub fn load_plugins(context: &mut Context) -> Result<(), ShellError> {
};
if is_valid_name && is_executable {
trace!("Trying {:?}", path.display());
trace!(target: "nu::load", "plugin infrastructure -> Trying {:?}", path.display());
// we are ok if this plugin load fails
let _ = load_plugin(&path, &mut context.clone());

View file

@ -4,6 +4,7 @@ use indexmap::indexmap;
use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
use nu_source::Tagged;
use nu_value_ext::as_string;
pub struct GroupBy;
@ -71,6 +72,10 @@ impl WholeStreamCommand for GroupBy {
}
}
/// Selects how `group` derives the group key for each row.
enum Grouper {
/// With `Some(column)`, the key is the string form of that column's value in the row;
/// with `None`, the key is the string form of the row itself.
ByColumn(Option<Tagged<String>>),
}
pub async fn group_by(
args: CommandArgs,
registry: &CommandRegistry,
@ -81,30 +86,84 @@ pub async fn group_by(
let values: Vec<Value> = input.collect().await;
if values.is_empty() {
Err(ShellError::labeled_error(
return Err(ShellError::labeled_error(
"Expected table from pipeline",
"requires a table input",
name,
))
} else {
match crate::utils::data::group(column_name, &values, None, &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
));
}
let values = UntaggedValue::table(&values).into_value(&name);
match group(&column_name, &values, name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
}
}
/// Builds the "Unknown column" error reported when `tried` cannot be found in `for_value`.
///
/// Every name returned by `for_value.data_descriptors()` is ranked by its Levenshtein
/// distance to `tried` (ties are broken by the name's own ordering), and the closest one
/// is offered as a "did you mean" hint. When there are no descriptors at all, the error
/// states that the row does not contain the column. In both cases the error is labeled
/// with the tag of `tried`.
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
    let possibilities = for_value.data_descriptors();

    // Only the closest candidate is ever reported, so take the minimum directly rather
    // than collecting and sorting every (distance, name) pair.
    let closest = possibilities
        .iter()
        .map(|x| (natural::distance::levenshtein_distance(x, &tried), x))
        .min();

    match closest {
        Some((_, name)) => ShellError::labeled_error(
            "Unknown column",
            format!("did you mean '{}'?", name),
            tried.tag(),
        ),
        None => ShellError::labeled_error(
            "Unknown column",
            "row does not contain this column",
            tried.tag(),
        ),
    }
}
pub fn group(
column_name: &Tagged<String>,
values: Vec<Value>,
column_name: &Option<Tagged<String>>,
values: &Value,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
crate::utils::data::group(Some(column_name.clone()), &values, None, tag)
let name = tag.into();
let grouper = if let Some(column_name) = column_name {
Grouper::ByColumn(Some(column_name.clone()))
} else {
Grouper::ByColumn(None)
};
match grouper {
Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
}
});
crate::utils::data::group(&values, &Some(block), &name)
}
Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason),
});
crate::utils::data::group(&values, &Some(block), &name)
}
}
}
#[cfg(test)]
mod tests {
use crate::commands::group_by::group;
use super::group;
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
@ -122,7 +181,7 @@ mod tests {
UntaggedValue::table(list).into_untagged_value()
}
fn nu_releases_commiters() -> Vec<Value> {
fn nu_releases_committers() -> Vec<Value> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
@ -156,10 +215,11 @@ mod tests {
#[test]
fn groups_table_by_date_column() -> Result<(), ShellError> {
let for_key = String::from("date").tagged_unknown();
let for_key = Some(String::from("date").tagged_unknown());
let sample = table(&nu_releases_committers());
assert_eq!(
group(&for_key, nu_releases_commiters(), Tag::unknown())?,
group(&for_key, &sample, Tag::unknown())?,
row(indexmap! {
"August 23-2019".into() => table(&[
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
@ -184,10 +244,11 @@ mod tests {
#[test]
fn groups_table_by_country_column() -> Result<(), ShellError> {
let for_key = String::from("country").tagged_unknown();
let for_key = Some(String::from("country").tagged_unknown());
let sample = table(&nu_releases_committers());
assert_eq!(
group(&for_key, nu_releases_commiters(), Tag::unknown())?,
group(&for_key, &sample, Tag::unknown())?,
row(indexmap! {
"EC".into() => table(&[
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),

View file

@ -1,7 +1,7 @@
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, Value};
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
use nu_source::Tagged;
pub struct GroupByDate;
@ -55,7 +55,11 @@ impl WholeStreamCommand for GroupByDate {
}
enum Grouper {
ByDate(Option<String>),
ByDate(Option<Tagged<String>>),
}
/// Selects which value of a row is handed to the date formatter in `group_by_date`.
enum GroupByColumn {
/// With `Some(column)`, the value stored under that column is formatted;
/// with `None`, the row itself is formatted.
Name(Option<Tagged<String>>),
}
pub async fn group_by_date(
@ -80,31 +84,63 @@ pub async fn group_by_date(
name,
))
} else {
let grouper = if let Some(Tagged { item: fmt, tag: _ }) = format {
Grouper::ByDate(Some(fmt))
let values = UntaggedValue::table(&values).into_value(&name);
let grouper_column = if let Some(column_name) = column_name {
GroupByColumn::Name(Some(column_name))
} else {
GroupByColumn::Name(None)
};
let grouper_date = if let Some(date_format) = format {
Grouper::ByDate(Some(date_format))
} else {
Grouper::ByDate(None)
};
match grouper {
Grouper::ByDate(None) => {
match crate::utils::data::group(
column_name,
&values,
Some(Box::new(|row: &Value| row.format("%Y-%b-%d"))),
&name,
) {
match (grouper_date, grouper_column) {
(Grouper::ByDate(None), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format("%Y-%b-%d"));
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
}
}
Grouper::ByDate(Some(fmt)) => {
match crate::utils::data::group(
column_name,
&values,
Some(Box::new(move |row: &Value| row.format(&fmt))),
&name,
) {
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
};
group_key?.format("%Y-%b-%d")
});
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
}
}
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format(&fmt));
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
}
}
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
};
group_key?.format(&fmt)
});
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
}
@ -113,6 +149,31 @@ pub async fn group_by_date(
}
}
/// Builds the "Unknown column" error reported when `tried` cannot be found in `for_value`.
///
/// Every name returned by `for_value.data_descriptors()` is ranked by its Levenshtein
/// distance to `tried` (ties are broken by the name's own ordering), and the closest one
/// is offered as a "did you mean" hint. When there are no descriptors at all, the error
/// states that the row does not contain the column. In both cases the error is labeled
/// with the tag of `tried`.
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
    let possibilities = for_value.data_descriptors();

    // Only the closest candidate is ever reported, so take the minimum directly rather
    // than collecting and sorting every (distance, name) pair.
    let closest = possibilities
        .iter()
        .map(|x| (natural::distance::levenshtein_distance(x, &tried), x))
        .min();

    match closest {
        Some((_, name)) => ShellError::labeled_error(
            "Unknown column",
            format!("did you mean '{}'?", name),
            tried.tag(),
        ),
        None => ShellError::labeled_error(
            "Unknown column",
            "row does not contain this column",
            tried.tag(),
        ),
    }
}
#[cfg(test)]
mod tests {
use super::GroupByDate;

View file

@ -76,14 +76,14 @@ pub async fn histogram(
) -> Result<OutputStream, ShellError> {
let registry = registry.clone();
let name = args.call_info.name_tag.clone();
let (HistogramArgs { column_name, rest }, input) = args.process(&registry).await?;
let values: Vec<Value> = input.collect().await;
let values = UntaggedValue::table(&values).into_value(&name);
let Tagged { item: group_by, .. } = column_name.clone();
let groups = group(&column_name, values, &name)?;
let group_labels = columns_sorted(Some(group_by.clone()), &groups, &name);
let sorted = t_sort(Some(group_by), None, &groups, &name)?;
let groups = group(&Some(column_name.clone()), &values, &name)?;
let group_labels = columns_sorted(Some(column_name.clone()), &groups, &name);
let sorted = t_sort(Some(column_name.clone()), None, &groups, &name)?;
let evaled = evaluate(&sorted, None, &name)?;
let reduced = reduce(&evaled, None, &name)?;
let maxima = map_max(&reduced, None, &name)?;

View file

@ -1,16 +1,15 @@
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::{
Signature, SpannedTypeName, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
};
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, Value};
use nu_source::Tagged;
use nu_value_ext::as_string;
pub struct SplitBy;
#[derive(Deserialize)]
pub struct SplitByArgs {
column_name: Tagged<String>,
column_name: Option<Tagged<String>>,
}
#[async_trait]
@ -20,7 +19,7 @@ impl WholeStreamCommand for SplitBy {
}
fn signature(&self) -> Signature {
Signature::build("split-by").required(
Signature::build("split-by").optional(
"column_name",
SyntaxShape::String,
"the name of the column within the nested table to split by",
@ -53,108 +52,84 @@ pub async fn split_by(
return Err(ShellError::labeled_error(
"Expected table from pipeline",
"requires a table input",
column_name.span(),
name,
));
}
match split(&column_name, &values[0], name) {
Ok(split) => Ok(OutputStream::one(split)),
match split(&column_name, &values[0], &name) {
Ok(splits) => Ok(OutputStream::one(ReturnSuccess::value(splits))),
Err(err) => Err(err),
}
}
/// Selects how `split` derives the split key for each row.
enum Grouper {
/// With `Some(column)`, the key is the string form of that column's value in the row;
/// with `None`, the key is the string form of the row itself.
ByColumn(Option<Tagged<String>>),
}
pub fn split(
column_name: &Tagged<String>,
value: &Value,
column_name: &Option<Tagged<String>>,
values: &Value,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let origin_tag = tag.into();
let name = tag.into();
let mut splits = indexmap::IndexMap::new();
let grouper = if let Some(column_name) = column_name {
Grouper::ByColumn(Some(column_name.clone()))
} else {
Grouper::ByColumn(None)
};
match value {
Value {
value: UntaggedValue::Row(group_sets),
..
} => {
for (group_key, group_value) in group_sets.entries.iter() {
match *group_value {
Value {
value: UntaggedValue::Table(ref dataset),
..
} => {
let group = crate::commands::group_by::group(
&column_name,
dataset.to_vec(),
&origin_tag,
)?;
match grouper {
Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
}
});
match group {
Value {
value: UntaggedValue::Row(o),
..
} => {
for (split_label, subset) in o.entries.into_iter() {
match subset {
Value {
value: UntaggedValue::Table(subset),
tag,
} => {
let s = splits
.entry(split_label.clone())
.or_insert(indexmap::IndexMap::new());
s.insert(
group_key.clone(),
UntaggedValue::table(&subset).into_value(tag),
crate::utils::data::split(&values, &Some(block), &name)
}
Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason),
});
crate::utils::data::split(&values, &Some(block), &name)
}
}
}
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
let possibilities = for_value.data_descriptors();
let mut possible_matches: Vec<_> = possibilities
.iter()
.map(|x| (natural::distance::levenshtein_distance(x, &tried), x))
.collect();
possible_matches.sort();
if !possible_matches.is_empty() {
return ShellError::labeled_error(
"Unknown column",
format!("did you mean '{}'?", possible_matches[0].1),
tried.tag(),
);
} else {
return ShellError::labeled_error(
"Unknown column",
"row does not contain this column",
tried.tag(),
);
}
other => {
return Err(ShellError::type_error(
"a table value",
other.spanned_type_name(),
))
}
}
}
}
_ => {
return Err(ShellError::type_error(
"a table value",
group.spanned_type_name(),
))
}
}
}
ref other => {
return Err(ShellError::type_error(
"a table value",
other.spanned_type_name(),
))
}
}
}
}
_ => {
return Err(ShellError::type_error(
"a table value",
value.spanned_type_name(),
))
}
}
let mut out = TaggedDictBuilder::new(&origin_tag);
for (k, v) in splits.into_iter() {
out.insert_untagged(k, UntaggedValue::row(v));
}
Ok(out.into_value())
}
#[cfg(test)]
mod tests {
use super::split;
use crate::commands::group_by::group;
use crate::commands::split_by::split;
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
@ -173,11 +148,12 @@ mod tests {
}
fn nu_releases_grouped_by_date() -> Result<Value, ShellError> {
let key = String::from("date").tagged_unknown();
group(&key, nu_releases_commiters(), Tag::unknown())
let key = Some(String::from("date").tagged_unknown());
let sample = table(&nu_releases_committers());
group(&key, &sample, Tag::unknown())
}
fn nu_releases_commiters() -> Vec<Value> {
fn nu_releases_committers() -> Vec<Value> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
@ -211,7 +187,7 @@ mod tests {
#[test]
fn splits_inner_tables_by_key() -> Result<(), ShellError> {
let for_key = String::from("country").tagged_unknown();
let for_key = Some(String::from("country").tagged_unknown());
assert_eq!(
split(&for_key, &nu_releases_grouped_by_date()?, Tag::unknown())?,
@ -257,7 +233,7 @@ mod tests {
#[test]
fn errors_if_key_within_some_inner_table_is_missing() {
let for_key = String::from("country").tagged_unknown();
let for_key = Some(String::from("country").tagged_unknown());
let nu_releases = row(indexmap! {
"August 23-2019".into() => table(&[

View file

@ -78,7 +78,7 @@ async fn t_sort_by(
let values: Vec<Value> = input.collect().await;
let column_grouped_by_name = if let Some(grouped_by) = group_by {
Some(grouped_by.item().clone())
Some(grouped_by)
} else {
None
};

View file

@ -1,61 +1,28 @@
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::{TaggedDictBuilder, UntaggedValue, Value};
use nu_source::{Tag, Tagged, TaggedItem};
use nu_value_ext::{as_string, get_data_by_key};
use nu_source::Tag;
use nu_value_ext::as_string;
#[allow(clippy::type_complexity)]
pub fn group(
column_name: Option<Tagged<String>>,
values: &[Value],
grouper: Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
values: &Value,
grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
for value in values {
let group_key = if let Some(ref column_name) = column_name {
get_data_by_key(&value, column_name.borrow_spanned())
for value in values.table_entries() {
let group_key = if let Some(ref grouper) = grouper {
grouper(&value)
} else {
Some(value.clone())
as_string(&value)
};
if let Some(group_key) = group_key {
let group_key = if let Some(ref grouper) = grouper {
grouper(&group_key)
} else {
as_string(&group_key)
};
let group = groups.entry(group_key?).or_insert(vec![]);
group.push((*value).clone());
} else {
let column_name = column_name.unwrap_or_else(|| String::from("").tagged(&tag));
let possibilities = value.data_descriptors();
let mut possible_matches: Vec<_> = possibilities
.iter()
.map(|x| (natural::distance::levenshtein_distance(x, &column_name), x))
.collect();
possible_matches.sort();
if !possible_matches.is_empty() {
return Err(ShellError::labeled_error(
"Unknown column",
format!("did you mean '{}'?", possible_matches[0].1),
column_name.tag(),
));
} else {
return Err(ShellError::labeled_error(
"Unknown column",
"row does not contain this column",
column_name.tag(),
));
}
}
}
let mut out = TaggedDictBuilder::new(&tag);

View file

@ -1,3 +1,5 @@
pub mod group;
pub mod split;
pub use crate::utils::data::group::group;
pub use crate::utils::data::split::split;

View file

@ -12,7 +12,7 @@ use num_traits::Zero;
const ERR_EMPTY_DATA: &str = "Cannot perform aggregate math operation on empty data";
pub fn columns_sorted(
_group_by_name: Option<String>,
_group_by_name: Option<Tagged<String>>,
value: &Value,
tag: impl Into<Tag>,
) -> Vec<Tagged<String>> {
@ -61,7 +61,7 @@ pub fn columns_sorted(
}
pub fn t_sort(
group_by_name: Option<String>,
group_by_name: Option<Tagged<String>>,
split_by_name: Option<String>,
value: &Value,
tag: impl Into<Tag>,
@ -454,12 +454,13 @@ mod tests {
}
fn nu_releases_grouped_by_date() -> Result<Value, ShellError> {
let key = String::from("date").tagged_unknown();
group(&key, nu_releases_commiters(), Tag::unknown())
let key = Some(String::from("date").tagged_unknown());
let sample = table(&nu_releases_committers());
group(&key, &sample, Tag::unknown())
}
fn nu_releases_sorted_by_date() -> Result<Value, ShellError> {
let key = String::from("date");
let key = String::from("date").tagged(Tag::unknown());
t_sort(
Some(key),
@ -481,7 +482,7 @@ mod tests {
)
}
fn nu_releases_commiters() -> Vec<Value> {
fn nu_releases_committers() -> Vec<Value> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
@ -515,7 +516,7 @@ mod tests {
#[test]
fn show_columns_sorted_given_a_column_to_sort_by() -> Result<(), ShellError> {
let by_column = String::from("date");
let by_column = String::from("date").tagged(Tag::unknown());
assert_eq!(
columns_sorted(
@ -535,7 +536,7 @@ mod tests {
#[test]
fn sorts_the_tables() -> Result<(), ShellError> {
let group_by = String::from("date");
let group_by = String::from("date").tagged(Tag::unknown());
assert_eq!(
t_sort(

View file

@ -91,6 +91,14 @@ impl UntaggedValue {
}
}
/// Reports whether this value is the `Table` variant.
///
/// Every other variant yields `false`.
pub fn is_table(&self) -> bool {
    matches!(self, UntaggedValue::Table(_))
}
/// Returns true if the value represents something other than Nothing
pub fn is_some(&self) -> bool {
!self.is_none()