WIP: 1486/first row as headers (#1530)

* headers plugin

* Remove plugin

* Add non-functioning headers command

* Add ability to extract headers from first row

* Refactor header extraction

* Rebuild indexmap with proper headers

* Rebuild result properly

* Compiling, probably wrapped too much?

* Refactoring

* Deal with case of empty header cell

* Deal with case of empty header cell

* Fix formatting

* Fix linting, attempt 2.

* Move whole_stream_command(Headers) to more appropriate section

* ... more linting

* Return Err(ShellError...) instead of panic, yield each row instead of entire table

* Insert Column[index] if no header info is found.

* Update error description

* Add initial test

* Add tests for headers command

* Lint test cases in headers

* Change ShellError for headers, Add sample_headers file to utils.rs

* Add empty sheet to test file

* Revert "Add empty sheet to test file"

This reverts commit a4bf38a31d.

* Show error message when given empty table
This commit is contained in:
Sam Hedin 2020-03-29 04:05:57 +02:00 committed by GitHub
parent a5e97ca549
commit ae5f3c8210
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 119 additions and 0 deletions

View file

@ -311,6 +311,7 @@ pub fn create_default_context(
whole_stream_command(Shuffle),
whole_stream_command(Wrap),
whole_stream_command(Pivot),
whole_stream_command(Headers),
// Data processing
whole_stream_command(Histogram),
whole_stream_command(Sum),

View file

@ -45,6 +45,7 @@ pub(crate) mod from_xml;
pub(crate) mod from_yaml;
pub(crate) mod get;
pub(crate) mod group_by;
pub(crate) mod headers;
pub(crate) mod help;
pub(crate) mod histogram;
pub(crate) mod history;
@ -155,6 +156,7 @@ pub(crate) use from_yaml::FromYAML;
pub(crate) use from_yaml::FromYML;
pub(crate) use get::Get;
pub(crate) use group_by::GroupBy;
pub(crate) use headers::Headers;
pub(crate) use help::Help;
pub(crate) use histogram::Histogram;
pub(crate) use history::History;

View file

@ -0,0 +1,80 @@
use crate::commands::WholeStreamCommand;
use crate::context::CommandRegistry;
use crate::prelude::*;
use futures::stream::StreamExt;
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::Dictionary;
use nu_protocol::{ReturnSuccess, Signature, UntaggedValue, Value};
/// Command type for the `headers` command, which uses the first row of a
/// table as its column names.
pub struct Headers;
/// Deserialized arguments for the `headers` command.
///
/// The struct has no fields: the command's signature declares no parameters.
#[derive(Deserialize)]
pub struct HeadersArgs {}
impl WholeStreamCommand for Headers {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "headers"
    }

    /// Signature of the command; it declares no parameters or switches.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
    }

    /// One-line usage text for the command.
    fn usage(&self) -> &str {
        "Use the first row of the table as column names"
    }

    /// Processes the raw arguments with the `headers` function and runs the
    /// result, propagating any error from argument processing.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, headers)?;
        runnable.run()
    }
}
/// Re-keys every row after the first, using the cells of the first row as the
/// column names.
///
/// The whole input is collected before anything is emitted. The first row is
/// consumed as the header row and is not part of the output; each following
/// row is yielded individually with its cells re-keyed by position.
///
/// Naming rules:
/// * A header cell that cannot be read as a string is named `Column{index}`,
///   where `index` is the zero-based position of the cell.
/// * A data cell whose position lies beyond the end of the header row is
///   also named `Column{index}` instead of causing an out-of-bounds panic.
/// * If two header cells produce the same name, the later cell's value
///   replaces the earlier one in the output row (`IndexMap::insert`
///   semantics).
///
/// # Errors
///
/// The returned stream yields an error when:
/// * the input is empty (no header row to read);
/// * the first value is not a row;
/// * a later value is not a row (the error is yielded for that value and
///   processing continues with the next one).
pub fn headers(
    HeadersArgs {}: HeadersArgs,
    RunnableContext { input, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    let stream = async_stream! {
        let rows: Vec<Value> = input.values.collect().await;

        if rows.is_empty() {
            yield Err(ShellError::untagged_runtime_error("Couldn't find headers, was the input a properly formatted, non-empty table?"));
            // Stop here: falling through would index `rows[0]` on an empty
            // vector and panic if the stream were polled again.
            return;
        }

        // The headers are the first row in the table.
        let headers: Vec<String> = match &rows[0].value {
            UntaggedValue::Row(d) => {
                Ok(d.entries.iter().enumerate().map(|(index, (_, v))| {
                    // If a cell that should contain a header name is empty
                    // (not readable as a string), name the column Column[index].
                    v.as_string().unwrap_or_else(|_| format!("Column{}", index))
                }).collect())
            }
            _ => Err(ShellError::unexpected_eof("Could not get headers, is the table empty?", rows[0].tag.span))
        }?;

        // Each output row is a dictionary with the headers as keys.
        for r in rows.iter().skip(1) {
            match &r.value {
                UntaggedValue::Row(d) => {
                    let mut entries = IndexMap::new();
                    for (i, (_, v)) in d.entries.iter().enumerate() {
                        // A row may be wider than the header row; fall back to
                        // the same placeholder naming rather than indexing out
                        // of bounds.
                        let name = headers
                            .get(i)
                            .cloned()
                            .unwrap_or_else(|| format!("Column{}", i));
                        entries.insert(name, v.clone());
                    }
                    yield Ok(ReturnSuccess::Value(UntaggedValue::Row(Dictionary{entries}).into_value(r.tag.clone())))
                }
                _ => yield Err(ShellError::unexpected_eof("Couldn't iterate through rows, was the input a properly formatted table?", r.tag.span))
            }
        }
    };

    Ok(stream.to_output_stream())
}

View file

@ -317,6 +317,10 @@ mod tests {
loc: fixtures().join("sample_data.xlsx"),
at: 0
},
Res {
loc: fixtures().join("sample_headers.xlsx"),
at: 0
},
Res {
loc: fixtures().join("script.nu"),
at: 0

View file

@ -0,0 +1,31 @@
use nu_test_support::{nu, pipeline};
/// Runs `headers` on `Sheet1` of the `sample_headers.xlsx` fixture and checks
/// that the resulting `header0` column can be fetched and renders as
/// "r1c0r2c0".
// NOTE(review): presumably the fixture's first row holds `header0` in its
// first cell and the two data rows hold `r1c0` / `r2c0` — confirm against the
// fixture file, which is not visible here.
#[test]
fn headers_uses_first_row_as_header() {
let actual = nu!(
cwd: "tests/fixtures/formats", pipeline(
r#"
open sample_headers.xlsx
| get Sheet1
| headers
| get header0
| from-json"#
));
assert_eq!(actual, "r1c0r2c0")
}
/// Runs `headers` on `Sheet1` of the `sample_headers.xlsx` fixture and checks
/// that a column named `Column1` exists in the result and renders as
/// "r1c1r2c1".
// NOTE(review): presumably the fixture's second header cell is empty, so the
// command falls back to the `Column{index}` placeholder name — confirm against
// the fixture file, which is not visible here.
#[test]
fn headers_adds_missing_column_name() {
let actual = nu!(
cwd: "tests/fixtures/formats", pipeline(
r#"
open sample_headers.xlsx
| get Sheet1
| headers
| get Column1
| from-json"#
));
assert_eq!(actual, "r1c1r2c1")
}

View file

@ -10,6 +10,7 @@ mod first;
mod format;
mod get;
mod group_by;
mod headers;
mod histogram;
mod insert;
mod last;

Binary file not shown.