to csv and to tsv (#412)

* MathEval Variance and Stddev

* Fix tests and linting

* Typo

* Deal with streams when they are not tables

* ToTsv and ToCsv
This commit is contained in:
Luccas Mateus 2021-12-02 23:02:22 -03:00 committed by GitHub
parent 349e83abd0
commit 3d8394a909
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 363 additions and 0 deletions

View file

@ -157,6 +157,8 @@ pub fn create_default_context() -> EngineState {
ToJson, ToJson,
ToUrl, ToUrl,
ToToml, ToToml,
ToTsv,
ToCsv,
Touch, Touch,
Use, Use,
Update, Update,

View file

@ -0,0 +1,106 @@
use crate::formats::to::delimited::to_delimited_data;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Config, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
Value,
};
/// The `to csv` command: turns pipeline input into delimiter-separated text.
#[derive(Clone)]
pub struct ToCsv;

impl Command for ToCsv {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "to csv"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Convert table into .csv text "
    }

    /// Declares the optional `--separator`/`-s` string flag and the
    /// `--noheaders`/`-n` switch, and files the command under `Formats`.
    fn signature(&self) -> Signature {
        let with_separator = Signature::build("to csv").named(
            "separator",
            SyntaxShape::String,
            "a character to separate columns, defaults to ','",
            Some('s'),
        );
        let with_noheaders = with_separator.switch(
            "noheaders",
            "do not output the columns names as the first row",
            Some('n'),
        );
        with_noheaders.category(Category::Formats)
    }

    /// Usage examples: one with the default separator, one with `-s ';'`.
    fn examples(&self) -> Vec<Example> {
        let default_separator = Example {
            description: "Outputs an CSV string representing the contents of this table",
            example: "[[foo bar]; [1 2]] | to csv",
            result: Some(Value::test_string("foo,bar\n1,2\n")),
        };
        let custom_separator = Example {
            description: "Outputs an CSV string representing the contents of this table",
            example: "[[foo bar]; [1 2]] | to csv -s ';' ",
            result: Some(Value::test_string("foo;bar\n1;2\n")),
        };
        vec![default_separator, custom_separator]
    }

    /// Reads the two flags and the current config, then delegates to `to_csv`.
    ///
    /// Errors from evaluating `--separator` or from fetching the config are
    /// propagated unchanged.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let skip_header_row = call.has_flag("noheaders");
        let delimiter: Option<Spanned<String>> =
            call.get_flag(engine_state, stack, "separator")?;
        let config = stack.get_config()?;
        to_csv(input, skip_header_row, delimiter, call.head, config)
    }
}
/// Resolves the `--separator` flag to a single delimiter character and hands
/// the input to `to_delimited_data` to be rendered as "CSV".
///
/// * `separator` - the raw flag value, if given. The two-character text `\t`
///   is accepted as a spelling of a tab; otherwise it must be exactly one
///   ASCII character. When absent, `,` is used.
/// * `noheaders`, `head`, `config` - forwarded untouched.
///
/// # Errors
///
/// Returns `ShellError::UnsupportedInput`, pointing at the flag's span, when
/// the separator is empty, longer than one character, or not ASCII. Any error
/// from `to_delimited_data` is propagated.
fn to_csv(
    input: PipelineData,
    noheaders: bool,
    separator: Option<Spanned<String>>,
    head: Span,
    config: Config,
) -> Result<PipelineData, ShellError> {
    let sep = match separator {
        Some(Spanned { item: s, span, .. }) => {
            if s == r"\t" {
                '\t'
            } else {
                let mut chars = s.chars();
                match (chars.next(), chars.next()) {
                    (Some(c), None) if c.is_ascii() => c,
                    // The delimiter is later narrowed to a single byte for the
                    // CSV writer; a non-ASCII char would be truncated to some
                    // unrelated byte, so reject it here with a clear message.
                    (Some(_), None) => {
                        return Err(ShellError::UnsupportedInput(
                            "Expected an ASCII separator char from --separator".to_string(),
                            span,
                        ));
                    }
                    _ => {
                        return Err(ShellError::UnsupportedInput(
                            "Expected a single separator char from --separator".to_string(),
                            span,
                        ));
                    }
                }
            }
        }
        None => ',',
    };
    to_delimited_data(noheaders, sep, "CSV", input, head, config)
}
// Unit tests for the `to csv` command.
#[cfg(test)]
mod test {
use super::*;
// Passes the command to the crate-level `test_examples` helper.
// NOTE(review): presumably that helper evaluates each entry of
// `ToCsv::examples` against its `result` — confirm in the helper.
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(ToCsv {})
}
}

View file

@ -0,0 +1,149 @@
use csv::WriterBuilder;
use indexmap::{indexset, IndexSet};
use nu_protocol::{Config, IntoPipelineData, PipelineData, ShellError, Span, Value};
use std::collections::VecDeque;
fn from_value_to_delimited_string(
value: &Value,
separator: char,
config: &Config,
) -> Result<String, ShellError> {
match value {
Value::Record { cols, vals, span } => {
let mut wtr = WriterBuilder::new()
.delimiter(separator as u8)
.from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
let mut values: VecDeque<String> = VecDeque::new();
for (k, v) in cols.iter().zip(vals.iter()) {
fields.push_back(k.clone());
values.push_back(to_string_tagged_value(v, config)?);
}
wtr.write_record(fields).expect("can not write.");
wtr.write_record(values).expect("can not write.");
let v = String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
})?)
.map_err(|_| {
ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
})?;
Ok(v)
}
Value::List { vals, span } => {
let mut wtr = WriterBuilder::new()
.delimiter(separator as u8)
.from_writer(vec![]);
let merged_descriptors = merge_descriptors(vals);
if merged_descriptors.is_empty() {
wtr.write_record(
vals.iter()
.map(|ele| {
to_string_tagged_value(ele, config).unwrap_or_else(|_| String::new())
})
.collect::<Vec<_>>(),
)
.expect("can not write");
} else {
wtr.write_record(merged_descriptors.iter().map(|item| &item[..]))
.expect("can not write.");
for l in vals {
let mut row = vec![];
for desc in &merged_descriptors {
row.push(match l.to_owned().get_data_by_key(desc) {
Some(s) => to_string_tagged_value(&s, config)?,
None => String::new(),
});
}
wtr.write_record(&row).expect("can not write");
}
}
let v = String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
})?)
.map_err(|_| {
ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
})?;
Ok(v)
}
_ => to_string_tagged_value(value, config),
}
}
fn to_string_tagged_value(v: &Value, config: &Config) -> Result<String, ShellError> {
match &v {
Value::String { .. }
| Value::Bool { .. }
| Value::Int { .. }
| Value::Duration { .. }
| Value::Binary { .. }
| Value::CustomValue { .. }
| Value::Error { .. }
| Value::Filesize { .. }
| Value::CellPath { .. }
| Value::Float { .. } => Ok(v.clone().into_string("", config)),
Value::Date { val, .. } => Ok(val.to_string()),
Value::Nothing { .. } => Ok(String::new()),
Value::List { ref vals, .. } => match &vals[..] {
[Value::Record { .. }, _end @ ..] => Ok(String::from("[Table]")),
_ => Ok(String::from("[List]")),
},
Value::Record { .. } => Ok(String::from("[Row]")),
_ => Err(ShellError::UnsupportedInput(
"Unexpected value".to_string(),
v.span().unwrap_or_else(|_| Span::unknown()),
)),
}
}
/// Collects the distinct column names of every record in `values`, in order
/// of first appearance. Elements that are not records contribute nothing.
fn merge_descriptors(values: &[Value]) -> Vec<String> {
    // `IndexSet` iterates in insertion order, so it tracks both uniqueness
    // and first-seen order; no parallel `Vec` is needed.
    let mut seen: IndexSet<String> = indexset! {};
    for value in values {
        if let Value::Record { cols, .. } = value {
            for desc in cols {
                // Clone a name only the first time it is encountered.
                if !seen.contains(desc) {
                    seen.insert(desc.clone());
                }
            }
        }
    }
    seen.into_iter().collect()
}
/// Collects `input` into a single value, serializes it as `sep`-delimited
/// text and returns that text as a string value carrying `span`.
///
/// * `noheaders` - drop the header row from the output, when one was written.
/// * `sep` - the field delimiter.
/// * `format_name` - format label used in the conversion error.
///
/// # Errors
///
/// Any serialization failure is reported as `ShellError::CantConvert`, naming
/// `format_name` and the type of the collected value.
pub fn to_delimited_data(
    noheaders: bool,
    sep: char,
    format_name: &'static str,
    input: PipelineData,
    span: Span,
    config: Config,
) -> Result<PipelineData, ShellError> {
    let value = input.into_value(span);

    // The serializer emits a header row only for a record, or for a list
    // containing at least one record that has columns. For any other shape
    // the first line is data and must survive `noheaders`.
    let has_header_row = match &value {
        Value::Record { .. } => true,
        Value::List { vals, .. } => vals
            .iter()
            .any(|item| matches!(item, Value::Record { cols, .. } if !cols.is_empty())),
        _ => false,
    };

    let mut output = from_value_to_delimited_string(&value, sep, &config).map_err(|_| {
        ShellError::CantConvert(
            format_name.into(),
            value.get_type().to_string(),
            value.span().unwrap_or_else(|_| Span::unknown()),
        )
    })?;

    if noheaders && has_header_row {
        // NOTE(review): the header is taken to end at the first newline; a
        // column name that itself contains a newline would be cut short.
        if let Some(header_end) = output.find('\n') {
            output.replace_range(..=header_end, "");
        }
    }

    Ok(Value::string(output, span).into_pipeline_data())
}

View file

@ -1,9 +1,14 @@
mod command; mod command;
mod csv;
mod delimited;
mod json; mod json;
mod toml; mod toml;
mod tsv;
mod url; mod url;
pub use self::csv::ToCsv;
pub use self::toml::ToToml; pub use self::toml::ToToml;
pub use command::To; pub use command::To;
pub use json::ToJson; pub use json::ToJson;
pub use tsv::ToTsv;
pub use url::ToUrl; pub use url::ToUrl;

View file

@ -0,0 +1,69 @@
use crate::formats::to::delimited::to_delimited_data;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{Category, Config, Example, PipelineData, ShellError, Signature, Span, Value};
/// The `to tsv` command: turns pipeline input into tab-separated text.
#[derive(Clone)]
pub struct ToTsv;

impl Command for ToTsv {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "to tsv"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Convert table into .tsv text"
    }

    /// Declares the `--noheaders`/`-n` switch and files the command under
    /// `Formats`.
    fn signature(&self) -> Signature {
        let with_noheaders = Signature::build("to tsv").switch(
            "noheaders",
            "do not output the column names as the first row",
            Some('n'),
        );
        with_noheaders.category(Category::Formats)
    }

    /// A single usage example converting a one-row table.
    fn examples(&self) -> Vec<Example> {
        let basic = Example {
            description: "Outputs an TSV string representing the contents of this table",
            example: "[[foo bar]; [1 2]] | to tsv",
            result: Some(Value::test_string("foo\tbar\n1\t2\n")),
        };
        vec![basic]
    }

    /// Reads the `noheaders` switch and the current config, then delegates to
    /// `to_tsv`. An error from fetching the config is propagated unchanged.
    fn run(
        &self,
        _engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let skip_header_row = call.has_flag("noheaders");
        let config = stack.get_config()?;
        to_tsv(input, skip_header_row, call.head, config)
    }
}
/// Renders `input` as tab-separated text.
///
/// Thin wrapper that forwards every argument to `to_delimited_data`, fixing
/// the delimiter to a tab character and the format label to "TSV".
fn to_tsv(
input: PipelineData,
noheaders: bool,
head: Span,
config: Config,
) -> Result<PipelineData, ShellError> {
to_delimited_data(noheaders, '\t', "TSV", input, head, config)
}
// Unit tests for the `to tsv` command.
#[cfg(test)]
mod test {
use super::*;
// Passes the command to the crate-level `test_examples` helper.
// NOTE(review): presumably that helper evaluates each entry of
// `ToTsv::examples` against its `result` — confirm in the helper.
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(ToTsv {})
}
}

View file

@ -287,6 +287,38 @@ impl Value {
} }
} }
/// Looks up the column called `name`.
///
/// * On a record: returns a clone of the value in the first column whose
///   name equals `name`, or `None` when there is no such column.
/// * On a list: returns a list with one entry per element. Each entry is the
///   result of the record lookup, or a `Nothing` value (carrying the list's
///   span) for elements that are not records or lack the column. An empty
///   list yields `None`.
/// * On any other value: returns `None`.
pub fn get_data_by_key(&self, name: &str) -> Option<Value> {
    match self {
        Value::Record { cols, vals, .. } => cols
            .iter()
            .zip(vals.iter())
            .find_map(|(col, val)| (col == name).then(|| val.clone())),
        Value::List { vals, span } => {
            if vals.is_empty() {
                return None;
            }
            let column: Vec<Value> = vals
                .iter()
                .map(|item| match item {
                    Value::Record { .. } => item
                        .get_data_by_key(name)
                        .unwrap_or_else(|| Value::nothing(*span)),
                    _ => Value::nothing(*span),
                })
                .collect();
            Some(Value::List {
                vals: column,
                span: *span,
            })
        }
        _ => None,
    }
}
/// Convert Value into string. Note that Streams will be consumed. /// Convert Value into string. Note that Streams will be consumed.
pub fn into_string(self, separator: &str, config: &Config) -> String { pub fn into_string(self, separator: &str, config: &Config) -> String {
match self { match self {