[from|to]csv additions/refactoring.

Introduced a flag to tell `from-csv` / `to-csv` whether we want headers parsed and/or written.
This commit is contained in:
Andrés N. Robalino 2019-08-25 07:59:46 -05:00
parent de930daf33
commit 0e14ba86ae
7 changed files with 303 additions and 143 deletions

View file

@ -5,26 +5,32 @@ use csv::ReaderBuilder;
pub struct FromCSV;
impl WholeStreamCommand for FromCSV {
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
from_csv(args, registry)
}
/// Arguments accepted by the `from-csv` command, deserialized from the evaluated call
/// (the command's `run` hands them to `from_csv` through `args.process`).
#[derive(Deserialize)]
pub struct FromCSVArgs {
/// Value of the `headerless` switch declared in the command signature. When set, the
/// parser names columns `Column1`, `Column2`, ... instead of using the text of the first
/// record as column names.
headerless: bool,
}
impl WholeStreamCommand for FromCSV {
fn name(&self) -> &str {
"from-csv"
}
fn signature(&self) -> Signature {
Signature::build("from-csv")
Signature::build("from-csv").switch("headerless")
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, from_csv)?.run()
}
}
pub fn from_csv_string_to_value(
s: String,
headerless: bool,
tag: impl Into<Tag>,
) -> Result<Tagged<Value>, csv::Error> {
let mut reader = ReaderBuilder::new()
@ -39,10 +45,14 @@ pub fn from_csv_string_to_value(
if let Some(result) = iter.next() {
let line = result?;
for item in line.iter() {
for (idx, item) in line.iter().enumerate() {
if headerless {
fields.push_back(format!("Column{}", idx + 1));
} else {
fields.push_back(item.to_string());
}
}
}
loop {
if let Some(row_values) = iter.next() {
@ -66,10 +76,13 @@ pub fn from_csv_string_to_value(
Ok(Tagged::from_item(Value::List(rows), tag))
}
fn from_csv(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
let args = args.evaluate_once(registry)?;
let span = args.name_span();
let input = args.input;
fn from_csv(
FromCSVArgs {
headerless: skip_headers,
}: FromCSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_span = name;
let stream = async_stream_block! {
let values: Vec<Tagged<Value>> = input.values.collect().await;
@ -88,7 +101,7 @@ fn from_csv(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStrea
_ => yield Err(ShellError::labeled_error_with_secondary(
"Expected a string from pipeline",
"requires string input",
span,
name_span,
"value originates from here",
value_tag.span,
)),
@ -96,7 +109,7 @@ fn from_csv(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStrea
}
}
match from_csv_string_to_value(concat_string, span) {
match from_csv_string_to_value(concat_string, skip_headers, name_span) {
Ok(x) => match x {
Tagged { item: Value::List(list), .. } => {
for l in list {
@ -109,7 +122,7 @@ fn from_csv(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStrea
yield Err(ShellError::labeled_error_with_secondary(
"Could not parse as CSV",
"input cannot be parsed as CSV",
span,
name_span,
"value originates from here",
last_tag.span,
))

View file

@ -428,6 +428,7 @@ pub fn parse_string_as_value(
match extension {
Some(x) if x == "csv" => crate::commands::from_csv::from_csv_string_to_value(
contents,
false,
contents_tag,
)
.map_err(move |_| {

View file

@ -5,21 +5,26 @@ use csv::WriterBuilder;
pub struct ToCSV;
impl WholeStreamCommand for ToCSV {
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
to_csv(args, registry)
}
/// Arguments accepted by the `to-csv` command, deserialized from the evaluated call
/// (the command's `run` hands them to `to_csv` through `args.process`).
#[derive(Deserialize)]
pub struct ToCSVArgs {
/// Value of the `headerless` switch declared in the command signature. When set, the
/// first line of the generated CSV text (the header record) is dropped from the output.
headerless: bool,
}
impl WholeStreamCommand for ToCSV {
fn name(&self) -> &str {
"to-csv"
}
fn signature(&self) -> Signature {
Signature::build("to-csv")
Signature::build("to-csv").switch("headerless")
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, to_csv)?.run()
}
}
@ -27,6 +32,9 @@ pub fn value_to_csv_value(v: &Value) -> Value {
match v {
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
Value::Object(o) => Value::Object(o.clone()),
Value::List(l) => Value::List(l.clone()),
Value::Block(_) => Value::Primitive(Primitive::Nothing),
@ -34,9 +42,20 @@ pub fn value_to_csv_value(v: &Value) -> Value {
}
}
/// Renders a single value as the text placed in one CSV field.
///
/// Strings are copied as-is, dates use their `to_string` form, byte counts are printed as a
/// plain integer, and booleans go through `Value::as_string`. Nested lists and objects are
/// not expanded; they are replaced by the placeholders `[list list]` and `[object]`.
///
/// # Errors
///
/// Returns a `"Bad input"` error for any other kind of value, and forwards the error from
/// `as_string` if the boolean conversion fails.
fn to_string_helper(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
    let text = match v {
        Value::Primitive(Primitive::String(s)) => s.to_string(),
        Value::Primitive(Primitive::Date(d)) => d.to_string(),
        Value::Primitive(Primitive::Bytes(b)) => (*b as u64).to_string(),
        Value::Primitive(Primitive::Boolean(_)) => v.as_string()?,
        Value::List(_) => String::from("[list list]"),
        Value::Object(_) => String::from("[object]"),
        // Everything else has no CSV field representation here.
        _ => return Err("Bad input".into()),
    };

    Ok(text)
}
pub fn to_string(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
match v {
Value::List(_l) => return Ok(String::from("[list list]")),
Value::Object(o) => {
let mut wtr = WriterBuilder::new().from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
@ -44,7 +63,7 @@ pub fn to_string(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
for (k, v) in o.entries.iter() {
fields.push_back(k.clone());
values.push_back(to_string(&v)?);
values.push_back(to_string_helper(&v)?);
}
wtr.write_record(fields).expect("can not write.");
@ -52,22 +71,31 @@ pub fn to_string(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
return Ok(String::from_utf8(wtr.into_inner()?)?);
}
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
_ => return Err("Bad input".into()),
_ => return to_string_helper(&v),
}
}
fn to_csv(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
let args = args.evaluate_once(registry)?;
let name_span = args.name_span();
let out = args.input;
fn to_csv(
ToCSVArgs { headerless }: ToCSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_span = name;
let out = input;
Ok(out
.values
.map(move |a| match to_string(&value_to_csv_value(&a.item)) {
Ok(x) => ReturnSuccess::value(
Value::Primitive(Primitive::String(x)).simple_spanned(name_span),
),
Ok(x) => {
let converted = if headerless {
x.lines().skip(1).collect()
} else {
x
};
ReturnSuccess::value(
Value::Primitive(Primitive::String(converted)).simple_spanned(name_span),
)
}
_ => Err(ShellError::labeled_error_with_secondary(
"Expected an object with CSV-compatible structure from pipeline",
"requires CSV-compatible input",

114
tests/command_open_tests.rs Normal file
View file

@ -0,0 +1,114 @@
mod helpers;
use helpers::{in_directory as cwd, Playground, Stub::*};
/// `open` should parse a `.csv` file into a table: a three-row fixture is written to the
/// playground, the row whose `author` is "Andres N. Robalino" is selected, and its
/// `source` column must be "Ecuador".
#[test]
fn recognizes_csv() {
Playground::setup_for("open_recognizes_csv_test").with_files(vec![FileWithContentToBeTrimmed(
"nu.zion.csv",
r#"
author,lang,source
Jonathan Turner,Rust,New Zealand
Andres N. Robalino,Rust,Ecuador
Yehuda Katz,Rust,Estados Unidos
"#,
)]);
nu!(
output,
cwd("tests/fixtures/nuplayground/open_recognizes_csv_test"),
"open nu.zion.csv | where author == \"Andres N. Robalino\" | get source | echo $it"
);
assert_eq!(output, "Ecuador");
}
/// Opens the `sample.bson` fixture, takes the entry selected by `nth 0`, and expects its
/// `b` field to be "hello".
#[test]
fn open_can_parse_bson_1() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.bson | nth 0 | get b | echo $it"
);
assert_eq!(output, "hello");
}
/// Opens the `sample.bson` fixture, takes the entry selected by `nth 6`, and expects the
/// `$binary_subtype` field nested under `b` to be "function".
#[test]
fn open_can_parse_bson_2() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.bson | nth 6 | get b | get '$binary_subtype' | echo $it "
);
assert_eq!(output, "function");
}
/// Opens the `cargo_sample.toml` fixture and expects `package.edition` to be "2018".
#[test]
fn open_can_parse_toml() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open cargo_sample.toml | get package.edition | echo $it"
);
assert_eq!(output, "2018");
}
/// Opens the `sgml_description.json` fixture and follows a deeply nested column path
/// (`glossary.GlossDiv.GlossList.GlossEntry.GlossSee`), expecting the value "markup".
#[test]
fn open_can_parse_json() {
nu!(output,
cwd("tests/fixtures/formats"),
"open sgml_description.json | get glossary.GlossDiv.GlossList.GlossEntry.GlossSee | echo $it"
);
assert_eq!(output, "markup")
}
/// Opens the `jonathan.xml` fixture and expects `rss.channel.item.link` to be the URL
/// asserted below.
#[test]
fn open_can_parse_xml() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open jonathan.xml | get rss.channel.item.link | echo $it"
);
assert_eq!(
output,
"http://www.jonathanturner.org/2015/10/off-to-new-adventures.html"
)
}
/// Opens the `sample.ini` fixture and expects the `integer` key of `SectionOne` to be "1234".
#[test]
fn open_can_parse_ini() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.ini | get SectionOne.integer | echo $it"
);
assert_eq!(output, "1234")
}
/// Opens the `utf16.ini` fixture (presumably UTF-16 encoded, judging by its name — the file
/// itself is not visible here) and expects `IconIndex` under `.ShellClassInfo` to be "-236".
#[test]
fn open_can_parse_utf16_ini() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it"
);
assert_eq!(output, "-236")
}
/// Opening a file name that is not among the fixtures should fail: the output captured by
/// `nu_error!` must contain "File could not be opened".
#[test]
fn errors_if_file_not_found() {
nu_error!(
output,
cwd("tests/fixtures/formats"),
"open i_dont_exist.txt | echo $it"
);
assert!(output.contains("File could not be opened"));
}

View file

@ -13,108 +13,6 @@ fn lines() {
assert_eq!(output, "rustyline");
}
#[test]
fn open_can_parse_csv() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open caco3_plastics.csv | first 1 | get origin | echo $it"
);
assert_eq!(output, "SPAIN");
}
#[test]
fn open_can_parse_bson_1() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.bson | nth 0 | get b | echo $it"
);
assert_eq!(output, "hello");
}
#[test]
fn open_can_parse_bson_2() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.bson | nth 6 | get b | get '$binary_subtype' | echo $it "
);
assert_eq!(output, "function");
}
#[test]
fn open_can_parse_toml() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open cargo_sample.toml | get package.edition | echo $it"
);
assert_eq!(output, "2018");
}
#[test]
fn open_can_parse_json() {
nu!(output,
cwd("tests/fixtures/formats"),
"open sgml_description.json | get glossary.GlossDiv.GlossList.GlossEntry.GlossSee | echo $it"
);
assert_eq!(output, "markup")
}
#[test]
fn open_can_parse_xml() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open jonathan.xml | get rss.channel.item.link | echo $it"
);
assert_eq!(
output,
"http://www.jonathanturner.org/2015/10/off-to-new-adventures.html"
)
}
#[test]
fn open_can_parse_ini() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open sample.ini | get SectionOne.integer | echo $it"
);
assert_eq!(output, "1234")
}
#[test]
fn open_can_parse_utf16_ini() {
nu!(
output,
cwd("tests/fixtures/formats"),
"open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it"
);
assert_eq!(output, "-236")
}
#[test]
fn open_error_if_file_not_found() {
nu_error!(
output,
cwd("tests/fixtures/formats"),
"open i_dont_exist.txt | echo $it"
);
assert!(output.contains("File could not be opened"));
}
#[test]
fn save_figures_out_intelligently_where_to_write_out_with_metadata() {
let sandbox = Playground::setup_for("save_smart_test")
.with_files(vec![FileWithContent(

View file

@ -1,6 +1,6 @@
mod helpers;
use helpers::in_directory as cwd;
use helpers::{in_directory as cwd, Playground, Stub::*};
#[test]
fn can_convert_table_to_csv_text_and_from_csv_text_back_into_table() {
@ -13,6 +13,88 @@ fn can_convert_table_to_csv_text_and_from_csv_text_back_into_table() {
assert_eq!(output, "SPAIN");
}
/// `to-csv` should turn a table row into CSV text. The fixture is read raw, split into
/// columns `a b c d origin`, and the last row is converted; line index 1 of the resulting
/// text (the line after the header) must contain the original data row.
#[test]
fn converts_structured_table_to_csv_text() {
Playground::setup_for("filter_to_csv_test_1").with_files(vec![FileWithContentToBeTrimmed(
"sample.csv",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
nu!(
output,
cwd("tests/fixtures/nuplayground/filter_to_csv_test_1"),
"open sample.csv --raw | lines | split-column \",\" a b c d origin | last 1 | to-csv | lines | nth 1 | echo \"$it\""
);
assert!(output.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
}
/// Same pipeline as the plain `to-csv` conversion test, but with `--headerless`: the
/// converted text is echoed directly (no `lines | nth 1` step) and must contain the
/// original data row.
#[test]
fn converts_structured_table_to_csv_text_skipping_headers_after_conversion() {
Playground::setup_for("filter_to_csv_test_2").with_files(vec![FileWithContentToBeTrimmed(
"sample.csv",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
nu!(
output,
cwd("tests/fixtures/nuplayground/filter_to_csv_test_2"),
"open sample.csv --raw | lines | split-column \",\" a b c d origin | last 1 | to-csv --headerless | echo \"$it\""
);
assert!(output.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
}
/// `from-csv` should parse CSV text from a `.txt` file into a table whose columns are named
/// by the header row: summing the `rusty_luck` column of the three data rows must give "3".
#[test]
fn converts_from_csv_text_to_structured_table() {
Playground::setup_for("filter_from_csv_test_1").with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
nu!(
output,
cwd("tests/fixtures/nuplayground/filter_from_csv_test_1"),
"open los_tres_amigos.txt | from-csv | get rusty_luck | str --to-int | sum | echo $it"
);
assert_eq!(output, "3");
}
/// `from-csv --headerless` should expose generated column names: the third column is read
/// as `Column3` rather than `rusty_luck`, and summing it over the fixture must give "3".
#[test]
fn converts_from_csv_text_skipping_headers_to_structured_table() {
Playground::setup_for("filter_from_csv_test_2").with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
nu!(
output,
cwd("tests/fixtures/nuplayground/filter_from_csv_test_2"),
"open los_tres_amigos.txt | from-csv --headerless | get Column3 | str --to-int | sum | echo $it"
);
assert_eq!(output, "3");
}
#[test]
fn can_convert_table_to_json_text_and_from_json_text_back_into_table() {
nu!(

View file

@ -79,6 +79,7 @@ macro_rules! nu_error {
/// Describes one file that `Playground::with_files` should set up for a test.
pub enum Stub<'a> {
/// `(name, content)`: the content is used exactly as given.
FileWithContent(&'a str, &'a str),
/// `(name, content)`: the first line of the content is skipped, every remaining line is
/// trimmed, and the lines are joined with the platform line ending. This lets a test
/// write the content as a raw string literal that starts on its own line.
FileWithContentToBeTrimmed(&'a str, &'a str),
/// `(name)`: no content is supplied; `with_files` substitutes the text "fake data".
EmptyFile(&'a str),
}
@ -124,14 +125,25 @@ impl Playground {
}
pub fn with_files(&mut self, files: Vec<Stub>) -> &mut Self {
let endl = line_ending();
files
.iter()
.map(|f| {
let mut path = PathBuf::from(&self.cwd);
let (file_name, contents) = match *f {
Stub::EmptyFile(name) => (name, "fake data"),
Stub::FileWithContent(name, content) => (name, content),
Stub::EmptyFile(name) => (name, "fake data".to_string()),
Stub::FileWithContent(name, content) => (name, content.to_string()),
Stub::FileWithContentToBeTrimmed(name, content) => (
name,
content
.lines()
.skip(1)
.map(|line| line.trim())
.collect::<Vec<&str>>()
.join(&endl),
),
};
path.push(file_name);
@ -176,6 +188,18 @@ pub fn file_contents(full_path: &str) -> String {
contents
}
/// Returns the line terminator for the platform the tests were compiled for:
/// `"\r\n"` on Windows and `"\n"` everywhere else.
pub fn line_ending() -> String {
    let terminator = if cfg!(windows) { "\r\n" } else { "\n" };
    terminator.to_owned()
}
pub fn normalize_string(input: &str) -> String {
#[cfg(windows)]
{