Merge pull request #524 from androbtech/tsv-support

[from/to]tsv support.
This commit is contained in:
Andrés N. Robalino 2019-08-29 04:26:51 -05:00 committed by GitHub
commit 6638fe4ab3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 429 additions and 2 deletions

View file

@ -180,6 +180,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
whole_stream_command(ToCSV),
whole_stream_command(ToJSON),
whole_stream_command(ToTOML),
whole_stream_command(ToTSV),
whole_stream_command(ToYAML),
whole_stream_command(SortBy),
whole_stream_command(Tags),
@ -188,6 +189,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
whole_stream_command(FromArray),
whole_stream_command(FromArray),
whole_stream_command(FromCSV),
whole_stream_command(FromTSV),
whole_stream_command(FromINI),
whole_stream_command(FromBSON),
whole_stream_command(FromJSON),

View file

@ -20,6 +20,7 @@ crate mod from_csv;
crate mod from_ini;
crate mod from_json;
crate mod from_toml;
crate mod from_tsv;
crate mod from_xml;
crate mod from_yaml;
crate mod get;
@ -52,6 +53,7 @@ crate mod to_bson;
crate mod to_csv;
crate mod to_json;
crate mod to_toml;
crate mod to_tsv;
crate mod to_yaml;
crate mod trim;
crate mod version;
@ -78,6 +80,7 @@ crate use from_csv::FromCSV;
crate use from_ini::FromINI;
crate use from_json::FromJSON;
crate use from_toml::FromTOML;
crate use from_tsv::FromTSV;
crate use from_xml::FromXML;
crate use from_yaml::FromYAML;
crate use get::Get;
@ -109,6 +112,7 @@ crate use to_bson::ToBSON;
crate use to_csv::ToCSV;
crate use to_json::ToJSON;
crate use to_toml::ToTOML;
crate use to_tsv::ToTSV;
crate use to_yaml::ToYAML;
crate use trim::Trim;
crate use version::Version;

135
src/commands/from_tsv.rs Normal file
View file

@ -0,0 +1,135 @@
use crate::commands::WholeStreamCommand;
use crate::object::{Primitive, TaggedDictBuilder, Value};
use crate::prelude::*;
use csv::ReaderBuilder;
pub struct FromTSV;
#[derive(Deserialize)]
pub struct FromTSVArgs {
headerless: bool,
}
impl WholeStreamCommand for FromTSV {
fn name(&self) -> &str {
"from-tsv"
}
fn signature(&self) -> Signature {
Signature::build("from-tsv").switch("headerless")
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, from_tsv)?.run()
}
}
pub fn from_tsv_string_to_value(
s: String,
headerless: bool,
tag: impl Into<Tag>,
) -> Result<Tagged<Value>, csv::Error> {
let mut reader = ReaderBuilder::new()
.has_headers(false)
.delimiter(b'\t')
.from_reader(s.as_bytes());
let tag = tag.into();
let mut fields: VecDeque<String> = VecDeque::new();
let mut iter = reader.records();
let mut rows = vec![];
if let Some(result) = iter.next() {
let line = result?;
for (idx, item) in line.iter().enumerate() {
if headerless {
fields.push_back(format!("Column{}", idx + 1));
} else {
fields.push_back(item.to_string());
}
}
}
loop {
if let Some(row_values) = iter.next() {
let row_values = row_values?;
let mut row = TaggedDictBuilder::new(tag);
for (idx, entry) in row_values.iter().enumerate() {
row.insert_tagged(
fields.get(idx).unwrap(),
Value::Primitive(Primitive::String(String::from(entry))).tagged(tag),
);
}
rows.push(row.into_tagged_value());
} else {
break;
}
}
Ok(Tagged::from_item(Value::List(rows), tag))
}
fn from_tsv(
FromTSVArgs {
headerless: skip_headers,
}: FromTSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_span = name;
let stream = async_stream_block! {
let values: Vec<Tagged<Value>> = input.values.collect().await;
let mut concat_string = String::new();
let mut latest_tag: Option<Tag> = None;
for value in values {
let value_tag = value.tag();
latest_tag = Some(value_tag);
match value.item {
Value::Primitive(Primitive::String(s)) => {
concat_string.push_str(&s);
concat_string.push_str("\n");
}
_ => yield Err(ShellError::labeled_error_with_secondary(
"Expected a string from pipeline",
"requires string input",
name_span,
"value originates from here",
value_tag.span,
)),
}
}
match from_tsv_string_to_value(concat_string, skip_headers, name_span) {
Ok(x) => match x {
Tagged { item: Value::List(list), .. } => {
for l in list {
yield ReturnSuccess::value(l);
}
}
x => yield ReturnSuccess::value(x),
},
Err(_) => if let Some(last_tag) = latest_tag {
yield Err(ShellError::labeled_error_with_secondary(
"Could not parse as TSV",
"input cannot be parsed as TSV",
name_span,
"value originates from here",
last_tag.span,
))
} ,
}
};
Ok(stream.to_output_stream())
}

View file

@ -437,6 +437,16 @@ pub fn parse_string_as_value(
)
})
}
Some(ref x) if x == "tsv" => {
crate::commands::from_tsv::from_tsv_string_to_value(contents, false, contents_tag)
.map_err(move |_| {
ShellError::labeled_error(
"Could not open as TSV",
"could not open as TSV",
name_span,
)
})
}
Some(ref x) if x == "toml" => {
crate::commands::from_toml::from_toml_string_to_value(contents, contents_tag).map_err(
move |_| {

View file

@ -1,4 +1,5 @@
use crate::commands::to_csv::{to_string as to_csv_to_string, value_to_csv_value};
use crate::commands::to_tsv::{to_string as to_tsv_to_string, value_to_tsv_value};
use crate::commands::to_json::value_to_json_value;
use crate::commands::to_toml::value_to_toml_value;
use crate::commands::to_yaml::value_to_yaml_value;
@ -166,6 +167,14 @@ fn to_string_for(
}
to_csv_to_string(&value_to_csv_value(&input[0]))?
}
Some(x) if x == "tsv" => {
if input.len() != 1 {
return Err(ShellError::string(
"saving to tsv requires a single object (or use --raw)",
));
}
to_tsv_to_string(&value_to_tsv_value(&input[0]))?
}
Some(x) if x == "toml" => {
if input.len() != 1 {
return Err(ShellError::string(

108
src/commands/to_tsv.rs Normal file
View file

@ -0,0 +1,108 @@
use crate::commands::WholeStreamCommand;
use crate::object::{Primitive, Value};
use crate::prelude::*;
use csv::WriterBuilder;
pub struct ToTSV;
#[derive(Deserialize)]
pub struct ToTSVArgs {
headerless: bool,
}
impl WholeStreamCommand for ToTSV {
fn name(&self) -> &str {
"to-tsv"
}
fn signature(&self) -> Signature {
Signature::build("to-tsv").switch("headerless")
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, to_tsv)?.run()
}
}
pub fn value_to_tsv_value(v: &Value) -> Value {
match v {
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
Value::Object(o) => Value::Object(o.clone()),
Value::List(l) => Value::List(l.clone()),
Value::Block(_) => Value::Primitive(Primitive::Nothing),
_ => Value::Primitive(Primitive::Nothing),
}
}
fn to_string_helper(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
match v {
Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", *b as u64)),
Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?),
Value::List(_) => return Ok(String::from("[list list]")),
Value::Object(_) => return Ok(String::from("[object]")),
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
_ => return Err("Bad input".into()),
}
}
pub fn to_string(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
match v {
Value::Object(o) => {
let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
let mut values: VecDeque<String> = VecDeque::new();
for (k, v) in o.entries.iter() {
fields.push_back(k.clone());
values.push_back(to_string_helper(&v)?);
}
wtr.write_record(fields).expect("can not write.");
wtr.write_record(values).expect("can not write.");
return Ok(String::from_utf8(wtr.into_inner()?)?);
}
_ => return to_string_helper(&v),
}
}
fn to_tsv(
ToTSVArgs { headerless }: ToTSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_span = name;
let out = input;
Ok(out
.values
.map(move |a| match to_string(&value_to_tsv_value(&a.item)) {
Ok(x) => {
let converted = if headerless {
x.lines().skip(1).collect()
} else {
x
};
ReturnSuccess::value(
Value::Primitive(Primitive::String(converted)).simple_spanned(name_span),
)
}
_ => Err(ShellError::labeled_error_with_secondary(
"Expected an object with TSV-compatible structure from pipeline",
"requires TSV-compatible input",
name_span,
format!("{} originates from here", a.item.type_name()),
a.span(),
)),
})
.to_output_stream())
}

View file

@ -250,6 +250,10 @@ mod tests {
loc: fixtures().join("caco3_plastics.csv"),
at: 0
},
Res {
loc: fixtures().join("caco3_plastics.tsv"),
at: 0
},
Res {
loc: fixtures().join("cargo_sample.toml"),
at: 0

View file

@ -68,6 +68,21 @@ fn open_can_parse_toml() {
assert_eq!(actual, "2018");
}
#[test]
fn open_can_parse_tsv() {
let actual = nu!(
cwd: "tests/fixtures/formats", h::pipeline(
r#"
open caco3_plastics.tsv
| first 1
| get origin
| echo $it
"#
));
assert_eq!(actual, "SPAIN")
}
#[test]
fn open_can_parse_json() {
let actual = nu!(

View file

@ -237,6 +237,136 @@ fn converts_structured_table_to_json_text() {
})
}
#[test]
fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table() {
let actual = nu!(
cwd: "tests/fixtures/formats",
"open caco3_plastics.tsv | to-tsv | from-tsv | first 1 | get origin | echo $it"
);
assert_eq!(actual, "SPAIN");
}
#[test]
fn converts_structured_table_to_tsv_text() {
Playground::setup("filter_to_tsv_test_1", |dirs, sandbox| {
sandbox
.with_files(vec![FileWithContentToBeTrimmed(
"tsv_text_sample.txt",
r#"
importer shipper tariff_item name origin
Plasticos Rival Reverte 2509000000 Calcium carbonate Spain
Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia
"#
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open tsv_text_sample.txt
| lines
| split-column "\t" a b c d origin
| last 1
| to-tsv
| lines
| nth 1
| echo "$it"
"#
));
assert!(actual.contains("Colombia"));
})
}
#[test]
fn converts_structured_table_to_tsv_text_skipping_headers_after_conversion() {
Playground::setup("filter_to_tsv_test_2", |dirs, sandbox| {
sandbox
.with_files(vec![FileWithContentToBeTrimmed(
"tsv_text_sample.txt",
r#"
importer shipper tariff_item name origin
Plasticos Rival Reverte 2509000000 Calcium carbonate Spain
Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia
"#
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open tsv_text_sample.txt
| lines
| split-column "\t" a b c d origin
| last 1
| to-tsv --headerless
| echo "$it"
"#
));
assert!(actual.contains("Colombia"));
})
}
#[test]
fn converts_from_tsv_text_to_structured_table() {
Playground::setup("filter_from_tsv_test_1", |dirs, sandbox| {
sandbox
.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
first Name Last Name rusty_luck
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open los_tres_amigos.txt
| from-tsv
| get rusty_luck
| str --to-int
| sum
| echo $it
"#
));
assert_eq!(actual, "3");
})
}
#[test]
fn converts_from_tsv_text_skipping_headers_to_structured_table() {
Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| {
sandbox
.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
first Name Last Name rusty_luck
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open los_tres_amigos.txt
| from-tsv --headerless
| get Column3
| str --to-int
| sum
| echo $it
"#
));
assert_eq!(actual, "3");
})
}
#[test]
fn can_convert_json_text_to_bson_and_back_into_table() {
let actual = nu!(
@ -333,10 +463,10 @@ fn can_sum() {
fn can_filter_by_unit_size_comparison() {
let actual = nu!(
cwd: "tests/fixtures/formats",
"ls | where size > 1kb | sort-by size | get name | skip 1 | trim | echo $it"
"ls | where size > 1kb | sort-by size | get name | first 1 | trim | echo $it"
);
assert_eq!(actual, "caco3_plastics.csv");
assert_eq!(actual, "cargo_sample.toml");
}
#[test]

View file

@ -0,0 +1,10 @@
importer shipper tariff_item name origin shipped_at arrived_at net_weight fob_price cif_price cif_per_net_weight
PLASTICOS RIVAL CIA LTDA S A REVERTE 2509000000 CARBONATO DE CALCIO TIPO CALCIPORE 160 T AL SPAIN 18/03/2016 17/04/2016 81,000.00 14,417.58 18,252.34 0.23
MEXICHEM ECUADOR S.A. OMYA ANDINA S A 2836500000 CARBONATO COLOMBIA 07/07/2016 10/07/2016 26,000.00 7,072.00 8,127.18 0.31
PLASTIAZUAY SA SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 27/07/2016 09/08/2016 81,000.00 8,100.00 11,474.55 0.14
PLASTICOS RIVAL CIA LTDA AND ENDUSTRIYEL HAMMADDELER DIS TCARET LTD.STI. 2836500000 CALCIUM CARBONATE ANADOLU ANDCARB CT-1 TURKEY 04/10/2016 11/11/2016 100,000.00 17,500.00 22,533.75 0.23
QUIMICA COMERCIAL QUIMICIAL CIA. LTDA. SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 24/06/2016 12/07/2016 27,000.00 3,258.90 5,585.00 0.21
PICA PLASTICOS INDUSTRIALES C.A. OMYA ANDINA S.A 3824909999 CARBONATO DE CALCIO COLOMBIA 01/01/1900 18/01/2016 66,500.00 12,635.00 18,670.52 0.28
PLASTIQUIM S.A. OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYA CARB 1T CG BBS 1000 COLOMBIA 01/01/1900 25/10/2016 33,000.00 6,270.00 9,999.00 0.30
QUIMICOS ANDINOS QUIMANDI S.A. SIBELCO COLOMBIA SAS 3824909999 CARBONATO DE CALCIO RECUBIERTO COLOMBIA 01/11/2016 03/11/2016 52,000.00 8,944.00 13,039.05 0.25
TIGRE ECUADOR S.A. ECUATIGRE OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO COLOMBIA 01/01/1900 28/10/2016 66,000.00 11,748.00 18,216.00 0.28
1 importer shipper tariff_item name origin shipped_at arrived_at net_weight fob_price cif_price cif_per_net_weight
2 PLASTICOS RIVAL CIA LTDA S A REVERTE 2509000000 CARBONATO DE CALCIO TIPO CALCIPORE 160 T AL SPAIN 18/03/2016 17/04/2016 81,000.00 14,417.58 18,252.34 0.23
3 MEXICHEM ECUADOR S.A. OMYA ANDINA S A 2836500000 CARBONATO COLOMBIA 07/07/2016 10/07/2016 26,000.00 7,072.00 8,127.18 0.31
4 PLASTIAZUAY SA SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 27/07/2016 09/08/2016 81,000.00 8,100.00 11,474.55 0.14
5 PLASTICOS RIVAL CIA LTDA AND ENDUSTRIYEL HAMMADDELER DIS TCARET LTD.STI. 2836500000 CALCIUM CARBONATE ANADOLU ANDCARB CT-1 TURKEY 04/10/2016 11/11/2016 100,000.00 17,500.00 22,533.75 0.23
6 QUIMICA COMERCIAL QUIMICIAL CIA. LTDA. SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 24/06/2016 12/07/2016 27,000.00 3,258.90 5,585.00 0.21
7 PICA PLASTICOS INDUSTRIALES C.A. OMYA ANDINA S.A 3824909999 CARBONATO DE CALCIO COLOMBIA 01/01/1900 18/01/2016 66,500.00 12,635.00 18,670.52 0.28
8 PLASTIQUIM S.A. OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYA CARB 1T CG BBS 1000 COLOMBIA 01/01/1900 25/10/2016 33,000.00 6,270.00 9,999.00 0.30
9 QUIMICOS ANDINOS QUIMANDI S.A. SIBELCO COLOMBIA SAS 3824909999 CARBONATO DE CALCIO RECUBIERTO COLOMBIA 01/11/2016 03/11/2016 52,000.00 8,944.00 13,039.05 0.25
10 TIGRE ECUADOR S.A. ECUATIGRE OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO COLOMBIA 01/01/1900 28/10/2016 66,000.00 11,748.00 18,216.00 0.28