From a3ff5f12460c44286d2e5e00c9712379e643a306 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Sun, 3 Nov 2019 23:12:14 +0100 Subject: [PATCH 1/5] Updates tests for from tsv, csv, and ssv. With the proposed changes, these tests now become invalid. If the first line is to be counted as data, then converting the headers to ints will fail. Removing the headers and instead treating the first line as data, however, reflects the new, desired mode of operation. --- tests/filters_test.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/filters_test.rs b/tests/filters_test.rs index e410e99e65..9ccb4ab718 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -135,7 +135,6 @@ fn converts_from_csv_text_skipping_headers_to_structured_table() { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_amigos.txt", r#" - first_name,last_name,rusty_luck Andrés,Robalino,1 Jonathan,Turner,1 Yehuda,Katz,1 @@ -361,7 +360,6 @@ fn converts_from_tsv_text_skipping_headers_to_structured_table() { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_amigos.txt", r#" - first Name Last Name rusty_luck Andrés Robalino 1 Jonathan Turner 1 Yehuda Katz 1 @@ -441,12 +439,11 @@ fn converts_from_ssv_text_to_structured_table_with_separator_specified() { } #[test] -fn converts_from_ssv_text_skipping_headers_to_structured_table() { +fn converts_from_ssv_text_treating_first_line_as_data_with_flag() { Playground::setup("filter_from_ssv_test_2", |dirs, sandbox| { sandbox.with_files(vec![FileWithContentToBeTrimmed( "oc_get_svc.txt", r#" - NAME LABELS SELECTOR IP PORT(S) docker-registry docker-registry=default docker-registry=default 172.30.78.158 5000/TCP kubernetes component=apiserver,provider=kubernetes 172.30.0.2 443/TCP kubernetes-ro component=apiserver,provider=kubernetes 172.30.0.1 80/TCP @@ -458,13 +455,13 @@ fn converts_from_ssv_text_skipping_headers_to_structured_table() { r#" open oc_get_svc.txt | from-ssv --headerless - | nth 2 - | get Column2 + | first + | get Column1 | echo $it "# )); - assert_eq!(actual, "component=apiserver,provider=kubernetes"); + assert_eq!(actual, "docker-registry"); }) } From 282cb46ff12d333106cc1f3cc8c9493b6a370387 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Mon, 11 Nov 2019 12:01:21 +0100 Subject: [PATCH 2/5] Implements --headerless for from-csv --- src/commands/from_csv.rs | 55 +++++++++++++++------------------------- 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs index 9483fed521..cd29b625a6 100644 --- a/src/commands/from_csv.rs +++ b/src/commands/from_csv.rs @@ -27,7 +27,7 @@ impl WholeStreamCommand for FromCSV { } fn usage(&self) -> &str { - "Parse text as .csv and create table" + "Parse text as .csv and create table." } fn run( @@ -46,44 +46,29 @@ pub fn from_csv_string_to_value( tag: impl Into, ) -> Result, csv::Error> { let mut reader = ReaderBuilder::new() - .has_headers(false) + .has_headers(!headerless) .delimiter(separator as u8) .from_reader(s.as_bytes()); let tag = tag.into(); - let mut fields: VecDeque = VecDeque::new(); - let mut iter = reader.records(); + let headers = if headerless { + (1..=reader.headers()?.len()) + .map(|i| format!("Column{}", i)) + .collect::>() + } else { + reader.headers()?.iter().map(String::from).collect() + }; + let mut rows = vec![]; - - if let Some(result) = iter.next() { - let line = result?; - - for (idx, item) in line.iter().enumerate() { - if headerless { - fields.push_back(format!("Column{}", idx + 1)); - } else { - fields.push_back(item.to_string()); - } - } - } - - loop { - if let Some(row_values) = iter.next() { - let row_values = row_values?; - - let mut row = TaggedDictBuilder::new(tag.clone()); - - for (idx, entry) in row_values.iter().enumerate() { - row.insert_tagged( - fields.get(idx).unwrap(), - Value::Primitive(Primitive::String(String::from(entry))).tagged(&tag), - ); - } - - rows.push(row.into_tagged_value()); - } else { - break; + for row in reader.records() { + let mut tagged_row = TaggedDictBuilder::new(&tag); + for (value, header) in row?.iter().zip(headers.iter()) { + tagged_row.insert_tagged( + header, + Value::Primitive(Primitive::String(String::from(value))).tagged(&tag), + ) } + rows.push(tagged_row.into_tagged_value()); } Ok(Value::Table(rows).tagged(&tag)) @@ -91,7 +76,7 @@ pub fn from_csv_string_to_value( fn from_csv( FromCSVArgs { - headerless: skip_headers, + headerless, separator, }: FromCSVArgs, RunnableContext { input, name, .. }: RunnableContext, @@ -141,7 +126,7 @@ fn from_csv( } } - match from_csv_string_to_value(concat_string, skip_headers, sep, name_tag.clone()) { + match from_csv_string_to_value(concat_string, headerless, sep, name_tag.clone()) { Ok(x) => match x { Tagged { item: Value::Table(list), .. } => { for l in list { From f8dc06ef49e9a2325981611f3d170ee32fa864c9 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Mon, 11 Nov 2019 12:25:41 +0100 Subject: [PATCH 3/5] Changes implementation of --headerless for from-tsv. --- src/commands/from_tsv.rs | 55 ++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/src/commands/from_tsv.rs b/src/commands/from_tsv.rs index 2284e95573..24841b91c1 100644 --- a/src/commands/from_tsv.rs +++ b/src/commands/from_tsv.rs @@ -39,53 +39,36 @@ pub fn from_tsv_string_to_value( tag: impl Into, ) -> Result, csv::Error> { let mut reader = ReaderBuilder::new() - .has_headers(false) + .has_headers(!headerless) .delimiter(b'\t') .from_reader(s.as_bytes()); let tag = tag.into(); - let mut fields: VecDeque = VecDeque::new(); - let mut iter = reader.records(); + let headers = if headerless { + (1..=reader.headers()?.len()) + .map(|i| format!("Column{}", i)) + .collect::>() + } else { + reader.headers()?.iter().map(String::from).collect() + }; + let mut rows = vec![]; - - if let Some(result) = iter.next() { - let line = result?; - - for (idx, item) in line.iter().enumerate() { - if headerless { - fields.push_back(format!("Column{}", idx + 1)); - } else { - fields.push_back(item.to_string()); - } - } - } - - loop { - if let Some(row_values) = iter.next() { - let row_values = row_values?; - - let mut row = TaggedDictBuilder::new(&tag); - - for (idx, entry) in row_values.iter().enumerate() { - row.insert_tagged( - fields.get(idx).unwrap(), - Value::Primitive(Primitive::String(String::from(entry))).tagged(&tag), - ); - } - - rows.push(row.into_tagged_value()); - } else { - break; + for row in reader.records() { + let mut tagged_row = TaggedDictBuilder::new(&tag); + for (value, header) in row?.iter().zip(headers.iter()) { + tagged_row.insert_tagged( + header, + Value::Primitive(Primitive::String(String::from(value))).tagged(&tag), + ) } + rows.push(tagged_row.into_tagged_value()); } Ok(Value::Table(rows).tagged(&tag)) } fn from_tsv( - FromTSVArgs { - headerless: skip_headers, - }: FromTSVArgs, + FromTSVArgs { headerless }: FromTSVArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let name_tag = name; @@ -115,7 +98,7 @@ fn from_tsv( } } - match from_tsv_string_to_value(concat_string, skip_headers, name_tag.clone()) { + match from_tsv_string_to_value(concat_string, headerless, name_tag.clone()) { Ok(x) => match x { Tagged { item: Value::Table(list), .. } => { for l in list { From 040108717561f531c65d131fe8f23215f37cab06 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Mon, 11 Nov 2019 12:54:58 +0100 Subject: [PATCH 4/5] Refactors out structured parsing logic to a separate module. --- src/commands.rs | 2 + src/commands/from_csv.rs | 90 ++------------------------ src/commands/from_structured_data.rs | 97 ++++++++++++++++++++++++++++ src/commands/from_tsv.rs | 89 +------------------------ 4 files changed, 106 insertions(+), 172 deletions(-) create mode 100644 src/commands/from_structured_data.rs diff --git a/src/commands.rs b/src/commands.rs index ee70534640..c238b451d8 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,6 +1,8 @@ #[macro_use] pub(crate) mod macros; +mod from_structured_data; + pub(crate) mod append; pub(crate) mod args; pub(crate) mod autoview; diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs index cd29b625a6..4bada42dfb 100644 --- a/src/commands/from_csv.rs +++ b/src/commands/from_csv.rs @@ -1,7 +1,7 @@ +use crate::commands::from_structured_data::from_structured_data; use crate::commands::WholeStreamCommand; -use crate::data::{Primitive, TaggedDictBuilder, Value}; +use crate::data::{Primitive, Value}; use crate::prelude::*; -use csv::ReaderBuilder; pub struct FromCSV; @@ -39,49 +39,13 @@ impl WholeStreamCommand for FromCSV { } } -pub fn from_csv_string_to_value( - s: String, - headerless: bool, - separator: char, - tag: impl Into, -) -> Result, csv::Error> { - let mut reader = ReaderBuilder::new() - .has_headers(!headerless) - .delimiter(separator as u8) - .from_reader(s.as_bytes()); - let tag = tag.into(); - - let headers = if headerless { - (1..=reader.headers()?.len()) - .map(|i| format!("Column{}", i)) - .collect::>() - } else { - reader.headers()?.iter().map(String::from).collect() - }; - - let mut rows = vec![]; - for row in reader.records() { - let mut tagged_row = TaggedDictBuilder::new(&tag); - for (value, header) in row?.iter().zip(headers.iter()) { - tagged_row.insert_tagged( - header, - Value::Primitive(Primitive::String(String::from(value))).tagged(&tag), - ) - } - rows.push(tagged_row.into_tagged_value()); - } - - Ok(Value::Table(rows).tagged(&tag)) -} - fn from_csv( FromCSVArgs { headerless, separator, }: FromCSVArgs, - RunnableContext { input, name, .. }: RunnableContext, + runnable_context: RunnableContext, ) -> Result { - let name_tag = name; let sep = match separator { Some(Tagged { item: Value::Primitive(Primitive::String(s)), @@ -101,51 +65,5 @@ fn from_csv( _ => ',', }; - let stream = async_stream! { - let values: Vec> = input.values.collect().await; - - let mut concat_string = String::new(); - let mut latest_tag: Option = None; - - for value in values { - let value_tag = value.tag(); - latest_tag = Some(value_tag.clone()); - match value.item { - Value::Primitive(Primitive::String(s)) => { - concat_string.push_str(&s); - concat_string.push_str("\n"); - } - _ => yield Err(ShellError::labeled_error_with_secondary( - "Expected a string from pipeline", - "requires string input", - name_tag.clone(), - "value originates from here", - value_tag.clone(), - )), - - } - } - - match from_csv_string_to_value(concat_string, headerless, sep, name_tag.clone()) { - Ok(x) => match x { - Tagged { item: Value::Table(list), .. } => { - for l in list { - yield ReturnSuccess::value(l); - } - } - x => yield ReturnSuccess::value(x), - }, - Err(_) => if let Some(last_tag) = latest_tag { - yield Err(ShellError::labeled_error_with_secondary( - "Could not parse as CSV", - "input cannot be parsed as CSV", - name_tag.clone(), - "value originates from here", - last_tag.clone(), - )) - } , - } - }; - - Ok(stream.to_output_stream()) + from_structured_data(headerless, sep, "CSV", runnable_context) } diff --git a/src/commands/from_structured_data.rs b/src/commands/from_structured_data.rs new file mode 100644 index 0000000000..4799a40993 --- /dev/null +++ b/src/commands/from_structured_data.rs @@ -0,0 +1,97 @@ +use crate::data::{Primitive, TaggedDictBuilder, Value}; +use crate::prelude::*; +use csv::ReaderBuilder; + +fn from_stuctured_string_to_value( + s: String, + headerless: bool, + separator: char, + tag: impl Into, +) -> Result, csv::Error> { + let mut reader = ReaderBuilder::new() + .has_headers(!headerless) + .delimiter(separator as u8) + .from_reader(s.as_bytes()); + let tag = tag.into(); + + let headers = if headerless { + (1..=reader.headers()?.len()) + .map(|i| format!("Column{}", i)) + .collect::>() + } else { + reader.headers()?.iter().map(String::from).collect() + }; + + let mut rows = vec![]; + for row in reader.records() { + let mut tagged_row = TaggedDictBuilder::new(&tag); + for (value, header) in row?.iter().zip(headers.iter()) { + tagged_row.insert_tagged( + header, + Value::Primitive(Primitive::String(String::from(value))).tagged(&tag), + ) + } + rows.push(tagged_row.into_tagged_value()); + } + + Ok(Value::Table(rows).tagged(&tag)) +} + +pub fn from_structured_data( + headerless: bool, + sep: char, + format_name: &'static str, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let name_tag = name; + + let stream = async_stream! { + let values: Vec> = input.values.collect().await; + + let mut concat_string = String::new(); + let mut latest_tag: Option = None; + + for value in values { + let value_tag = value.tag(); + latest_tag = Some(value_tag.clone()); + match value.item { + Value::Primitive(Primitive::String(s)) => { + concat_string.push_str(&s); + concat_string.push_str("\n"); + } + _ => yield Err(ShellError::labeled_error_with_secondary( + "Expected a string from pipeline", + "requires string input", + name_tag.clone(), + "value originates from here", + value_tag.clone(), + )), + + } + } + + match from_stuctured_string_to_value(concat_string, headerless, sep, name_tag.clone()) { + Ok(x) => match x { + Tagged { item: Value::Table(list), .. } => { + for l in list { + yield ReturnSuccess::value(l); + } + } + x => yield ReturnSuccess::value(x), + }, + Err(_) => if let Some(last_tag) = latest_tag { + let line_one = format!("Could not parse as {}", format_name); + let line_two = format!("input cannot be parsed as {}", format_name); + yield Err(ShellError::labeled_error_with_secondary( + line_one, + line_two, + name_tag.clone(), + "value originates from here", + last_tag.clone(), + )) + } , + } + }; + + Ok(stream.to_output_stream()) +} diff --git a/src/commands/from_tsv.rs b/src/commands/from_tsv.rs index 24841b91c1..7931b8ef38 100644 --- a/src/commands/from_tsv.rs +++ b/src/commands/from_tsv.rs @@ -1,7 +1,6 @@ +use crate::commands::from_structured_data::from_structured_data; use crate::commands::WholeStreamCommand; -use crate::data::{Primitive, TaggedDictBuilder, Value}; use crate::prelude::*; -use csv::ReaderBuilder; pub struct FromTSV; @@ -33,91 +32,9 @@ impl WholeStreamCommand for FromTSV { } } -pub fn from_tsv_string_to_value( - s: String, - headerless: bool, - tag: impl Into, -) -> Result, csv::Error> { - let mut reader = ReaderBuilder::new() - .has_headers(!headerless) - .delimiter(b'\t') - .from_reader(s.as_bytes()); - let tag = tag.into(); - - let headers = if headerless { - (1..=reader.headers()?.len()) - .map(|i| format!("Column{}", i)) - .collect::>() - } else { - reader.headers()?.iter().map(String::from).collect() - }; - - let mut rows = vec![]; - for row in reader.records() { - let mut tagged_row = TaggedDictBuilder::new(&tag); - for (value, header) in row?.iter().zip(headers.iter()) { - tagged_row.insert_tagged( - header, - Value::Primitive(Primitive::String(String::from(value))).tagged(&tag), - ) - } - rows.push(tagged_row.into_tagged_value()); - } - - Ok(Value::Table(rows).tagged(&tag)) -} - fn from_tsv( FromTSVArgs { headerless }: FromTSVArgs, - RunnableContext { input, name, .. }: RunnableContext, + runnable_context: RunnableContext, ) -> Result { - let name_tag = name; - - let stream = async_stream! { - let values: Vec> = input.values.collect().await; - - let mut concat_string = String::new(); - let mut latest_tag: Option = None; - - for value in values { - let value_tag = value.tag(); - latest_tag = Some(value_tag.clone()); - match value.item { - Value::Primitive(Primitive::String(s)) => { - concat_string.push_str(&s); - concat_string.push_str("\n"); - } - _ => yield Err(ShellError::labeled_error_with_secondary( - "Expected a string from pipeline", - "requires string input", - &name_tag, - "value originates from here", - &value_tag, - )), - - } - } - - match from_tsv_string_to_value(concat_string, headerless, name_tag.clone()) { - Ok(x) => match x { - Tagged { item: Value::Table(list), .. } => { - for l in list { - yield ReturnSuccess::value(l); - } - } - x => yield ReturnSuccess::value(x), - }, - Err(_) => if let Some(last_tag) = latest_tag { - yield Err(ShellError::labeled_error_with_secondary( - "Could not parse as TSV", - "input cannot be parsed as TSV", - &name_tag, - "value originates from here", - &last_tag, - )) - } , - } - }; - - Ok(stream.to_output_stream()) + from_structured_data(headerless, '\t', "TSV", runnable_context) } From 1060ba220670261c3104b2873eff0ffe6b87b61f Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Mon, 11 Nov 2019 15:07:02 +0100 Subject: [PATCH 5/5] Fixes --headerless functionality for from-ssv. Squashed commit of the following: commit fc59d47a2291461d84e0587fc0fe63af0dc26f9f Author: Thomas Hartmann Date: Tue Nov 12 15:39:38 2019 +0100 Fixes inconsistencies in output. commit da4084e9fdd983557b101207b381e333a443e551 Author: Thomas Hartmann Date: Tue Nov 12 13:04:10 2019 +0100 remove unused enum. commit 7f6a105879c8746786b99fb19bb9f0860c41796a Author: Thomas Hartmann Date: Tue Nov 12 12:58:41 2019 +0100 Starts refactoring from_ssv. commit b70ddd169ef0c900e03fb590cb171cc7181528db Author: Thomas Hartmann Date: Tue Nov 12 11:34:06 2019 +0100 Fixes --headerless for non-aligned columns. commit 6332778dd26de8d07be77b291124115141479892 Author: Thomas Hartmann Date: Tue Nov 12 10:27:35 2019 +0100 Fixes from-ssv headerless aligned-columns logic. commit 747d8c812e06349b4a15b8c130721881d86fff98 Author: Thomas Hartmann Date: Mon Nov 11 23:53:59 2019 +0100 fixes unit tests for ssv. commit c77cb451623b37a7a9742c791a4fc38cad053d3d Author: Thomas Hartmann Date: Mon Nov 11 22:49:21 2019 +0100 it compiles! one broken test. commit 08a05964f56cf92507c255057d0aaf2b6dbb6f45 Author: Thomas Hartmann Date: Mon Nov 11 18:52:54 2019 +0100 Backed into a corner. Help. commit c95ab683025a8007b8a6f8e1659f021a002df584 Author: Thomas Hartmann Date: Mon Nov 11 17:30:54 2019 +0100 broken but on the way --- src/commands/from_ssv.rs | 280 +++++++++++++++++++++++++++++---------- tests/filters_test.rs | 16 ++- 2 files changed, 226 insertions(+), 70 deletions(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 090bab508f..37bba215f1 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -45,6 +45,149 @@ impl WholeStreamCommand for FromSSV { } } +enum HeaderOptions<'a> { + WithHeaders(&'a str), + WithoutHeaders, +} + +fn parse_aligned_columns<'a>( + lines: impl Iterator, + headers: HeaderOptions, + separator: &str, +) -> Vec> { + fn construct<'a>( + lines: impl Iterator, + headers: Vec<(String, usize)>, + ) -> Vec> { + lines + .map(|l| { + headers + .iter() + .enumerate() + .map(|(i, (header_name, start_position))| { + let val = match headers.get(i + 1) { + Some((_, end)) => { + if *end < l.len() { + l.get(*start_position..*end) + } else { + l.get(*start_position..) + } + } + None => l.get(*start_position..), + } + .unwrap_or("") + .trim() + .into(); + (header_name.clone(), val) + }) + .collect() + }) + .collect() + } + + let find_indices = |line: &str| { + let values = line + .split(&separator) + .map(str::trim) + .filter(|s| !s.is_empty()); + values + .fold( + (0, vec![]), + |(current_pos, mut indices), value| match line[current_pos..].find(value) { + None => (current_pos, indices), + Some(index) => { + let absolute_index = current_pos + index; + indices.push(absolute_index); + (absolute_index + value.len(), indices) + } + }, + ) + .1 + }; + + let parse_with_headers = |lines, headers_raw: &str| { + let indices = find_indices(headers_raw); + let headers = headers_raw + .split(&separator) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(String::from) + .zip(indices); + + let columns = headers.collect::>(); + + construct(lines, columns) + }; + + let parse_without_headers = |ls: Vec<&str>| { + let mut indices = ls + .iter() + .flat_map(|s| find_indices(*s)) + .collect::>(); + + indices.sort(); + indices.dedup(); + + let headers: Vec<(String, usize)> = indices + .iter() + .enumerate() + .map(|(i, position)| (format!("Column{}", i + 1), *position)) + .collect(); + + construct(ls.iter().map(|s| s.to_owned()), headers) + }; + + match headers { + HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), + HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), + } +} + +fn parse_separated_columns<'a>( + lines: impl Iterator, + headers: HeaderOptions, + separator: &str, +) -> Vec> { + fn collect<'a>( + headers: Vec, + rows: impl Iterator, + separator: &str, + ) -> Vec> { + rows.map(|r| { + headers + .iter() + .zip(r.split(separator).map(str::trim).filter(|s| !s.is_empty())) + .map(|(a, b)| (a.to_owned(), b.to_owned())) + .collect() + }) + .collect() + } + + let parse_with_headers = |lines, headers_raw: &str| { + let headers = headers_raw + .split(&separator) + .map(str::trim) + .map(|s| s.to_owned()) + .filter(|s| !s.is_empty()) + .collect(); + collect(headers, lines, separator) + }; + + let parse_without_headers = |ls: Vec<&str>| { + let num_columns = ls.iter().map(|r| r.len()).max().unwrap_or(0); + + let headers = (1..=num_columns) + .map(|i| format!("Column{}", i)) + .collect::>(); + collect(headers, ls.iter().map(|s| s.as_ref()), separator) + }; + + match headers { + HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), + HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), + } +} + fn string_to_table( s: &str, headerless: bool, @@ -54,76 +197,23 @@ fn string_to_table( let mut lines = s.lines().filter(|l| !l.trim().is_empty()); let separator = " ".repeat(std::cmp::max(split_at, 1)); - if aligned_columns { - let headers_raw = lines.next()?; - - let headers = headers_raw - .trim() - .split(&separator) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| (headers_raw.find(s).unwrap(), s.to_owned())); - - let columns = if headerless { - headers - .enumerate() - .map(|(header_no, (string_index, _))| { - (string_index, format!("Column{}", header_no + 1)) - }) - .collect::>() - } else { - headers.collect::>() - }; - - Some( - lines - .map(|l| { - columns - .iter() - .enumerate() - .filter_map(|(i, (start, col))| { - (match columns.get(i + 1) { - Some((end, _)) => l.get(*start..*end), - None => l.get(*start..), - }) - .and_then(|s| Some((col.clone(), String::from(s.trim())))) - }) - .collect() - }) - .collect(), - ) + let (ls, header_options) = if headerless { + (lines, HeaderOptions::WithoutHeaders) } else { - let headers = lines - .next()? - .split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .map(|s| s.to_owned()) - .collect::>(); + let headers = lines.next()?; + (lines, HeaderOptions::WithHeaders(headers)) + }; - let header_row = if headerless { - (1..=headers.len()) - .map(|i| format!("Column{}", i)) - .collect::>() - } else { - headers - }; + let f = if aligned_columns { + parse_aligned_columns + } else { + parse_separated_columns + }; - Some( - lines - .map(|l| { - header_row - .iter() - .zip( - l.split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()), - ) - .map(|(a, b)| (String::from(a), String::from(b))) - .collect() - }) - .collect(), - ) + let parsed = f(ls, header_options, &separator); + match parsed.len() { + 0 => None, + _ => Some(parsed), } } @@ -250,7 +340,7 @@ mod tests { } #[test] - fn it_ignores_headers_when_headerless() { + fn it_uses_first_row_as_data_when_headerless() { let input = r#" a b 1 2 @@ -260,6 +350,7 @@ mod tests { assert_eq!( result, Some(vec![ + vec![owned("Column1", "a"), owned("Column2", "b")], vec![owned("Column1", "1"), owned("Column2", "2")], vec![owned("Column1", "3"), owned("Column2", "4")] ]) @@ -357,4 +448,57 @@ mod tests { ],] ) } + + #[test] + fn it_handles_empty_values_when_headerless_and_aligned_columns() { + let input = r#" + a multi-word value b d + 1 3-3 4 + last + "#; + + let result = string_to_table(input, true, true, 2).unwrap(); + assert_eq!( + result, + vec![ + vec![ + owned("Column1", "a multi-word value"), + owned("Column2", "b"), + owned("Column3", ""), + owned("Column4", "d"), + owned("Column5", "") + ], + vec![ + owned("Column1", "1"), + owned("Column2", ""), + owned("Column3", "3-3"), + owned("Column4", "4"), + owned("Column5", "") + ], + vec![ + owned("Column1", ""), + owned("Column2", ""), + owned("Column3", ""), + owned("Column4", ""), + owned("Column5", "last") + ], + ] + ) + } + + #[test] + fn input_is_parsed_correctly_if_either_option_works() { + let input = r#" + docker-registry docker-registry=default docker-registry=default 172.30.78.158 5000/TCP + kubernetes component=apiserver,provider=kubernetes 172.30.0.2 443/TCP + kubernetes-ro component=apiserver,provider=kubernetes 172.30.0.1 80/TCP + "#; + + let aligned_columns_headerless = string_to_table(input, true, true, 2).unwrap(); + let separator_headerless = string_to_table(input, true, false, 2).unwrap(); + let aligned_columns_with_headers = string_to_table(input, false, true, 2).unwrap(); + let separator_with_headers = string_to_table(input, false, false, 2).unwrap(); + assert_eq!(aligned_columns_headerless, separator_headerless); + assert_eq!(aligned_columns_with_headers, separator_with_headers); + } } diff --git a/tests/filters_test.rs b/tests/filters_test.rs index 9ccb4ab718..e18f20be67 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -450,7 +450,18 @@ fn converts_from_ssv_text_treating_first_line_as_data_with_flag() { "#, )]); - let actual = nu!( + let aligned_columns = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open oc_get_svc.txt + | from-ssv --headerless --aligned-columns + | first + | get Column1 + | echo $it + "# + )); + + let separator_based = nu!( cwd: dirs.test(), h::pipeline( r#" open oc_get_svc.txt @@ -461,7 +472,8 @@ fn converts_from_ssv_text_treating_first_line_as_data_with_flag() { "# )); - assert_eq!(actual, "docker-registry"); + assert_eq!(aligned_columns, separator_based); + assert_eq!(separator_based, "docker-registry"); }) }