Additional flags for commands `from csv` and `from tsv` (#8398)

# Description

Resolves issue #8370

Adds the following flags to commands `from csv` and `from tsv`:
- `--flexible`: allow the number of fields in records to be variable
- `-c --comment`: a comment character to ignore lines starting with it
- `-q --quote`: a quote character to ignore separators in strings,
defaults to '\"'
- `-e --escape`: an escape character for strings containing the quote
character
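
For illustration, a minimal sketch combining the new flags (hedged: `data.csv` is a hypothetical input file, not part of this PR):

```nushell
# data.csv is hypothetical; the flags shown are the ones added by this PR
open data.csv | from csv --comment '#' --quote "'" --escape '\' --flexible
```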

Internally, the `Value` type gains a helper method `as_char`, which converts the value to a single `char`.
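
From the user's side, this conversion shows up as errors when a flag value is not a single character. A sketch based on the new tests (again with a hypothetical `data.csv`):

```nushell
open data.csv | from csv --separator "li"                # error: expects a single character separator
open data.csv | from csv --separator ('123' | into int)  # error: can't convert int to char
```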

# User-Facing Changes

The single-quoted string `'\t'` can no longer be passed to the `--separator` flag, because it is now interpreted as a literal two-character string. Use a double-quoted string instead, e.g. `-s "\t"`, which is correctly interpreted as a single `char`.
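
A before/after sketch of this change (hedged; `data.txt` stands in for any tab-separated file):

```nushell
# Previously accepted; now '\t' is treated as a literal two-character string and rejected:
# open data.txt | from csv --separator '\t'

# Double-quoted "\t" is parsed as a single tab character and works:
open data.txt | from csv --separator "\t"
```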

Matthew Deville, 2023-03-16 23:49:46 +01:00 (committed by GitHub)
parent bdaa01165e
commit 8543b0789d
6 changed files with 494 additions and 54 deletions


@@ -1,4 +1,4 @@
use super::delimited::{from_delimited_data, trim_from_str};
use super::delimited::{from_delimited_data, trim_from_str, DelimitedReaderConfig};
use nu_engine::CallExt;
use nu_protocol::ast::Call;
@@ -24,11 +24,34 @@ impl Command for FromCsv {
"a character to separate columns, defaults to ','",
Some('s'),
)
.named(
"comment",
SyntaxShape::String,
"a comment character to ignore lines starting with it",
Some('c'),
)
.named(
"quote",
SyntaxShape::String,
"a quote character to ignore separators in strings, defaults to '\"'",
Some('q'),
)
.named(
"escape",
SyntaxShape::String,
"an escape character for strings containing the quote character",
Some('e'),
)
.switch(
"noheaders",
"don't treat the first row as column names",
Some('n'),
)
.switch(
"flexible",
"allow the number of fields in records to be variable",
None,
)
.switch("no-infer", "no field type inferencing", None)
.named(
"trim",
@@ -75,28 +98,28 @@ impl Command for FromCsv {
example: "open data.txt | from csv --noheaders",
result: None,
},
Example {
description: "Convert comma-separated data to a table, ignoring headers",
example: "open data.txt | from csv -n",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table",
example: "open data.txt | from csv --separator ';'",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names and field values",
description: "Convert comma-separated data to a table, ignoring lines starting with '#'",
example: "open data.txt | from csv --comment '#'",
result: None,
},
Example {
description: "Convert comma-separated data to a table, dropping all possible whitespaces around header names and field values",
example: "open data.txt | from csv --trim all",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names",
description: "Convert comma-separated data to a table, dropping all possible whitespaces around header names",
example: "open data.txt | from csv --trim headers",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around field values",
description: "Convert comma-separated data to a table, dropping all possible whitespaces around field values",
example: "open data.txt | from csv --trim fields",
result: None,
},
@@ -112,32 +135,41 @@ fn from_csv(
) -> Result<PipelineData, ShellError> {
let name = call.head;
let separator = call
.get_flag(engine_state, stack, "separator")?
.map(|v: Value| v.as_char())
.transpose()?
.unwrap_or(',');
let comment = call
.get_flag(engine_state, stack, "comment")?
.map(|v: Value| v.as_char())
.transpose()?;
let quote = call
.get_flag(engine_state, stack, "quote")?
.map(|v: Value| v.as_char())
.transpose()?
.unwrap_or('"');
let escape = call
.get_flag(engine_state, stack, "escape")?
.map(|v: Value| v.as_char())
.transpose()?;
let no_infer = call.has_flag("no-infer");
let noheaders = call.has_flag("noheaders");
let separator: Option<Value> = call.get_flag(engine_state, stack, "separator")?;
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let flexible = call.has_flag("flexible");
let trim = trim_from_str(call.get_flag(engine_state, stack, "trim")?)?;
let sep = match separator {
Some(Value::String { val: s, span }) => {
if s == r"\t" {
'\t'
} else {
let vec_s: Vec<char> = s.chars().collect();
if vec_s.len() != 1 {
return Err(ShellError::MissingParameter {
param_name: "single character separator".into(),
span,
});
};
vec_s[0]
}
}
_ => ',',
let config = DelimitedReaderConfig {
separator,
comment,
quote,
escape,
noheaders,
flexible,
no_infer,
trim,
};
let trim = trim_from_str(trim)?;
from_delimited_data(noheaders, no_infer, sep, trim, input, name)
from_delimited_data(config, input, name)
}
#[cfg(test)]


@@ -2,16 +2,26 @@ use csv::{ReaderBuilder, Trim};
use nu_protocol::{IntoPipelineData, PipelineData, ShellError, Span, Value};
fn from_delimited_string_to_value(
DelimitedReaderConfig {
separator,
comment,
quote,
escape,
noheaders,
flexible,
no_infer,
trim,
}: DelimitedReaderConfig,
s: String,
noheaders: bool,
no_infer: bool,
separator: char,
trim: Trim,
span: Span,
) -> Result<Value, csv::Error> {
let mut reader = ReaderBuilder::new()
.has_headers(!noheaders)
.flexible(flexible)
.delimiter(separator as u8)
.comment(comment.map(|c| c as u8))
.quote(quote as u8)
.escape(escape.map(|c| c as u8))
.trim(trim)
.from_reader(s.as_bytes());
@@ -56,24 +66,30 @@ fn from_delimited_string_to_value(
Ok(Value::List { vals: rows, span })
}
pub fn from_delimited_data(
noheaders: bool,
no_infer: bool,
sep: char,
trim: Trim,
pub(super) struct DelimitedReaderConfig {
pub separator: char,
pub comment: Option<char>,
pub quote: char,
pub escape: Option<char>,
pub noheaders: bool,
pub flexible: bool,
pub no_infer: bool,
pub trim: Trim,
}
pub(super) fn from_delimited_data(
config: DelimitedReaderConfig,
input: PipelineData,
name: Span,
) -> Result<PipelineData, ShellError> {
let (concat_string, _span, metadata) = input.collect_string_strict(name)?;
Ok(
from_delimited_string_to_value(concat_string, noheaders, no_infer, sep, trim, name)
.map_err(|x| ShellError::DelimiterError {
msg: x.to_string(),
span: name,
})?
.into_pipeline_data_with_metadata(metadata),
)
Ok(from_delimited_string_to_value(config, concat_string, name)
.map_err(|x| ShellError::DelimiterError {
msg: x.to_string(),
span: name,
})?
.into_pipeline_data_with_metadata(metadata))
}
pub fn trim_from_str(trim: Option<Value>) -> Result<Trim, ShellError> {


@@ -1,4 +1,4 @@
use super::delimited::{from_delimited_data, trim_from_str};
use super::delimited::{from_delimited_data, trim_from_str, DelimitedReaderConfig};
use nu_engine::CallExt;
use nu_protocol::ast::Call;
@@ -18,11 +18,34 @@ impl Command for FromTsv {
fn signature(&self) -> Signature {
Signature::build("from tsv")
.input_output_types(vec![(Type::String, Type::Table(vec![]))])
.named(
"comment",
SyntaxShape::String,
"a comment character to ignore lines starting with it",
Some('c'),
)
.named(
"quote",
SyntaxShape::String,
"a quote character to ignore separators in strings, defaults to '\"'",
Some('q'),
)
.named(
"escape",
SyntaxShape::String,
"an escape character for strings containing the quote character",
Some('e'),
)
.switch(
"noheaders",
"don't treat the first row as column names",
Some('n'),
)
.switch(
"flexible",
"allow the number of fields in records to be variable",
None,
)
.switch("no-infer", "no field type inferencing", None)
.named(
"trim",
@@ -101,12 +124,36 @@ fn from_tsv(
) -> Result<PipelineData, ShellError> {
let name = call.head;
let comment = call
.get_flag(engine_state, stack, "comment")?
.map(|v: Value| v.as_char())
.transpose()?;
let quote = call
.get_flag(engine_state, stack, "quote")?
.map(|v: Value| v.as_char())
.transpose()?
.unwrap_or('"');
let escape = call
.get_flag(engine_state, stack, "escape")?
.map(|v: Value| v.as_char())
.transpose()?;
let no_infer = call.has_flag("no-infer");
let noheaders = call.has_flag("noheaders");
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let trim = trim_from_str(trim)?;
let flexible = call.has_flag("flexible");
let trim = trim_from_str(call.get_flag(engine_state, stack, "trim")?)?;
from_delimited_data(noheaders, no_infer, '\t', trim, input, name)
let config = DelimitedReaderConfig {
separator: '\t',
comment,
quote,
escape,
noheaders,
flexible,
no_infer,
trim,
};
from_delimited_data(config, input, name)
}
#[cfg(test)]


@@ -183,8 +183,92 @@ fn from_csv_text_with_tab_separator_to_table() {
}
#[test]
fn from_csv_text_skipping_headers_to_table() {
fn from_csv_text_with_comments_to_table() {
Playground::setup("filter_from_csv_test_5", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
# This is a comment
first_name,last_name,rusty_luck
# This one too
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
# This one also
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r##"
open los_tres_caballeros.txt
| from csv --comment "#"
| get rusty_luck
| length
"##
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_custom_quotes_to_table() {
Playground::setup("filter_from_csv_test_6", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
'And''rés',Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --quote "'"
| first
| get first_name
"#
));
assert_eq!(actual.out, "And'rés");
})
}
#[test]
fn from_csv_text_with_custom_escapes_to_table() {
Playground::setup("filter_from_csv_test_7", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
"And\"rés",Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --escape '\'
| first
| get first_name
"#
));
assert_eq!(actual.out, "And\"rés");
})
}
#[test]
fn from_csv_text_skipping_headers_to_table() {
Playground::setup("filter_from_csv_test_8", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
@@ -208,6 +292,84 @@ fn from_csv_text_skipping_headers_to_table() {
})
}
#[test]
fn from_csv_text_with_missing_columns_to_table() {
Playground::setup("filter_from_csv_test_9", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --flexible
| get -i rusty_luck
| compact
| length
"#
));
assert_eq!(actual.out, "2");
})
}
#[test]
fn from_csv_text_with_multiple_char_separator() {
Playground::setup("filter_from_csv_test_10", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "li"
"#
));
assert!(actual.err.contains("single character separator"));
})
}
#[test]
fn from_csv_text_with_wrong_type_separator() {
Playground::setup("filter_from_csv_test_11", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator ('123' | into int)
"#
));
assert!(actual.err.contains("can't convert int to char"));
})
}
#[test]
fn table_with_record_error() {
let actual = nu!(


@@ -16,7 +16,7 @@ fn table_to_tsv_text_and_from_tsv_text_back_into_table() {
fn table_to_tsv_text_and_from_tsv_text_back_into_table_using_csv_separator() {
let actual = nu!(
cwd: "tests/fixtures/formats",
r"open caco3_plastics.tsv | to tsv | from csv --separator '\t' | first | get origin"
r#"open caco3_plastics.tsv | to tsv | from csv --separator "\t" | first | get origin"#
);
assert_eq!(actual.out, "SPAIN");
@@ -106,8 +106,92 @@ fn from_tsv_text_to_table() {
}
#[test]
fn from_tsv_text_skipping_headers_to_table() {
fn from_tsv_text_with_comments_to_table() {
Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
# This is a comment
first_name last_name rusty_luck
# This one too
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
# This one also
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r##"
open los_tres_caballeros.txt
| from tsv --comment "#"
| get rusty_luck
| length
"##
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_tsv_text_with_custom_quotes_to_table() {
Playground::setup("filter_from_tsv_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
'And''rés' Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from tsv --quote "'"
| first
| get first_name
"#
));
assert_eq!(actual.out, "And'rés");
})
}
#[test]
fn from_tsv_text_with_custom_escapes_to_table() {
Playground::setup("filter_from_tsv_test_4", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
"And\"rés" Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from tsv --escape '\'
| first
| get first_name
"#
));
assert_eq!(actual.out, "And\"rés");
})
}
#[test]
fn from_tsv_text_skipping_headers_to_table() {
Playground::setup("filter_from_tsv_test_5", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
@@ -130,3 +214,81 @@ fn from_tsv_text_skipping_headers_to_table() {
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_tsv_text_with_missing_columns_to_table() {
Playground::setup("filter_from_tsv_test_6", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from tsv --flexible
| get -i rusty_luck
| compact
| length
"#
));
assert_eq!(actual.out, "2");
})
}
#[test]
fn from_tsv_text_with_multiple_char_comment() {
Playground::setup("filter_from_tsv_test_7", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --comment "li"
"#
));
assert!(actual.err.contains("single character separator"));
})
}
#[test]
fn from_tsv_text_with_wrong_type_comment() {
Playground::setup("filter_from_csv_test_8", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --comment ('123' | into int)
"#
));
assert!(actual.err.contains("can't convert int to char"));
})
}


@@ -190,6 +190,27 @@ impl Clone for Value {
}
impl Value {
pub fn as_char(&self) -> Result<char, ShellError> {
match self {
Value::String { val, span } => {
let mut chars = val.chars();
match (chars.next(), chars.next()) {
(Some(c), None) => Ok(c),
_ => Err(ShellError::MissingParameter {
param_name: "single character separator".into(),
span: *span,
}),
}
}
x => Err(ShellError::CantConvert {
to_type: "char".into(),
from_type: x.get_type().to_string(),
span: self.span()?,
help: None,
}),
}
}
/// Converts into string values that can be changed into string natively
pub fn as_string(&self) -> Result<String, ShellError> {
match self {