nushell/crates/nu-command/tests/format_conversions/csv.rs
Hudson Clark cb754befe9
fix: Ensure consistent vals and cols when parsing with --flexible (#10814)
# Description
`from tsv` and `from csv` both support a `--flexible` flag. This flag
can be used to "allow the number of fields in records to be variable".

Previously, a record's invariant that `rec.cols.len() == rec.vals.len()`
could be broken during parsing. This can cause runtime errors as in
#10693. Other commands, like `select` were also affected.

The inconsistencies are somewhat hard to see, as most nushell code
assumes an equal number of columns and values.

# Before

### Fewer values than columns
```nushell
> let record = (echo "one,two\n1" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# But only one value
> $record | values | to nuon
[1]
# And printing the record doesn't show the second column!
> $record | to nuon
{one: 1}
```

### More values than columns
```nushell
> let record = (echo "one,two\n1,2,3" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# But three values
> $record | values | to nuon
[1, 2, 3]
# And printing the record doesn't show the third value!
> $record | to nuon
{one: 1, two: 2}
```
# After

### Fewer values than columns
```nushell
> let record = (echo "one,two\n1" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# And a matching number of values
> $record | values | to nuon
[1, null]
# And printing the record works as expected
> $record | to nuon
{one: 1, two: null}
```

### More values than columns
```nushell
> let record = (echo "one,two\n1,2,3" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# And a matching number of values
> $record | values | to nuon
[1, 2]
# And printing the record works as expected
> $record | to nuon
{one: 1, two: 2}
```

# User-Facing Changes
Using the `--flexible` flag with `from csv` and `from tsv` will not
result in corrupted record state.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
2023-10-24 15:54:26 -05:00

482 lines
12 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline};
#[test]
fn table_to_csv_text_and_from_csv_text_back_into_table() {
let actual = nu!(
cwd: "tests/fixtures/formats",
"open caco3_plastics.csv | to csv | from csv | first | get origin "
);
assert_eq!(actual.out, "SPAIN");
}
#[test]
fn table_to_csv_text() {
Playground::setup("filter_to_csv_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"csv_text_sample.txt",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open csv_text_sample.txt
| lines
| str trim
| split column "," a b c d origin
| last 1
| to csv
| lines
| get 1
"#
));
assert!(actual
.out
.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
})
}
#[test]
fn table_to_csv_text_skipping_headers_after_conversion() {
Playground::setup("filter_to_csv_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"csv_text_sample.txt",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open csv_text_sample.txt
| lines
| str trim
| split column "," a b c d origin
| last 1
| to csv --noheaders
"#
));
assert!(actual
.out
.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
})
}
#[test]
fn infers_types() {
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_cuatro_mosqueteros.csv",
r#"
first_name,last_name,rusty_luck,d
Andrés,Robalino,1,d
JT,Turner,1,d
Yehuda,Katz,1,d
Jason,Gedge,1,d
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_cuatro_mosqueteros.csv
| where rusty_luck > 0
| length
"#
));
assert_eq!(actual.out, "4");
})
}
#[test]
fn from_csv_text_to_table() {
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
JT,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_separator_to_table() {
Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name;last_name;rusty_luck
Andrés;Robalino;1
JT;Turner;1
Yehuda;Katz;1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator ";"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_tab_separator_to_table() {
Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino 1
JT Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator (char tab)
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_comments_to_table() {
Playground::setup("filter_from_csv_test_5", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
# This is a comment
first_name,last_name,rusty_luck
# This one too
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
# This one also
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r##"
open los_tres_caballeros.txt
| from csv --comment "#"
| get rusty_luck
| length
"##
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_custom_quotes_to_table() {
Playground::setup("filter_from_csv_test_6", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
'And''rés',Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --quote "'"
| first
| get first_name
"#
));
assert_eq!(actual.out, "And'rés");
})
}
#[test]
fn from_csv_text_with_custom_escapes_to_table() {
Playground::setup("filter_from_csv_test_7", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
"And\"rés",Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r"
open los_tres_caballeros.txt
| from csv --escape '\'
| first
| get first_name
"
));
assert_eq!(actual.out, "And\"rés");
})
}
#[test]
fn from_csv_text_skipping_headers_to_table() {
Playground::setup("filter_from_csv_test_8", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
Andrés,Robalino,1
JT,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_amigos.txt
| from csv --noheaders
| get column3
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_missing_columns_to_table() {
Playground::setup("filter_from_csv_test_9", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --flexible
| get -i rusty_luck
| compact
| length
"#
));
assert_eq!(actual.out, "2");
})
}
#[test]
fn from_csv_text_with_multiple_char_separator() {
Playground::setup("filter_from_csv_test_10", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "li"
"#
));
assert!(actual
.err
.contains("separator should be a single char or a 4-byte unicode"));
})
}
#[test]
fn from_csv_text_with_wrong_type_separator() {
Playground::setup("filter_from_csv_test_11", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator ('123' | into int)
"#
));
assert!(actual.err.contains("can't convert int to string"));
})
}
#[test]
fn table_with_record_error() {
let actual = nu!(pipeline(
r#"
[[a b]; [1 2] [3 {a: 1 b: 2}]]
| to csv
"#
));
assert!(actual.err.contains("can't convert"))
}
#[test]
fn list_not_table_error() {
let actual = nu!(pipeline(
r#"
[{a: 1 b: 2} {a: 3 b: 4} 1]
| to csv
"#
));
assert!(actual.err.contains("can't convert"))
}
#[test]
fn string_to_csv_error() {
let actual = nu!(pipeline(
r#"
'qwe' | to csv
"#
));
assert!(actual.err.contains("command doesn't support"))
}
#[test]
fn parses_csv_with_unicode_sep() {
Playground::setup("filter_from_csv_unicode_sep_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name;last_name;rusty_luck
Andrés;Robalino;1
JT;Turner;1
Yehuda;Katz;1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "003B"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn parses_csv_with_unicode_x1f_sep() {
Playground::setup("filter_from_csv_unicode_sep_x1f_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_namelast_namerusty_luck
AndrésRobalino1
JTTurner1
YehudaKatz1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "001F"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_test_flexible_extra_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, 2]");
}
#[test]
fn from_csv_test_flexible_missing_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, null]");
}