mirror of
https://github.com/nushell/nushell
synced 2024-12-25 12:33:17 +00:00
truly flexible csv/tsv parsing (#14399)
- fixes #14398 I will properly fill out this PR and fix any tests that might break when I have the time; this was a quick fix. # Description This PR makes `from csv` and `from tsv`, with the `--flexible` flag, stop dropping extra/unexpected columns. # User-Facing Changes `$text`'s contents ```csv value 1,aaa 2,bbb 3 4,ddd 5,eee,extra ``` Old behavior ```nushell > $text | from csv --flexible --noheaders ╭─#─┬─column0─╮ │ 0 │ value │ │ 1 │ 1 │ │ 2 │ 2 │ │ 3 │ 3 │ │ 4 │ 4 │ │ 5 │ 5 │ ╰─#─┴─column0─╯ ``` New behavior ```nushell > $text | from csv --flexible --noheaders ╭─#─┬─column0─┬─column1─┬─column2─╮ │ 0 │ value │ ❎ │ ❎ │ │ 1 │ 1 │ aaa │ ❎ │ │ 2 │ 2 │ bbb │ ❎ │ │ 3 │ 3 │ ❎ │ ❎ │ │ 4 │ 4 │ ddd │ ❎ │ │ 5 │ 5 │ eee │ extra │ ╰─#─┴─column0─┴─column1─┴─column2─╯ ``` - The first line in a csv (or tsv) document no longer limits the number of columns - Missing values in columns are no longer automatically filled with `null` with this change, as a later row can introduce new columns. **BREAKING CHANGE** Because missing columns are different from empty columns, operations on possibly missing columns will have to use optional access syntax e.g. `get foo` => `get foo?` # Tests + Formatting Added examples that run as tests and adjusted existing tests to confirm the new behavior. # After Submitting Update the workaround with fish completer mentioned [here](https://www.nushell.sh/cookbook/external_completers.html#fish-completer)
This commit is contained in:
parent
2a90cb7355
commit
5f7082f053
4 changed files with 62 additions and 30 deletions
|
@ -11,7 +11,10 @@ impl Command for FromCsv {
|
|||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("from csv")
|
||||
.input_output_types(vec![(Type::String, Type::table())])
|
||||
.input_output_types(vec![
|
||||
(Type::String, Type::table()),
|
||||
(Type::String, Type::list(Type::Any)),
|
||||
])
|
||||
.named(
|
||||
"separator",
|
||||
SyntaxShape::String,
|
||||
|
@ -82,6 +85,26 @@ impl Command for FromCsv {
|
|||
})],
|
||||
))
|
||||
},
|
||||
Example {
|
||||
description: "Convert comma-separated data to a table, allowing variable number of columns per row",
|
||||
example: "\"ColA,ColB\n1,2\n3,4,5\n6\" | from csv --flexible",
|
||||
result: Some(Value::test_list (
|
||||
vec![
|
||||
Value::test_record(record! {
|
||||
"ColA" => Value::test_int(1),
|
||||
"ColB" => Value::test_int(2),
|
||||
}),
|
||||
Value::test_record(record! {
|
||||
"ColA" => Value::test_int(3),
|
||||
"ColB" => Value::test_int(4),
|
||||
"column2" => Value::test_int(5),
|
||||
}),
|
||||
Value::test_record(record! {
|
||||
"ColA" => Value::test_int(6),
|
||||
}),
|
||||
],
|
||||
))
|
||||
},
|
||||
Example {
|
||||
description: "Convert comma-separated data to a table, ignoring headers",
|
||||
example: "open data.txt | from csv --noheaders",
|
||||
|
|
|
@ -39,12 +39,7 @@ fn from_delimited_stream(
|
|||
.from_reader(input_reader);
|
||||
|
||||
let headers = if noheaders {
|
||||
(0..reader
|
||||
.headers()
|
||||
.map_err(|err| from_csv_error(err, span))?
|
||||
.len())
|
||||
.map(|i| format!("column{i}"))
|
||||
.collect::<Vec<String>>()
|
||||
vec![]
|
||||
} else {
|
||||
reader
|
||||
.headers()
|
||||
|
@ -54,32 +49,28 @@ fn from_delimited_stream(
|
|||
.collect()
|
||||
};
|
||||
|
||||
let n = headers.len();
|
||||
let columns = headers
|
||||
.into_iter()
|
||||
.chain((n..).map(|i| format!("column{i}")));
|
||||
let iter = reader.into_records().map(move |row| {
|
||||
let row = match row {
|
||||
Ok(row) => row,
|
||||
Err(err) => return Value::error(from_csv_error(err, span), span),
|
||||
};
|
||||
let columns = headers.iter().cloned();
|
||||
let values = row
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
if no_infer {
|
||||
Value::string(s, span)
|
||||
} else if let Ok(i) = s.parse() {
|
||||
Value::int(i, span)
|
||||
} else if let Ok(f) = s.parse() {
|
||||
Value::float(f, span)
|
||||
} else {
|
||||
Value::string(s, span)
|
||||
}
|
||||
})
|
||||
.chain(std::iter::repeat(Value::nothing(span)));
|
||||
let columns = columns.clone();
|
||||
let values = row.into_iter().map(|s| {
|
||||
if no_infer {
|
||||
Value::string(s, span)
|
||||
} else if let Ok(i) = s.parse() {
|
||||
Value::int(i, span)
|
||||
} else if let Ok(f) = s.parse() {
|
||||
Value::float(f, span)
|
||||
} else {
|
||||
Value::string(s, span)
|
||||
}
|
||||
});
|
||||
|
||||
// If there are more values than the number of headers,
|
||||
// then the remaining values are ignored.
|
||||
//
|
||||
// Otherwise, if there are less values than headers,
|
||||
// then `Value::nothing(span)` is used to fill the remaining columns.
|
||||
Value::record(columns.zip(values).collect(), span)
|
||||
});
|
||||
|
||||
|
|
|
@ -11,7 +11,10 @@ impl Command for FromTsv {
|
|||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("from tsv")
|
||||
.input_output_types(vec![(Type::String, Type::table())])
|
||||
.input_output_types(vec![
|
||||
(Type::String, Type::table()),
|
||||
(Type::String, Type::list(Type::Any)),
|
||||
])
|
||||
.named(
|
||||
"comment",
|
||||
SyntaxShape::String,
|
||||
|
@ -76,6 +79,21 @@ impl Command for FromTsv {
|
|||
})],
|
||||
))
|
||||
},
|
||||
Example {
|
||||
description: "Convert comma-separated data to a table, allowing variable number of columns per row and ignoring headers",
|
||||
example: "\"value 1\nvalue 2\tdescription 2\" | from tsv --flexible --noheaders",
|
||||
result: Some(Value::test_list (
|
||||
vec![
|
||||
Value::test_record(record! {
|
||||
"column0" => Value::test_string("value 1"),
|
||||
}),
|
||||
Value::test_record(record! {
|
||||
"column0" => Value::test_string("value 2"),
|
||||
"column1" => Value::test_string("description 2"),
|
||||
}),
|
||||
],
|
||||
))
|
||||
},
|
||||
Example {
|
||||
description: "Create a tsv file with header columns and open it",
|
||||
example: r#"$'c1(char tab)c2(char tab)c3(char nl)1(char tab)2(char tab)3' | save tsv-data | open tsv-data | from tsv"#,
|
||||
|
|
|
@ -469,7 +469,7 @@ fn from_csv_test_flexible_extra_vals() {
|
|||
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
|
||||
"#
|
||||
));
|
||||
assert_eq!(actual.out, "[1, 2]");
|
||||
assert_eq!(actual.out, "[1, 2, 3]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -479,5 +479,5 @@ fn from_csv_test_flexible_missing_vals() {
|
|||
echo "a,b\n1" | from csv --flexible | first | values | to nuon
|
||||
"#
|
||||
));
|
||||
assert_eq!(actual.out, "[1, null]");
|
||||
assert_eq!(actual.out, "[1]");
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue