mirror of
https://github.com/nushell/nushell
synced 2024-12-26 04:53:09 +00:00
truly flexible csv/tsv parsing (#14399)
- fixes #14398 I will properly fill out this PR and fix any tests that might break when I have the time; this was a quick fix. # Description This PR makes `from csv` and `from tsv`, with the `--flexible` flag, stop dropping extra/unexpected columns. # User-Facing Changes `$text`'s contents ```csv value 1,aaa 2,bbb 3 4,ddd 5,eee,extra ``` Old behavior ```nushell > $text | from csv --flexible --noheaders ╭─#─┬─column0─╮ │ 0 │ value │ │ 1 │ 1 │ │ 2 │ 2 │ │ 3 │ 3 │ │ 4 │ 4 │ │ 5 │ 5 │ ╰─#─┴─column0─╯ ``` New behavior ```nushell > $text | from csv --flexible --noheaders ╭─#─┬─column0─┬─column1─┬─column2─╮ │ 0 │ value │ ❎ │ ❎ │ │ 1 │ 1 │ aaa │ ❎ │ │ 2 │ 2 │ bbb │ ❎ │ │ 3 │ 3 │ ❎ │ ❎ │ │ 4 │ 4 │ ddd │ ❎ │ │ 5 │ 5 │ eee │ extra │ ╰─#─┴─column0─┴─column1─┴─column2─╯ ``` - The first line in a csv (or tsv) document no longer limits the number of columns - Missing values in columns are no longer automatically filled with `null` with this change, as a later row can introduce new columns. **BREAKING CHANGE** Because missing columns are different from empty columns, operations on possibly missing columns will have to use optional access syntax, e.g. `get foo` => `get foo?` # Tests + Formatting Added examples that run as tests and adjusted existing tests to confirm the new behavior. # After Submitting Update the workaround with fish completer mentioned [here](https://www.nushell.sh/cookbook/external_completers.html#fish-completer)
This commit is contained in:
parent
2a90cb7355
commit
5f7082f053
4 changed files with 62 additions and 30 deletions
|
@ -11,7 +11,10 @@ impl Command for FromCsv {
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("from csv")
|
Signature::build("from csv")
|
||||||
.input_output_types(vec![(Type::String, Type::table())])
|
.input_output_types(vec![
|
||||||
|
(Type::String, Type::table()),
|
||||||
|
(Type::String, Type::list(Type::Any)),
|
||||||
|
])
|
||||||
.named(
|
.named(
|
||||||
"separator",
|
"separator",
|
||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
|
@ -82,6 +85,26 @@ impl Command for FromCsv {
|
||||||
})],
|
})],
|
||||||
))
|
))
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "Convert comma-separated data to a table, allowing variable number of columns per row",
|
||||||
|
example: "\"ColA,ColB\n1,2\n3,4,5\n6\" | from csv --flexible",
|
||||||
|
result: Some(Value::test_list (
|
||||||
|
vec![
|
||||||
|
Value::test_record(record! {
|
||||||
|
"ColA" => Value::test_int(1),
|
||||||
|
"ColB" => Value::test_int(2),
|
||||||
|
}),
|
||||||
|
Value::test_record(record! {
|
||||||
|
"ColA" => Value::test_int(3),
|
||||||
|
"ColB" => Value::test_int(4),
|
||||||
|
"column2" => Value::test_int(5),
|
||||||
|
}),
|
||||||
|
Value::test_record(record! {
|
||||||
|
"ColA" => Value::test_int(6),
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
))
|
||||||
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Convert comma-separated data to a table, ignoring headers",
|
description: "Convert comma-separated data to a table, ignoring headers",
|
||||||
example: "open data.txt | from csv --noheaders",
|
example: "open data.txt | from csv --noheaders",
|
||||||
|
|
|
@ -39,12 +39,7 @@ fn from_delimited_stream(
|
||||||
.from_reader(input_reader);
|
.from_reader(input_reader);
|
||||||
|
|
||||||
let headers = if noheaders {
|
let headers = if noheaders {
|
||||||
(0..reader
|
vec![]
|
||||||
.headers()
|
|
||||||
.map_err(|err| from_csv_error(err, span))?
|
|
||||||
.len())
|
|
||||||
.map(|i| format!("column{i}"))
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
} else {
|
} else {
|
||||||
reader
|
reader
|
||||||
.headers()
|
.headers()
|
||||||
|
@ -54,32 +49,28 @@ fn from_delimited_stream(
|
||||||
.collect()
|
.collect()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let n = headers.len();
|
||||||
|
let columns = headers
|
||||||
|
.into_iter()
|
||||||
|
.chain((n..).map(|i| format!("column{i}")));
|
||||||
let iter = reader.into_records().map(move |row| {
|
let iter = reader.into_records().map(move |row| {
|
||||||
let row = match row {
|
let row = match row {
|
||||||
Ok(row) => row,
|
Ok(row) => row,
|
||||||
Err(err) => return Value::error(from_csv_error(err, span), span),
|
Err(err) => return Value::error(from_csv_error(err, span), span),
|
||||||
};
|
};
|
||||||
let columns = headers.iter().cloned();
|
let columns = columns.clone();
|
||||||
let values = row
|
let values = row.into_iter().map(|s| {
|
||||||
.into_iter()
|
if no_infer {
|
||||||
.map(|s| {
|
Value::string(s, span)
|
||||||
if no_infer {
|
} else if let Ok(i) = s.parse() {
|
||||||
Value::string(s, span)
|
Value::int(i, span)
|
||||||
} else if let Ok(i) = s.parse() {
|
} else if let Ok(f) = s.parse() {
|
||||||
Value::int(i, span)
|
Value::float(f, span)
|
||||||
} else if let Ok(f) = s.parse() {
|
} else {
|
||||||
Value::float(f, span)
|
Value::string(s, span)
|
||||||
} else {
|
}
|
||||||
Value::string(s, span)
|
});
|
||||||
}
|
|
||||||
})
|
|
||||||
.chain(std::iter::repeat(Value::nothing(span)));
|
|
||||||
|
|
||||||
// If there are more values than the number of headers,
|
|
||||||
// then the remaining values are ignored.
|
|
||||||
//
|
|
||||||
// Otherwise, if there are less values than headers,
|
|
||||||
// then `Value::nothing(span)` is used to fill the remaining columns.
|
|
||||||
Value::record(columns.zip(values).collect(), span)
|
Value::record(columns.zip(values).collect(), span)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,10 @@ impl Command for FromTsv {
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("from tsv")
|
Signature::build("from tsv")
|
||||||
.input_output_types(vec![(Type::String, Type::table())])
|
.input_output_types(vec![
|
||||||
|
(Type::String, Type::table()),
|
||||||
|
(Type::String, Type::list(Type::Any)),
|
||||||
|
])
|
||||||
.named(
|
.named(
|
||||||
"comment",
|
"comment",
|
||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
|
@ -76,6 +79,21 @@ impl Command for FromTsv {
|
||||||
})],
|
})],
|
||||||
))
|
))
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "Convert comma-separated data to a table, allowing variable number of columns per row and ignoring headers",
|
||||||
|
example: "\"value 1\nvalue 2\tdescription 2\" | from tsv --flexible --noheaders",
|
||||||
|
result: Some(Value::test_list (
|
||||||
|
vec![
|
||||||
|
Value::test_record(record! {
|
||||||
|
"column0" => Value::test_string("value 1"),
|
||||||
|
}),
|
||||||
|
Value::test_record(record! {
|
||||||
|
"column0" => Value::test_string("value 2"),
|
||||||
|
"column1" => Value::test_string("description 2"),
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
))
|
||||||
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Create a tsv file with header columns and open it",
|
description: "Create a tsv file with header columns and open it",
|
||||||
example: r#"$'c1(char tab)c2(char tab)c3(char nl)1(char tab)2(char tab)3' | save tsv-data | open tsv-data | from tsv"#,
|
example: r#"$'c1(char tab)c2(char tab)c3(char nl)1(char tab)2(char tab)3' | save tsv-data | open tsv-data | from tsv"#,
|
||||||
|
|
|
@ -469,7 +469,7 @@ fn from_csv_test_flexible_extra_vals() {
|
||||||
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
|
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
assert_eq!(actual.out, "[1, 2]");
|
assert_eq!(actual.out, "[1, 2, 3]");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -479,5 +479,5 @@ fn from_csv_test_flexible_missing_vals() {
|
||||||
echo "a,b\n1" | from csv --flexible | first | values | to nuon
|
echo "a,b\n1" | from csv --flexible | first | values | to nuon
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
assert_eq!(actual.out, "[1, null]");
|
assert_eq!(actual.out, "[1]");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue