Updated polars version for faster CSV reader (#3781)

This commit is contained in:
Fernando Herrera 2021-07-14 21:33:21 +01:00 committed by GitHub
parent 2864eaebae
commit 3645a0f0e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 17 deletions

39
Cargo.lock generated
View file

@ -1284,6 +1284,15 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "dirs"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
@ -4342,9 +4351,9 @@ dependencies = [
[[package]]
name = "polars"
version = "0.14.5"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f254b79757346a86a8371ea4a087ce6a56e604c82d61093a1b85bfd0df99aeb"
checksum = "5c1bf71de63afed8a9262b61a2a7c3a463bf3b2b8dc28a91873621a86ddeb996"
dependencies = [
"polars-core",
"polars-io",
@ -4353,9 +4362,9 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.14.5"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec1ef88e60b660c51644a5b098570519948d95f389b67ef690a0f1187395d7bf"
checksum = "d1d436b455a6ac76f09b7b8127d2c4b21e80e8b7579218edee91ce138281d178"
dependencies = [
"arrow",
"num 0.4.0",
@ -4364,9 +4373,9 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.14.5"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5e6ee23eb50845501c8c31368051af75801185cf4bedf9e7b3ec945a49af9c"
checksum = "f3e1a74ab0ddbb0cca4f9a79691cf419ec7af63fc1f72206fac6ab45508d6d85"
dependencies = [
"ahash",
"anyhow",
@ -4391,15 +4400,16 @@ dependencies = [
[[package]]
name = "polars-io"
version = "0.14.5"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e8719cdb70555e0492dd24e8f09f637cc112bac438be754bad8dca75f466ab"
checksum = "54f37bf032736512cc938000e937175b3bbfc27ea72abd07fc03d25145d709a0"
dependencies = [
"ahash",
"anyhow",
"arrow",
"csv",
"csv-core",
"dirs 3.0.2",
"fast-float",
"lazy_static 1.4.0",
"lexical",
@ -4411,13 +4421,14 @@ dependencies = [
"polars-core",
"rayon",
"regex",
"simdutf8",
]
[[package]]
name = "polars-lazy"
version = "0.14.5"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ca6b2fb59bbe6725a84c48df12f509b4655d173cd113e5fb51f971cff1f93bc"
checksum = "4c7f767d152af32f2880c02d0e4ead23d5591e066bf92af462ef9d1e4149e7af"
dependencies = [
"ahash",
"itertools",
@ -5499,6 +5510,12 @@ dependencies = [
"libc",
]
[[package]]
name = "simdutf8"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970da16e7c682fa90a261cf0724dee241c9f7831635ecc4e988ae8f3b505559"
[[package]]
name = "siphasher"
version = "0.3.5"
@ -5877,7 +5894,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42"
dependencies = [
"byteorder",
"dirs",
"dirs 1.0.5",
"winapi 0.3.9",
]

View file

@ -98,7 +98,7 @@ which = { version="4.1.0", optional=true }
zip = { version="0.5.9", optional=true }
[dependencies.polars]
version = "0.14.5"
version = "0.14.7"
optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in"]

View file

@ -8,7 +8,7 @@ use nu_protocol::{
};
use nu_source::Tagged;
use polars::prelude::{CsvReader, JsonReader, ParquetReader, SerReader};
use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, PolarsError, SerReader};
use std::fs::File;
pub struct DataFrame;
@ -151,7 +151,8 @@ fn from_csv(args: CommandArgs) -> Result<polars::prelude::DataFrame, ShellError>
let columns: Option<Vec<Value>> = args.get_flag("columns")?;
let csv_reader = CsvReader::from_path(&file.item)
.map_err(|e| parse_polars_error::<&str>(&e, &file.tag.span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, &file.tag.span, None))?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
@ -205,6 +206,15 @@ fn from_csv(args: CommandArgs) -> Result<polars::prelude::DataFrame, ShellError>
match csv_reader.finish() {
Ok(df) => Ok(df),
Err(e) => Err(parse_polars_error::<&str>(&e, &file.tag.span, None)),
Err(e) => match e {
PolarsError::Other(_) => Err(ShellError::labeled_error_with_secondary(
"Schema error",
"Error with the inferred schema",
&file.tag.span,
"You can use the argument 'infer_schema' with a number of rows large enough to better infer the schema",
&file.tag.span,
)),
_ => Err(parse_polars_error::<&str>(&e, &file.tag.span, None)),
},
}
}

View file

@ -38,7 +38,7 @@ nu-value-ext = { version = "0.34.0", path="../nu-value-ext" }
nu-ansi-term = { version = "0.34.0", path="../nu-ansi-term" }
[dependencies.polars]
version = "0.14.5"
version = "0.14.7"
optional = true
features = ["strings", "checked_arithmetic"]

View file

@ -31,7 +31,7 @@ serde_yaml = "0.8.16"
toml = "0.5.8"
[dependencies.polars]
version = "0.14.5"
version = "0.14.7"
optional = true
features = ["serde", "rows"]