diff --git a/Cargo.lock b/Cargo.lock index b4d0da0aee..90b053ddea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,6 +252,20 @@ dependencies = [ "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "calamine" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "codepage 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "zip 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cc" version = "1.0.47" @@ -328,6 +342,14 @@ dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "codepage" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "config" version = "0.9.3" @@ -702,6 +724,14 @@ name = "encode_unicode" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "encoding_rs" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "env_logger" version = "0.6.2" @@ -1580,6 +1610,7 @@ dependencies = [ "bson 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "byte-unit 3.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)", + "calamine 0.16.0 
(registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)", "chrono-humanize 0.0.11 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1875,6 +1906,11 @@ dependencies = [ "inflate 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "podio" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ppv-lite86" version = "0.2.6" @@ -1948,6 +1984,15 @@ name = "quick-error" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "quick-xml" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quote" version = "1.0.2" @@ -3028,6 +3073,16 @@ dependencies = [ "linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "zip" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", + "podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" @@ -3060,6 +3115,7 @@ dependencies = [ "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" +"checksum calamine 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "213df9241db37007bf06fb3da8f61f1cddb3badb9a702c62f4e80299d4d2982f" "checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e8493056968583b0193c1bb04d6f7684586f3726992d6c573261941a895dbd68" @@ -3068,6 +3124,7 @@ dependencies = [ "checksum clipboard 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "25a904646c0340239dcf7c51677b33928bf24fdf424b79a57909c0109075b2e7" "checksum clipboard-win 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e3a093d6fed558e5fe24c3dfc85a68bb68f1c824f440d3ba5aca189e2998786b" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum codepage 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8b0e9222c0cdf2c6ac27d73f664f9520266fa911c3106329d359f8861cb8bde9" "checksum config 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f9107d78ed62b3fa5a86e7d18e647abed48cfd8f8fab6c72f4cdb982d196f7e6" "checksum constant_time_eq 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "995a44c877f9212528ccc74b21a232f66ad69001e40ede5bcee2ac9ef2657120" "checksum core-foundation 0.6.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d" @@ -3109,6 +3166,7 @@ dependencies = [ "checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum encode_unicode 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" "checksum failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "f8273f13c977665c5db7eb2b99ae520952fe5ac831ae4cd09d80c4c7042b5ed9" "checksum failure_derive 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0bc225b78e0391e4b8683440bf2e63c2deeeb2ce5189eab46e2b68c6d3725d08" @@ -3231,6 +3289,7 @@ dependencies = [ "checksum platforms 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "feb3b2b1033b8a60b4da6ee470325f887758c95d5320f52f9ce0df055a55940e" "checksum plist 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a9f075f6394100e7c105ed1af73fb1859d6fd14e49d4290d578120beb167f" "checksum png 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8422b27bb2c013dd97b9aef69e161ce262236f49aaf46a0489011c8ff0264602" +"checksum podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "780fb4b6698bbf9cf2444ea5d22411cef2953f0824b98f33cf454ec5615645bd" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum pretty-hex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be91bcc43e73799dc46a6c194a55e7aae1d86cc867c860fd4a436019af21bd8c" "checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" @@ -3239,6 +3298,7 @@ dependencies = [ "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" "checksum ptree 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0a3be00b19ee7bd33238c1c523a7ab4df697345f6b36f90827a7860ea938d4" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" +"checksum quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcdba8c8d71275493d966ef052a88726ac8590c15a09968b32158205c672ef" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" "checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412" "checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" @@ -3365,3 +3425,4 @@ dependencies = [ "checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" "checksum xmlparser 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8110496c5bcc0d966b0b2da38d5a791aa139eeb0b80e7840a7463c2b806921eb" "checksum yaml-rust 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65923dd1784f44da1d2c3dbbc5e822045628c590ba72123e1c73d3c230c4434d" +"checksum zip 0.5.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "3c21bb410afa2bd823a047f5bda3adb62f51074ac7e06263b2c97ecdd47e9fc6" diff --git a/Cargo.toml b/Cargo.toml index 9c64c77c17..556d68ed9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ trash = "1.0.0" regex = "1" cfg-if = "0.1" strip-ansi-escapes = "0.1.0" +calamine = "0.16" neso = { version = "0.5.0", optional = true } crossterm = { version = "0.10.2", optional = true } diff --git a/src/cli.rs b/src/cli.rs index d12f5bbcde..d7d873bd15 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -297,6 +297,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(FromSQLite), whole_stream_command(FromTOML), whole_stream_command(FromURL), + whole_stream_command(FromXLSX), whole_stream_command(FromXML), whole_stream_command(FromYAML), whole_stream_command(FromYML), diff --git a/src/commands.rs b/src/commands.rs index c238b451d8..bf25e1bb23 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -32,6 +32,7 @@ pub(crate) mod from_ssv; pub(crate) mod from_toml; pub(crate) mod from_tsv; pub(crate) mod from_url; +pub(crate) mod from_xlsx; pub(crate) mod from_xml; pub(crate) mod from_yaml; pub(crate) mod get; @@ -118,6 +119,7 @@ pub(crate) use from_ssv::FromSSV; pub(crate) use from_toml::FromTOML; pub(crate) use from_tsv::FromTSV; pub(crate) use from_url::FromURL; +pub(crate) use from_xlsx::FromXLSX; pub(crate) use from_xml::FromXML; pub(crate) use from_yaml::FromYAML; pub(crate) use from_yaml::FromYML; diff --git a/src/commands/from_sqlite.rs b/src/commands/from_sqlite.rs index 7b93dc1633..55dff62a47 100644 --- a/src/commands/from_sqlite.rs +++ b/src/commands/from_sqlite.rs @@ -158,8 +158,8 @@ fn from_sqlite(args: CommandArgs, registry: &CommandRegistry) -> Result yield Err(ShellError::labeled_error_with_secondary( - "Expected a string from pipeline", - "requires string input", + "Expected binary data from pipeline", + "requires binary data input", &tag, "value originates from here", value_tag, diff --git 
a/src/commands/from_xlsx.rs b/src/commands/from_xlsx.rs
new file mode 100644
index 0000000000..da6e3c883e
--- /dev/null
+++ b/src/commands/from_xlsx.rs
@@ -0,0 +1,142 @@
+use crate::commands::WholeStreamCommand;
+use crate::data::{Primitive, Value};
+use crate::prelude::*;
+use crate::{TaggedDictBuilder, TaggedListBuilder};
+use calamine::*;
+use std::io::Cursor;
+
+/// The `from-xlsx` command: parses binary Excel (.xlsx) data from the pipeline.
+pub struct FromXLSX;
+
+/// Arguments accepted by `from-xlsx`.
+#[derive(Deserialize)]
+pub struct FromXLSXArgs {
+    /// Set by the `headerless` switch; accepted but not consulted by the conversion.
+    headerless: bool,
+}
+
+impl WholeStreamCommand for FromXLSX {
+    fn name(&self) -> &str {
+        "from-xlsx"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("from-xlsx")
+            .switch("headerless", "don't treat the first row as column names")
+    }
+
+    fn usage(&self) -> &str {
+        "Parse binary Excel(.xlsx) data and create table."
+    }
+
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        args.process(registry, from_xlsx)?.run()
+    }
+}
+
+/// Turns each binary value on the input stream into a dictionary keyed by sheet name,
+/// where every entry is the list of that sheet's rows.
+///
+/// Each row is itself a dictionary whose keys are `Column0`, `Column1`, ... in cell order.
+/// The `headerless` switch is accepted but ignored here. Non-binary input, a workbook that
+/// cannot be opened, or a worksheet that cannot be read yields a `ShellError` on the output
+/// stream instead of panicking.
+fn from_xlsx(
+    FromXLSXArgs {
+        headerless: _headerless,
+    }: FromXLSXArgs,
+    runnable_context: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let input = runnable_context.input;
+    let tag = runnable_context.name;
+
+    let stream = async_stream! {
+        let values: Vec<_> = input.values.collect().await;
+
+        for value in values {
+            let value_tag = value.tag();
+            match value.item {
+                Value::Primitive(Primitive::Binary(vb)) => {
+                    // The bytes are user input: a malformed workbook is reported as an
+                    // error on the stream rather than unwrapped into a panic.
+                    let mut xls = match Xlsx::<_>::new(Cursor::new(vb)) {
+                        Ok(workbook) => workbook,
+                        Err(_) => {
+                            yield Err(ShellError::labeled_error_with_secondary(
+                                "Could not parse as XLSX",
+                                "input cannot be parsed as XLSX",
+                                &tag,
+                                "value originates from here",
+                                value_tag,
+                            ));
+                            continue;
+                        }
+                    };
+
+                    let mut dict = TaggedDictBuilder::new(&tag);
+                    let sheet_names = xls.sheet_names().to_owned();
+                    let mut all_sheets_read = true;
+
+                    for sheet_name in &sheet_names {
+                        let current_sheet = match xls.worksheet_range(sheet_name) {
+                            Some(Ok(range)) => range,
+                            // Sheet missing or unreadable: give up on this workbook.
+                            _ => {
+                                all_sheets_read = false;
+                                break;
+                            }
+                        };
+
+                        let mut sheet_output = TaggedListBuilder::new(&tag);
+
+                        for row in current_sheet.rows() {
+                            let mut row_output = TaggedDictBuilder::new(&tag);
+                            for (column, cell) in row.iter().enumerate() {
+                                let cell_value = match cell {
+                                    DataType::Empty => Value::nothing(),
+                                    DataType::String(s) => Value::string(s),
+                                    DataType::Float(f) => Value::decimal(*f),
+                                    DataType::Int(n) => Value::int(*n),
+                                    DataType::Bool(b) => Value::boolean(*b),
+                                    // Any other cell kind is mapped to nothing.
+                                    _ => Value::nothing(),
+                                };
+
+                                row_output.insert(&format!("Column{}", column), cell_value);
+                            }
+
+                            sheet_output.push(row_output.into_tagged_value().item);
+                        }
+
+                        dict.insert(sheet_name, sheet_output.into_tagged_value().item);
+                    }
+
+                    if all_sheets_read {
+                        yield ReturnSuccess::value(dict.into_tagged_value());
+                    } else {
+                        yield Err(ShellError::labeled_error_with_secondary(
+                            "Could not read worksheet",
+                            "input contains a worksheet that cannot be read",
+                            &tag,
+                            "value originates from here",
+                            value_tag,
+                        ));
+                    }
+                }
+                _ => yield Err(ShellError::labeled_error_with_secondary(
+                    "Expected binary data from pipeline",
+                    "requires binary data input",
+                    &tag,
+                    "value originates from here",
+                    value_tag,
+                )),
+            }
+        }
+    };
+
+    Ok(stream.to_output_stream())
+}
diff --git a/src/lib.rs b/src/lib.rs index 38f770dc21..d6dcee0f22 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ pub use crate::utils::{did_you_mean, AbsoluteFile, AbsolutePath, RelativePath}; pub use cli::cli; pub use data::base::{Primitive, Value}; pub use data::config::{config_path, APP_INFO}; -pub use data::dict::{Dictionary, TaggedDictBuilder}; +pub use data::dict::{Dictionary, TaggedDictBuilder, TaggedListBuilder}; pub use data::meta::{ tag_for_tagged_list, HasFallibleSpan, HasSpan, Span, Spanned, SpannedItem, Tag, Tagged, TaggedItem, diff
--git a/src/utils.rs b/src/utils.rs index cef4aad193..66835ab83f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -498,6 +498,10 @@ mod tests { loc: fixtures().join("sample.url"), at: 0 }, + Res { + loc: fixtures().join("sample_data.xlsx"), + at: 0 + }, Res { loc: fixtures().join("sgml_description.json"), at: 0
diff --git a/tests/filters_test.rs b/tests/filters_test.rs
index e18f20be67..b31fe78654 100644
--- a/tests/filters_test.rs
+++ b/tests/filters_test.rs
@@ -495,6 +495,22 @@ fn can_convert_table_to_bson_and_back_into_table() {
     assert_eq!(actual, "whel");
 }
 
+#[test]
+fn can_read_excel_file() { // reads a single cell out of the sample_data.xlsx fixture
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", h::pipeline( // pipeline runs with this directory as cwd
+        r#"
+            open sample_data.xlsx
+            | get SalesOrders
+            | nth 4
+            | get Column2
+            | echo $it
+        "#
+    ));
+
+    assert_eq!(actual, "Gill"); // text expected at SalesOrders / nth 4 / Column2 in the fixture
+}
+
 #[test]
 fn can_convert_table_to_sqlite_and_back_into_table() {
     let actual = nu!(
diff --git a/tests/fixtures/formats/sample_data.xlsx b/tests/fixtures/formats/sample_data.xlsx new file mode 100644 index 0000000000..1cd1b832a4 Binary files /dev/null and b/tests/fixtures/formats/sample_data.xlsx differ