From db2bca56c98907c97ca1ff495a50b4d4c7934e28 Mon Sep 17 00:00:00 2001 From: Luccas Mateus Date: Fri, 12 Nov 2021 17:46:39 -0300 Subject: [PATCH] `from url` and `from eml` (#324) * MathEval Variance and Stddev * Fix tests and linting * Typo * Deal with streams when they are not tables * FromEml and FromUrl Added tests for from eml --- Cargo.lock | 47 ++++ crates/nu-command/Cargo.toml | 3 + crates/nu-command/src/default_context.rs | 2 + crates/nu-command/src/formats/from/eml.rs | 254 ++++++++++++++++++++++ crates/nu-command/src/formats/from/mod.rs | 4 + crates/nu-command/src/formats/from/url.rs | 92 ++++++++ crates/nu-protocol/src/value/mod.rs | 1 + 7 files changed, 403 insertions(+) create mode 100644 crates/nu-command/src/formats/from/eml.rs create mode 100644 crates/nu-command/src/formats/from/url.rs diff --git a/Cargo.lock b/Cargo.lock index dc658ed906..03edf85d7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -438,6 +438,15 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "eml-parser" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031fe36712cec8b81c5b76b555666ce855a4dfc2dcc35bb907046bf2ef545578" +dependencies = [ + "regex", +] + [[package]] name = "encode_unicode" version = "0.3.6" @@ -474,6 +483,16 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "getrandom" version = "0.2.3" @@ -534,6 +553,7 @@ checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", "hashbrown", + "serde", ] [[package]] @@ -616,6 +636,12 @@ dependencies = [ "crossterm 0.21.0", ] +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + [[package]] name = "memchr" version = "2.4.1" @@ -766,7 +792,9 @@ dependencies = [ "chrono-tz", "csv", "dialoguer", + "eml-parser", "glob", + "indexmap", "itertools", "lscolors", "meval", @@ -780,6 +808,7 @@ dependencies = [ "rand", "rayon", "serde", + "serde_urlencoded", "serde_yaml", "sysinfo", "terminal_size", @@ -979,6 +1008,12 @@ dependencies = [ "regex", ] +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + [[package]] name = "pest" version = "2.1.3" @@ -1310,6 +1345,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_yaml" version = "0.8.21" diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index b123ae07fc..9982f8e985 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -27,6 +27,7 @@ chrono = { version = "0.4.19", features = ["serde"] } chrono-humanize = "0.2.1" chrono-tz = "0.6.0" terminal_size = "0.1.17" +indexmap = { version="1.7", features=["serde-1"] } lscolors = { version = "0.8.0", features = ["crossterm"] } bytesize = "1.1.0" dialoguer = "0.9.0" @@ -36,6 +37,8 @@ titlecase = "1.1.0" meval = "0.2.0" serde = { version="1.0.123", features=["derive"] } serde_yaml = "0.8.16" +serde_urlencoded = "0.7.0" +eml-parser = "0.1.0" itertools = "0.10.0" rand = "0.8" diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 4110e72999..d60c06a3b0 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -50,6 +50,8 @@ pub fn create_default_context() -> EngineState { FromYaml, FromYml, FromTsv, + FromUrl, + FromEml, Get, Griddle, Help, diff --git a/crates/nu-command/src/formats/from/eml.rs b/crates/nu-command/src/formats/from/eml.rs new file mode 100644 index 0000000000..3c8b684b93 --- /dev/null +++ b/crates/nu-command/src/formats/from/eml.rs @@ -0,0 +1,254 @@ +use ::eml_parser::eml::*; +use ::eml_parser::EmlParser; +use indexmap::map::IndexMap; +use nu_engine::CallExt; +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EngineState, Stack}; +use nu_protocol::{ + Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct FromEml; + +const DEFAULT_BODY_PREVIEW: usize = 50; + +impl Command for FromEml { + fn name(&self) -> &str { + "from eml" + } + + fn signature(&self) -> Signature { + Signature::build("from eml").named( + "preview-body", + SyntaxShape::Int, + "How many bytes of the body to preview", + Some('b'), + ) + } + + fn usage(&self) -> &str { + "Parse text as .eml and create table." + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + let preview_body: Option> = + call.get_flag(engine_state, stack, "preview-body")?; + from_eml(input, preview_body, head) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Convert eml structured data into table", + example: "'From: test@email.com +Subject: Welcome +To: someone@somewhere.com + +Test' | from eml", + result: Some(Value::Record { + cols: vec![ + "Subject".to_string(), + "From".to_string(), + "To".to_string(), + "Body".to_string(), + ], + vals: vec![ + Value::test_string("Welcome"), + Value::Record { + cols: vec!["Name".to_string(), "Address".to_string()], + vals: vec![ + Value::nothing(Span::unknown()), + Value::test_string("test@email.com"), + ], + span: Span::unknown(), + }, + Value::Record { + cols: vec!["Name".to_string(), "Address".to_string()], + vals: vec![ + Value::nothing(Span::unknown()), + Value::test_string("someone@somewhere.com"), + ], + span: Span::unknown(), + }, + Value::test_string("Test"), + ], + span: Span::unknown(), + }), + }, + Example { + description: "Convert eml structured data into table", + example: "'From: test@email.com +Subject: Welcome +To: someone@somewhere.com + +Test' | from eml -b 1", + result: Some(Value::Record { + cols: vec![ + "Subject".to_string(), + "From".to_string(), + "To".to_string(), + "Body".to_string(), + ], + vals: vec![ + Value::test_string("Welcome"), + Value::Record { + cols: vec!["Name".to_string(), "Address".to_string()], + vals: vec![ + Value::nothing(Span::unknown()), + Value::test_string("test@email.com"), + ], + span: Span::unknown(), + }, + Value::Record { + cols: vec!["Name".to_string(), "Address".to_string()], + vals: vec![ + Value::nothing(Span::unknown()), + Value::test_string("someone@somewhere.com"), + ], + span: Span::unknown(), + }, + Value::test_string("T"), + ], + span: Span::unknown(), + }), + }, + ] + } +} + +fn emailaddress_to_value(span: Span, email_address: &EmailAddress) -> Value { + let (n, a) = match email_address { + EmailAddress::AddressOnly { address } => ( + Value::nothing(span), + Value::String { + val: address.to_string(), + span, + }, + ), + EmailAddress::NameAndEmailAddress { name, address } => ( + Value::String { + val: name.to_string(), + span, + }, + Value::String { + val: address.to_string(), + span, + }, + ), + }; + + Value::Record { + cols: vec!["Name".to_string(), "Address".to_string()], + vals: vec![n, a], + span, + } +} + +fn headerfieldvalue_to_value(head: Span, value: &HeaderFieldValue) -> Value { + use HeaderFieldValue::*; + + match value { + SingleEmailAddress(address) => emailaddress_to_value(head, address), + MultipleEmailAddresses(addresses) => Value::List { + vals: addresses + .iter() + .map(|a| emailaddress_to_value(head, a)) + .collect(), + span: head, + }, + Unstructured(s) => Value::String { + val: s.to_string(), + span: head, + }, + Empty => Value::nothing(head), + } +} + +fn from_eml( + input: PipelineData, + preview_body: Option>, + head: Span, +) -> Result { + let value = input.collect_string(""); + + let body_preview = preview_body + .map(|b| b.item as usize) + .unwrap_or(DEFAULT_BODY_PREVIEW); + + let eml = EmlParser::from_string(value) + .with_body_preview(body_preview) + .parse() + .map_err(|_| { + ShellError::CantConvert("structured data from eml".into(), "string".into(), head) + })?; + + let mut collected = IndexMap::new(); + + if let Some(subj) = eml.subject { + collected.insert( + "Subject".to_string(), + Value::String { + val: subj, + span: head, + }, + ); + } + + if let Some(from) = eml.from { + collected.insert("From".to_string(), headerfieldvalue_to_value(head, &from)); + } + + if let Some(to) = eml.to { + collected.insert("To".to_string(), headerfieldvalue_to_value(head, &to)); + } + + for HeaderField { name, value } in &eml.headers { + collected.insert(name.to_string(), headerfieldvalue_to_value(head, value)); + } + + if let Some(body) = eml.body { + collected.insert( + "Body".to_string(), + Value::String { + val: body, + span: head, + }, + ); + } + + let (cols, vals) = collected + .into_iter() + .fold((vec![], vec![]), |mut acc, (k, v)| { + acc.0.push(k); + acc.1.push(v); + acc + }); + + let record = Value::Record { + cols, + vals, + span: head, + }; + + Ok(PipelineData::Value(record)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(FromEml {}) + } +} diff --git a/crates/nu-command/src/formats/from/mod.rs b/crates/nu-command/src/formats/from/mod.rs index c7577d74a6..6692f791c8 100644 --- a/crates/nu-command/src/formats/from/mod.rs +++ b/crates/nu-command/src/formats/from/mod.rs @@ -1,13 +1,17 @@ mod command; mod csv; mod delimited; +mod eml; mod json; mod tsv; +mod url; mod yaml; pub use self::csv::FromCsv; pub use command::From; +pub use eml::FromEml; pub use json::FromJson; pub use tsv::FromTsv; +pub use url::FromUrl; pub use yaml::FromYaml; pub use yaml::FromYml; diff --git a/crates/nu-command/src/formats/from/url.rs b/crates/nu-command/src/formats/from/url.rs new file mode 100644 index 0000000000..4df1fd9abe --- /dev/null +++ b/crates/nu-command/src/formats/from/url.rs @@ -0,0 +1,92 @@ +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EngineState, Stack}; +use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, Value}; + +#[derive(Clone)] +pub struct FromUrl; + +impl Command for FromUrl { + fn name(&self) -> &str { + "from url" + } + + fn signature(&self) -> Signature { + Signature::build("from url") + } + + fn usage(&self) -> &str { + "Parse url-encoded string as a table." + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + from_url(input, head) + } + + fn examples(&self) -> Vec { + vec![Example { + example: "'bread=baguette&cheese=comt%C3%A9&meat=ham&fat=butter' | from url", + description: "Convert url encoded string into a table", + result: Some(Value::Record { + cols: vec![ + "bread".to_string(), + "cheese".to_string(), + "meat".to_string(), + "fat".to_string(), + ], + vals: vec![ + Value::test_string("baguette"), + Value::test_string("comté"), + Value::test_string("ham"), + Value::test_string("butter"), + ], + span: Span::unknown(), + }), + }] + } +} + +fn from_url(input: PipelineData, head: Span) -> Result { + let concat_string = input.collect_string(""); + + let result = serde_urlencoded::from_str::>(&concat_string); + + match result { + Ok(result) => { + let mut cols = vec![]; + let mut vals = vec![]; + for (k, v) in result { + cols.push(k); + vals.push(Value::String { val: v, span: head }) + } + + Ok(PipelineData::Value(Value::Record { + cols, + vals, + span: head, + })) + } + _ => Err(ShellError::UnsupportedInput( + "String not compatible with url-encoding".to_string(), + head, + )), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(FromUrl {}) + } +} diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 5cfc49282a..7724076686 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -531,6 +531,7 @@ impl PartialOrd for Value { (Value::Binary { val: lhs, .. }, Value::Binary { val: rhs, .. }) => { lhs.partial_cmp(rhs) } + (Value::Nothing { .. }, Value::Nothing { .. }) => Some(Ordering::Equal), (_, _) => None, } }