From 8a9cdcab173c77914e776480bdada4388f5492ae Mon Sep 17 00:00:00 2001 From: Jonathan Turner Date: Tue, 3 Sep 2019 18:04:46 +1200 Subject: [PATCH] Split fetch command away from open --- README.md | 4 +- src/cli.rs | 1 + src/commands.rs | 2 + src/commands/fetch.rs | 302 +++++++++++++++++++++++++++++++++ src/commands/open.rs | 379 ++++++++++++------------------------------ 5 files changed, 413 insertions(+), 275 deletions(-) create mode 100644 src/commands/fetch.rs diff --git a/README.md b/README.md index d2243e1193..a809c39619 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,8 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat | ps | View current processes | | sys | View information about the current system | | which filename | Finds a program file. | -| open {filename or url} | Load a file into a cell, convert to table if possible (avoid by appending '--raw') | +| open filename | Load a file into a cell, convert to table if possible (avoid by appending '--raw') | +| fetch url | Fetch contents from a url and retrieve data as a table if possible | | post url body (--user ) (--password ) | Post content to a url and retrieve data as a table if possible | | rm {file or directory} | Remove a file, (for removing directory append '--recursive') | | exit (--now) | Exit the current shell (or all shells) | @@ -255,7 +256,6 @@ Nu adheres closely to a set of goals that make up its design philosophy. 
As feat | to-bson | Convert table into .bson binary data | | to-tsv | Convert table into .tsv text | | to-sqlite | Convert table to sqlite .db binary data | -| reverse | Reverse the rows of a table | ## Filters on text (unstructured data) | command | description | diff --git a/src/cli.rs b/src/cli.rs index a5a2aebdf7..7cd64531fe 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -208,6 +208,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(Pick), whole_stream_command(Get), per_item_command(Remove), + per_item_command(Fetch), per_item_command(Open), per_item_command(Post), per_item_command(Where), diff --git a/src/commands.rs b/src/commands.rs index 0da8cadbd4..bc6452090f 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -13,6 +13,7 @@ pub(crate) mod date; pub(crate) mod debug; pub(crate) mod enter; pub(crate) mod exit; +pub(crate) mod fetch; pub(crate) mod first; pub(crate) mod from_array; pub(crate) mod from_bson; @@ -78,6 +79,7 @@ pub(crate) use date::Date; pub(crate) use debug::Debug; pub(crate) use enter::Enter; pub(crate) use exit::Exit; +pub(crate) use fetch::Fetch; pub(crate) use first::First; pub(crate) use from_array::FromArray; pub(crate) use from_bson::FromBSON; diff --git a/src/commands/fetch.rs b/src/commands/fetch.rs new file mode 100644 index 0000000000..8f69929a19 --- /dev/null +++ b/src/commands/fetch.rs @@ -0,0 +1,302 @@ +use crate::commands::UnevaluatedCallInfo; +use crate::context::SpanSource; +use crate::errors::ShellError; +use crate::object::Value; +use crate::parser::hir::SyntaxType; +use crate::parser::registry::Signature; +use crate::prelude::*; +use mime::Mime; +use std::path::PathBuf; +use std::str::FromStr; +use surf::mime; +use uuid::Uuid; +pub struct Fetch; + +impl PerItemCommand for Fetch { + fn name(&self) -> &str { + "fetch" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("path", SyntaxType::Path) + .switch("raw") + } + + fn usage(&self) -> &str { + "Load from a URL into a cell, 
convert to table if possible (avoid by appending '--raw')" + } + + fn run( + &self, + call_info: &CallInfo, + registry: &CommandRegistry, + raw_args: &RawCommandArgs, + _input: Tagged<Value>, + ) -> Result<OutputStream, ShellError> { + run(call_info, registry, raw_args) + } +} + +fn run( + call_info: &CallInfo, + registry: &CommandRegistry, + raw_args: &RawCommandArgs, +) -> Result<OutputStream, ShellError> { + let path = match call_info + .args + .nth(0) + .ok_or_else(|| ShellError::string(&format!("No file or directory specified")))? + { + file => file, + }; + let path_buf = path.as_path()?; + let path_str = path_buf.display().to_string(); + let path_span = path.span(); + let has_raw = call_info.args.has("raw"); + let registry = registry.clone(); + let raw_args = raw_args.clone(); + + let stream = async_stream_block! { + + let result = fetch(&path_str, path_span).await; + + if let Err(e) = result { + yield Err(e); + return; + } + let (file_extension, contents, contents_tag, span_source) = result.unwrap(); + + let file_extension = if has_raw { + None + } else { + // If the extension could not be determined via mimetype, try to use the path + // extension. Some file types do not declare their mimetypes (such as bson files). 
+ file_extension.or(path_str.split('.').last().map(String::from)) + }; + + if let Some(uuid) = contents_tag.origin { + // If we have loaded something, track its source + yield ReturnSuccess::action(CommandAction::AddSpanSource( + uuid, + span_source, + )); + } + + let tagged_contents = contents.tagged(contents_tag); + + if let Some(extension) = file_extension { + let command_name = format!("from-{}", extension); + if let Some(converter) = registry.get_command(&command_name) { + let new_args = RawCommandArgs { + host: raw_args.host, + shell_manager: raw_args.shell_manager, + call_info: UnevaluatedCallInfo { + args: crate::parser::hir::Call { + head: raw_args.call_info.args.head, + positional: None, + named: None + }, + source: raw_args.call_info.source, + source_map: raw_args.call_info.source_map, + name_span: raw_args.call_info.name_span, + } + }; + let mut result = converter.run(new_args.with_input(vec![tagged_contents]), &registry); + let result_vec: Vec<Result<ReturnSuccess, ShellError>> = result.drain_vec().await; + for res in result_vec { + match res { + Ok(ReturnSuccess::Value(Tagged { item: Value::List(list), ..})) => { + for l in list { + yield Ok(ReturnSuccess::Value(l)); + } + } + Ok(ReturnSuccess::Value(Tagged { item, .. 
})) => { + yield Ok(ReturnSuccess::Value(Tagged { item, tag: contents_tag })); + } + x => yield x, + } + } + } else { + yield ReturnSuccess::value(tagged_contents); + } + } else { + yield ReturnSuccess::value(tagged_contents); + } + }; + + Ok(stream.to_output_stream()) +} + +pub async fn fetch( + location: &str, + span: Span, +) -> Result<(Option<String>, Value, Tag, SpanSource), ShellError> { + if let Err(_) = url::Url::parse(location) { + return Err(ShellError::labeled_error( + "Incomplete or incorrect url", + "expected a full url", + span, + )); + } + + let response = surf::get(location).await; + match response { + Ok(mut r) => match r.headers().get("content-type") { + Some(content_type) => { + let content_type = Mime::from_str(content_type).unwrap(); + match (content_type.type_(), content_type.subtype()) { + (mime::APPLICATION, mime::XML) => Ok(( + Some("xml".to_string()), + Value::string(r.body_string().await.map_err(|_| { + ShellError::labeled_error( + "Could not load text from remote url", + "could not load", + span, + ) + })?), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + (mime::APPLICATION, mime::JSON) => Ok(( + Some("json".to_string()), + Value::string(r.body_string().await.map_err(|_| { + ShellError::labeled_error( + "Could not load text from remote url", + "could not load", + span, + ) + })?), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + (mime::APPLICATION, mime::OCTET_STREAM) => { + let buf: Vec<u8> = r.body_bytes().await.map_err(|_| { + ShellError::labeled_error( + "Could not load binary file", + "could not load", + span, + ) + })?; + Ok(( + None, + Value::Binary(buf), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )) + } + (mime::IMAGE, mime::SVG) => Ok(( + Some("svg".to_string()), + Value::string(r.body_string().await.map_err(|_| { + ShellError::labeled_error( + "Could not load svg from remote url", 
+ "could not load", + span, + ) + })?), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + (mime::IMAGE, image_ty) => { + let buf: Vec<u8> = r.body_bytes().await.map_err(|_| { + ShellError::labeled_error( + "Could not load image file", + "could not load", + span, + ) + })?; + Ok(( + Some(image_ty.to_string()), + Value::Binary(buf), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )) + } + (mime::TEXT, mime::HTML) => Ok(( + Some("html".to_string()), + Value::string(r.body_string().await.map_err(|_| { + ShellError::labeled_error( + "Could not load text from remote url", + "could not load", + span, + ) + })?), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + (mime::TEXT, mime::PLAIN) => { + let path_extension = url::Url::parse(location) + .unwrap() + .path_segments() + .and_then(|segments| segments.last()) + .and_then(|name| if name.is_empty() { None } else { Some(name) }) + .and_then(|name| { + PathBuf::from(name) + .extension() + .map(|name| name.to_string_lossy().to_string()) + }); + + Ok(( + path_extension, + Value::string(r.body_string().await.map_err(|_| { + ShellError::labeled_error( + "Could not load text from remote url", + "could not load", + span, + ) + })?), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )) + } + (ty, sub_ty) => Ok(( + None, + Value::string(format!("Not yet supported MIME type: {} {}", ty, sub_ty)), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + } + } + None => Ok(( + None, + Value::string(format!("No content type found")), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::Url(location.to_string()), + )), + }, + Err(_) => { + return Err(ShellError::labeled_error( + "URL could not be opened", + "url not found", + span, + )); + } + } +} diff --git a/src/commands/open.rs 
b/src/commands/open.rs index 0faebdef77..ea960d8380 100644 --- a/src/commands/open.rs +++ b/src/commands/open.rs @@ -5,10 +5,7 @@ use crate::object::Value; use crate::parser::hir::SyntaxType; use crate::parser::registry::Signature; use crate::prelude::*; -use mime::Mime; use std::path::{Path, PathBuf}; -use std::str::FromStr; -use surf::mime; use uuid::Uuid; pub struct Open; @@ -138,290 +135,126 @@ pub async fn fetch( span: Span, ) -> Result<(Option<String>, Value, Tag, SpanSource), ShellError> { let mut cwd = cwd.clone(); - if location.starts_with("http:") || location.starts_with("https:") { - let response = surf::get(location).await; - match response { - Ok(mut r) => match r.headers().get("content-type") { - Some(content_type) => { - let content_type = Mime::from_str(content_type).unwrap(); - match (content_type.type_(), content_type.subtype()) { - (mime::APPLICATION, mime::XML) => Ok(( - Some("xml".to_string()), - Value::string(r.body_string().await.map_err(|_| { - ShellError::labeled_error( - "Could not load text from remote url", - "could not load", - span, - ) - })?), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )), - (mime::APPLICATION, mime::JSON) => Ok(( - Some("json".to_string()), - Value::string(r.body_string().await.map_err(|_| { - ShellError::labeled_error( - "Could not load text from remote url", - "could not load", - span, - ) - })?), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )), - (mime::APPLICATION, mime::OCTET_STREAM) => { - let buf: Vec<u8> = r.body_bytes().await.map_err(|_| { - ShellError::labeled_error( - "Could not load binary file", - "could not load", - span, - ) - })?; - Ok(( - None, - Value::Binary(buf), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )) - } - (mime::IMAGE, mime::SVG) => Ok(( - Some("svg".to_string()), - Value::string(r.body_string().await.map_err(|_| { - ShellError::labeled_error( - 
"Could not load svg from remote url", - "could not load", - span, - ) - })?), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )), - (mime::IMAGE, image_ty) => { - let buf: Vec<u8> = r.body_bytes().await.map_err(|_| { - ShellError::labeled_error( - "Could not load image file", - "could not load", - span, - ) - })?; - Ok(( - Some(image_ty.to_string()), - Value::Binary(buf), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )) - } - (mime::TEXT, mime::HTML) => Ok(( - Some("html".to_string()), - Value::string(r.body_string().await.map_err(|_| { - ShellError::labeled_error( - "Could not load text from remote url", - "could not load", - span, - ) - })?), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )), - (mime::TEXT, mime::PLAIN) => { - let path_extension = url::Url::parse(location) - .unwrap() - .path_segments() - .and_then(|segments| segments.last()) - .and_then(|name| if name.is_empty() { None } else { Some(name) }) - .and_then(|name| { - PathBuf::from(name) - .extension() - .map(|name| name.to_string_lossy().to_string()) - }); - Ok(( - path_extension, - Value::string(r.body_string().await.map_err(|_| { - ShellError::labeled_error( - "Could not load text from remote url", - "could not load", - span, - ) - })?), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )) - } - (ty, sub_ty) => Ok(( - None, - Value::string(format!( - "Not yet supported MIME type: {} {}", - ty, sub_ty - )), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::Url(location.to_string()), - )), - } - } - None => Ok(( - None, - Value::string(format!("No content type found")), + cwd.push(Path::new(location)); + if let Ok(cwd) = dunce::canonicalize(cwd) { + match std::fs::read(&cwd) { + Ok(bytes) => match std::str::from_utf8(&bytes) { + Ok(s) => Ok(( + cwd.extension() + .map(|name| 
name.to_string_lossy().to_string()), + Value::string(s), Tag { span, origin: Some(Uuid::new_v4()), }, - SpanSource::Url(location.to_string()), + SpanSource::File(cwd.to_string_lossy().to_string()), )), + Err(_) => { + //Non utf8 data. + match (bytes.get(0), bytes.get(1)) { + (Some(x), Some(y)) if *x == 0xff && *y == 0xfe => { + // Possibly UTF-16 little endian + let utf16 = read_le_u16(&bytes[2..]); + + if let Some(utf16) = utf16 { + match std::string::String::from_utf16(&utf16) { + Ok(s) => Ok(( + cwd.extension() + .map(|name| name.to_string_lossy().to_string()), + Value::string(s), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + Err(_) => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } else { + Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )) + } + } + (Some(x), Some(y)) if *x == 0xfe && *y == 0xff => { + // Possibly UTF-16 big endian + let utf16 = read_be_u16(&bytes[2..]); + + if let Some(utf16) = utf16 { + match std::string::String::from_utf16(&utf16) { + Ok(s) => Ok(( + cwd.extension() + .map(|name| name.to_string_lossy().to_string()), + Value::string(s), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + Err(_) => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } else { + Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )) + } + } + _ => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } }, Err(_) => { return 
Err(ShellError::labeled_error( - "URL could not be opened", - "url not found", + "File could not be opened", + "file not found", span, )); } } } else { - cwd.push(Path::new(location)); - if let Ok(cwd) = dunce::canonicalize(cwd) { - match std::fs::read(&cwd) { - Ok(bytes) => match std::str::from_utf8(&bytes) { - Ok(s) => Ok(( - cwd.extension() - .map(|name| name.to_string_lossy().to_string()), - Value::string(s), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - Err(_) => { - //Non utf8 data. - match (bytes.get(0), bytes.get(1)) { - (Some(x), Some(y)) if *x == 0xff && *y == 0xfe => { - // Possibly UTF-16 little endian - let utf16 = read_le_u16(&bytes[2..]); - - if let Some(utf16) = utf16 { - match std::string::String::from_utf16(&utf16) { - Ok(s) => Ok(( - cwd.extension() - .map(|name| name.to_string_lossy().to_string()), - Value::string(s), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - Err(_) => Ok(( - None, - Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - } - } else { - Ok(( - None, - Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )) - } - } - (Some(x), Some(y)) if *x == 0xfe && *y == 0xff => { - // Possibly UTF-16 big endian - let utf16 = read_be_u16(&bytes[2..]); - - if let Some(utf16) = utf16 { - match std::string::String::from_utf16(&utf16) { - Ok(s) => Ok(( - cwd.extension() - .map(|name| name.to_string_lossy().to_string()), - Value::string(s), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - Err(_) => Ok(( - None, - Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - } - } else { - Ok(( - None, - 
Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )) - } - } - _ => Ok(( - None, - Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), - } - } - }, - Err(_) => { - return Err(ShellError::labeled_error( - "File could not be opened", - "file not found", - span, - )); - } - } - } else { - return Err(ShellError::labeled_error( - "File could not be opened", - "file not found", - span, - )); - } + return Err(ShellError::labeled_error( + "File could not be opened", + "file not found", + span, + )); } }