mirror of
https://github.com/nushell/nushell
synced 2024-11-15 01:17:07 +00:00
Convert open/fetch to stream (#2028)
* Types lined up for open with stream * Chunking stream * Maybe I didn't need most of the Stream stuff after all? * Some clean-up * Merge weird cargo.lock * Start moving some encoding logic to MaybeTextCodec Will we lose the nice table formatting if we Stream? How do we get it back? Collect the Stream at the end? * Clean-up and small refinements * Put in auto-convert workaround * Workaround to make sure bat functionality works * Handle some easy error cases * All tests pass * Remove guessing logic * Address clippy comments * Pull latest master and fix MaybeTextCodec usage * Add tag to enable autoview
This commit is contained in:
parent
8775991c2d
commit
e31e8d1550
10 changed files with 609 additions and 407 deletions
|
@ -1,5 +1,5 @@
|
|||
use crate::commands::classified::block::run_block;
|
||||
use crate::commands::classified::external::{MaybeTextCodec, StringOrBinary};
|
||||
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||
use crate::commands::plugin::JsonRpc;
|
||||
use crate::commands::plugin::{PluginCommand, PluginSink};
|
||||
use crate::commands::whole_stream_command;
|
||||
|
@ -953,7 +953,7 @@ pub async fn process_line(
|
|||
|
||||
let input_stream = if redirect_stdin {
|
||||
let file = futures::io::AllowStdIo::new(std::io::stdin());
|
||||
let stream = FramedRead::new(file, MaybeTextCodec).map(|line| {
|
||||
let stream = FramedRead::new(file, MaybeTextCodec::default()).map(|line| {
|
||||
if let Ok(line) = line {
|
||||
match line {
|
||||
StringOrBinary::String(s) => Ok(Value {
|
||||
|
|
|
@ -20,6 +20,7 @@ pub(crate) mod clip;
|
|||
pub(crate) mod command;
|
||||
pub(crate) mod compact;
|
||||
pub(crate) mod config;
|
||||
pub(crate) mod constants;
|
||||
pub(crate) mod count;
|
||||
pub(crate) mod cp;
|
||||
pub(crate) mod date;
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||
use crate::evaluate::evaluate_baseline_expr;
|
||||
use crate::futures::ThreadedReceiver;
|
||||
use crate::prelude::*;
|
||||
|
@ -7,9 +8,7 @@ use std::ops::Deref;
|
|||
use std::process::{Command, Stdio};
|
||||
use std::sync::mpsc;
|
||||
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use futures::executor::block_on_stream;
|
||||
// use futures::stream::StreamExt;
|
||||
use futures_codec::FramedRead;
|
||||
use log::trace;
|
||||
|
||||
|
@ -18,70 +17,6 @@ use nu_protocol::hir::ExternalCommand;
|
|||
use nu_protocol::{Primitive, Scope, ShellTypeName, UntaggedValue, Value};
|
||||
use nu_source::Tag;
|
||||
|
||||
pub enum StringOrBinary {
|
||||
String(String),
|
||||
Binary(Vec<u8>),
|
||||
}
|
||||
pub struct MaybeTextCodec;
|
||||
|
||||
impl futures_codec::Encoder for MaybeTextCodec {
|
||||
type Item = StringOrBinary;
|
||||
type Error = std::io::Error;
|
||||
|
||||
fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
|
||||
match item {
|
||||
StringOrBinary::String(s) => {
|
||||
dst.reserve(s.len());
|
||||
dst.put(s.as_bytes());
|
||||
Ok(())
|
||||
}
|
||||
StringOrBinary::Binary(b) => {
|
||||
dst.reserve(b.len());
|
||||
dst.put(Bytes::from(b));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl futures_codec::Decoder for MaybeTextCodec {
|
||||
type Item = StringOrBinary;
|
||||
type Error = std::io::Error;
|
||||
|
||||
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
|
||||
let v: Vec<u8> = src.to_vec();
|
||||
match String::from_utf8(v) {
|
||||
Ok(s) => {
|
||||
src.clear();
|
||||
if s.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(StringOrBinary::String(s)))
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
// Note: the longest UTF-8 character per Unicode spec is currently 6 bytes. If we fail somewhere earlier than the last 6 bytes,
|
||||
// we know that we're failing to understand the string encoding and not just seeing a partial character. When this happens, let's
|
||||
// fall back to assuming it's a binary buffer.
|
||||
if src.is_empty() {
|
||||
Ok(None)
|
||||
} else if src.len() > 6 && (src.len() - err.utf8_error().valid_up_to() > 6) {
|
||||
// Fall back to assuming binary
|
||||
let buf = src.to_vec();
|
||||
src.clear();
|
||||
Ok(Some(StringOrBinary::Binary(buf)))
|
||||
} else {
|
||||
// Looks like a utf-8 string, so let's assume that
|
||||
let buf = src.split_to(err.utf8_error().valid_up_to() + 1);
|
||||
String::from_utf8(buf.to_vec())
|
||||
.map(|x| Some(StringOrBinary::String(x)))
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn run_external_command(
|
||||
command: ExternalCommand,
|
||||
context: &mut Context,
|
||||
|
@ -319,7 +254,7 @@ fn spawn(
|
|||
};
|
||||
|
||||
let file = futures::io::AllowStdIo::new(stdout);
|
||||
let stream = FramedRead::new(file, MaybeTextCodec);
|
||||
let stream = FramedRead::new(file, MaybeTextCodec::default());
|
||||
|
||||
for line in block_on_stream(stream) {
|
||||
match line {
|
||||
|
@ -373,7 +308,7 @@ fn spawn(
|
|||
}
|
||||
|
||||
let file = futures::io::AllowStdIo::new(stderr);
|
||||
let err_stream = FramedRead::new(file, MaybeTextCodec);
|
||||
let err_stream = FramedRead::new(file, MaybeTextCodec::default());
|
||||
|
||||
for err_line in block_on_stream(err_stream) {
|
||||
match err_line {
|
||||
|
|
103
crates/nu-cli/src/commands/classified/maybe_text_codec.rs
Normal file
103
crates/nu-cli/src/commands/classified/maybe_text_codec.rs
Normal file
|
@ -0,0 +1,103 @@
|
|||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
|
||||
use nu_errors::ShellError;
|
||||
|
||||
extern crate encoding_rs;
|
||||
use encoding_rs::{CoderResult, Decoder, Encoding, UTF_8};
|
||||
|
||||
/// Capacity, in bytes, requested for each `String` that receives decoder output
/// (passed to `String::with_capacity` in the `Decoder` implementation below).
const OUTPUT_BUFFER_SIZE: usize = 8192;
|
||||
|
||||
/// A single item produced or consumed by `MaybeTextCodec`.
///
/// A chunk is `String` when its bytes decoded cleanly to text and `Binary`
/// when the codec fell back to handing out the raw bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringOrBinary {
    /// Successfully decoded text.
    String(String),
    /// Raw bytes that were not decoded as text.
    Binary(Vec<u8>),
}
|
||||
|
||||
/// Codec that yields `StringOrBinary` items: text when the wrapped decoder can
/// decode the bytes without replacements, raw bytes otherwise.
pub struct MaybeTextCodec {
|
||||
// Stateful `encoding_rs` decoder; the `Decoder` impl calls `decode_to_string` on it.
decoder: Decoder,
|
||||
}
|
||||
|
||||
impl MaybeTextCodec {
|
||||
// The constructor takes an Option<&'static Encoding>, because an absence of an encoding indicates that we want BOM sniffing enabled
|
||||
pub fn new(encoding: Option<&'static Encoding>) -> Self {
|
||||
let decoder = match encoding {
|
||||
Some(e) => e.new_decoder_with_bom_removal(),
|
||||
None => UTF_8.new_decoder(),
|
||||
};
|
||||
MaybeTextCodec { decoder }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MaybeTextCodec {
|
||||
// The default MaybeTextCodec uses a UTF_8 decoder
|
||||
fn default() -> Self {
|
||||
MaybeTextCodec {
|
||||
decoder: UTF_8.new_decoder(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl futures_codec::Encoder for MaybeTextCodec {
|
||||
type Item = StringOrBinary;
|
||||
type Error = std::io::Error;
|
||||
|
||||
fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
|
||||
match item {
|
||||
StringOrBinary::String(s) => {
|
||||
dst.reserve(s.len());
|
||||
dst.put(s.as_bytes());
|
||||
Ok(())
|
||||
}
|
||||
StringOrBinary::Binary(b) => {
|
||||
dst.reserve(b.len());
|
||||
dst.put(Bytes::from(b));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Write some tests
|
||||
impl futures_codec::Decoder for MaybeTextCodec {
|
||||
type Item = StringOrBinary;
|
||||
type Error = ShellError;
|
||||
|
||||
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
|
||||
if src.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut s = String::with_capacity(OUTPUT_BUFFER_SIZE);
|
||||
|
||||
let (res, read, replacements) = self.decoder.decode_to_string(src, &mut s, false);
|
||||
// If we had to make replacements when converting to utf8, fallback to binary
|
||||
if replacements {
|
||||
return Ok(Some(StringOrBinary::Binary(src.to_vec())));
|
||||
}
|
||||
|
||||
match res {
|
||||
CoderResult::InputEmpty => {
|
||||
src.clear();
|
||||
Ok(Some(StringOrBinary::String(s)))
|
||||
}
|
||||
CoderResult::OutputFull => {
|
||||
// If the original buffer size is too small,
|
||||
// We continue to allocate new Strings and append them to the result until the input buffer is smaller than the allocated String
|
||||
let mut starting_index = read;
|
||||
loop {
|
||||
let mut more = String::with_capacity(OUTPUT_BUFFER_SIZE);
|
||||
let (res, read, _replacements) =
|
||||
self.decoder
|
||||
.decode_to_string(&src[starting_index..], &mut more, false);
|
||||
s.push_str(&more);
|
||||
// Our input buffer is smaller than out allocated String, we can stop now
|
||||
if let CoderResult::InputEmpty = res {
|
||||
break;
|
||||
}
|
||||
starting_index += read;
|
||||
}
|
||||
src.clear();
|
||||
Ok(Some(StringOrBinary::String(s)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3,6 +3,7 @@ mod dynamic;
|
|||
pub(crate) mod expr;
|
||||
pub(crate) mod external;
|
||||
pub(crate) mod internal;
|
||||
pub(crate) mod maybe_text_codec;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub(crate) use dynamic::Command as DynamicCommand;
|
||||
|
|
358
crates/nu-cli/src/commands/constants.rs
Normal file
358
crates/nu-cli/src/commands/constants.rs
Normal file
|
@ -0,0 +1,358 @@
|
|||
/// File extensions and file names checked by the `open` command to decide
/// whether `bat` does syntax highlighting for a file (via `BAT_LANGUAGES.contains`).
/// Some entries (for example "h") appear more than once.
pub const BAT_LANGUAGES: &[&str] = &[
|
||||
"as",
|
||||
"csv",
|
||||
"tsv",
|
||||
"applescript",
|
||||
"script editor",
|
||||
"s",
|
||||
"S",
|
||||
"adoc",
|
||||
"asciidoc",
|
||||
"asc",
|
||||
"asa",
|
||||
"yasm",
|
||||
"nasm",
|
||||
"asm",
|
||||
"inc",
|
||||
"mac",
|
||||
"awk",
|
||||
"bat",
|
||||
"cmd",
|
||||
"bib",
|
||||
"sh",
|
||||
"bash",
|
||||
"zsh",
|
||||
".bash_aliases",
|
||||
".bash_completions",
|
||||
".bash_functions",
|
||||
".bash_login",
|
||||
".bash_logout",
|
||||
".bash_profile",
|
||||
".bash_variables",
|
||||
".bashrc",
|
||||
".profile",
|
||||
".textmate_init",
|
||||
".zshrc",
|
||||
"PKGBUILD",
|
||||
".ebuild",
|
||||
".eclass",
|
||||
"c",
|
||||
"h",
|
||||
"cs",
|
||||
"csx",
|
||||
"cpp",
|
||||
"cc",
|
||||
"cp",
|
||||
"cxx",
|
||||
"c++",
|
||||
"C",
|
||||
"h",
|
||||
"hh",
|
||||
"hpp",
|
||||
"hxx",
|
||||
"h++",
|
||||
"inl",
|
||||
"ipp",
|
||||
"cabal",
|
||||
"clj",
|
||||
"cljc",
|
||||
"cljs",
|
||||
"edn",
|
||||
"CMakeLists.txt",
|
||||
"cmake",
|
||||
"h.in",
|
||||
"hh.in",
|
||||
"hpp.in",
|
||||
"hxx.in",
|
||||
"h++.in",
|
||||
"CMakeCache.txt",
|
||||
"cr",
|
||||
"css",
|
||||
"css.erb",
|
||||
"css.liquid",
|
||||
"d",
|
||||
"di",
|
||||
"dart",
|
||||
"diff",
|
||||
"patch",
|
||||
"Dockerfile",
|
||||
"dockerfile",
|
||||
"ex",
|
||||
"exs",
|
||||
"elm",
|
||||
"erl",
|
||||
"hrl",
|
||||
"Emakefile",
|
||||
"emakefile",
|
||||
"fs",
|
||||
"fsi",
|
||||
"fsx",
|
||||
"fs",
|
||||
"fsi",
|
||||
"fsx",
|
||||
"fish",
|
||||
"attributes",
|
||||
"gitattributes",
|
||||
".gitattributes",
|
||||
"COMMIT_EDITMSG",
|
||||
"MERGE_MSG",
|
||||
"TAG_EDITMSG",
|
||||
"gitconfig",
|
||||
".gitconfig",
|
||||
".gitmodules",
|
||||
"exclude",
|
||||
"gitignore",
|
||||
".gitignore",
|
||||
".git",
|
||||
"gitlog",
|
||||
"git-rebase-todo",
|
||||
"go",
|
||||
"dot",
|
||||
"DOT",
|
||||
"gv",
|
||||
"groovy",
|
||||
"gvy",
|
||||
"gradle",
|
||||
"Jenkinsfile",
|
||||
"hs",
|
||||
"hs",
|
||||
"hsc",
|
||||
"show-nonprintable",
|
||||
"html",
|
||||
"htm",
|
||||
"shtml",
|
||||
"xhtml",
|
||||
"asp",
|
||||
"html.eex",
|
||||
"yaws",
|
||||
"rails",
|
||||
"rhtml",
|
||||
"erb",
|
||||
"html.erb",
|
||||
"adp",
|
||||
"twig",
|
||||
"html.twig",
|
||||
"ini",
|
||||
"INI",
|
||||
"INF",
|
||||
"reg",
|
||||
"REG",
|
||||
"lng",
|
||||
"cfg",
|
||||
"CFG",
|
||||
"desktop",
|
||||
"url",
|
||||
"URL",
|
||||
".editorconfig",
|
||||
".hgrc",
|
||||
"hgrc",
|
||||
"java",
|
||||
"bsh",
|
||||
"properties",
|
||||
"jsp",
|
||||
"js",
|
||||
"htc",
|
||||
"js",
|
||||
"jsx",
|
||||
"babel",
|
||||
"es6",
|
||||
"js.erb",
|
||||
"json",
|
||||
"sublime-settings",
|
||||
"sublime-menu",
|
||||
"sublime-keymap",
|
||||
"sublime-mousemap",
|
||||
"sublime-theme",
|
||||
"sublime-build",
|
||||
"sublime-project",
|
||||
"sublime-completions",
|
||||
"sublime-commands",
|
||||
"sublime-macro",
|
||||
"sublime-color-scheme",
|
||||
"ipynb",
|
||||
"Pipfile.lock",
|
||||
"jsonnet",
|
||||
"libsonnet",
|
||||
"libjsonnet",
|
||||
"jl",
|
||||
"kt",
|
||||
"kts",
|
||||
"tex",
|
||||
"ltx",
|
||||
"less",
|
||||
"css.less",
|
||||
"lisp",
|
||||
"cl",
|
||||
"clisp",
|
||||
"l",
|
||||
"mud",
|
||||
"el",
|
||||
"scm",
|
||||
"ss",
|
||||
"lsp",
|
||||
"fasl",
|
||||
"lhs",
|
||||
"lua",
|
||||
"make",
|
||||
"GNUmakefile",
|
||||
"makefile",
|
||||
"Makefile",
|
||||
"makefile.am",
|
||||
"Makefile.am",
|
||||
"makefile.in",
|
||||
"Makefile.in",
|
||||
"OCamlMakefile",
|
||||
"mak",
|
||||
"mk",
|
||||
"md",
|
||||
"mdown",
|
||||
"markdown",
|
||||
"markdn",
|
||||
"matlab",
|
||||
"build",
|
||||
"nix",
|
||||
"m",
|
||||
"h",
|
||||
"mm",
|
||||
"M",
|
||||
"h",
|
||||
"ml",
|
||||
"mli",
|
||||
"mll",
|
||||
"mly",
|
||||
"pas",
|
||||
"p",
|
||||
"dpr",
|
||||
"pl",
|
||||
"pm",
|
||||
"pod",
|
||||
"t",
|
||||
"PL",
|
||||
"php",
|
||||
"php3",
|
||||
"php4",
|
||||
"php5",
|
||||
"php7",
|
||||
"phps",
|
||||
"phpt",
|
||||
"phtml",
|
||||
"txt",
|
||||
"ps1",
|
||||
"psm1",
|
||||
"psd1",
|
||||
"proto",
|
||||
"protodevel",
|
||||
"pb.txt",
|
||||
"proto.text",
|
||||
"textpb",
|
||||
"pbtxt",
|
||||
"prototxt",
|
||||
"pp",
|
||||
"epp",
|
||||
"purs",
|
||||
"py",
|
||||
"py3",
|
||||
"pyw",
|
||||
"pyi",
|
||||
"pyx",
|
||||
"pyx.in",
|
||||
"pxd",
|
||||
"pxd.in",
|
||||
"pxi",
|
||||
"pxi.in",
|
||||
"rpy",
|
||||
"cpy",
|
||||
"SConstruct",
|
||||
"Sconstruct",
|
||||
"sconstruct",
|
||||
"SConscript",
|
||||
"gyp",
|
||||
"gypi",
|
||||
"Snakefile",
|
||||
"wscript",
|
||||
"R",
|
||||
"r",
|
||||
"s",
|
||||
"S",
|
||||
"Rprofile",
|
||||
"rd",
|
||||
"re",
|
||||
"rst",
|
||||
"rest",
|
||||
"robot",
|
||||
"rb",
|
||||
"Appfile",
|
||||
"Appraisals",
|
||||
"Berksfile",
|
||||
"Brewfile",
|
||||
"capfile",
|
||||
"cgi",
|
||||
"Cheffile",
|
||||
"config.ru",
|
||||
"Deliverfile",
|
||||
"Fastfile",
|
||||
"fcgi",
|
||||
"Gemfile",
|
||||
"gemspec",
|
||||
"Guardfile",
|
||||
"irbrc",
|
||||
"jbuilder",
|
||||
"Podfile",
|
||||
"podspec",
|
||||
"prawn",
|
||||
"rabl",
|
||||
"rake",
|
||||
"Rakefile",
|
||||
"Rantfile",
|
||||
"rbx",
|
||||
"rjs",
|
||||
"ruby.rail",
|
||||
"Scanfile",
|
||||
"simplecov",
|
||||
"Snapfile",
|
||||
"thor",
|
||||
"Thorfile",
|
||||
"Vagrantfile",
|
||||
"haml",
|
||||
"sass",
|
||||
"rxml",
|
||||
"builder",
|
||||
"rs",
|
||||
"scala",
|
||||
"sbt",
|
||||
"sql",
|
||||
"ddl",
|
||||
"dml",
|
||||
"erbsql",
|
||||
"sql.erb",
|
||||
"swift",
|
||||
"log",
|
||||
"tcl",
|
||||
"tf",
|
||||
"tfvars",
|
||||
"hcl",
|
||||
"sty",
|
||||
"cls",
|
||||
"textile",
|
||||
"toml",
|
||||
"tml",
|
||||
"Cargo.lock",
|
||||
"Gopkg.lock",
|
||||
"Pipfile",
|
||||
"ts",
|
||||
"tsx",
|
||||
"varlink",
|
||||
"vim",
|
||||
".vimrc",
|
||||
"xml",
|
||||
"xsd",
|
||||
"xslt",
|
||||
"tld",
|
||||
"dtml",
|
||||
"rss",
|
||||
"opml",
|
||||
"svg",
|
||||
"yaml",
|
||||
"yml",
|
||||
"sublime-syntax",
|
||||
];
|
|
@ -121,21 +121,16 @@ async fn enter(
|
|||
|
||||
let full_path = std::path::PathBuf::from(cwd);
|
||||
|
||||
let (file_extension, contents, contents_tag) = crate::commands::open::fetch(
|
||||
let (file_extension, tagged_contents) = crate::commands::open::fetch(
|
||||
&full_path,
|
||||
&PathBuf::from(location_clone),
|
||||
tag.span,
|
||||
match encoding {
|
||||
Some(e) => e.to_string(),
|
||||
_ => "".to_string(),
|
||||
},
|
||||
encoding,
|
||||
)
|
||||
.await?;
|
||||
|
||||
match contents {
|
||||
match tagged_contents.value {
|
||||
UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||
let tagged_contents = contents.into_value(&contents_tag);
|
||||
|
||||
if let Some(extension) = file_extension {
|
||||
let command_name = format!("from {}", extension);
|
||||
if let Some(converter) = registry.get_command(&command_name) {
|
||||
|
@ -156,18 +151,18 @@ async fn enter(
|
|||
scope: scope.clone(),
|
||||
},
|
||||
};
|
||||
let tag = tagged_contents.tag.clone();
|
||||
let mut result = converter
|
||||
.run(new_args.with_input(vec![tagged_contents]), ®istry)
|
||||
.await?;
|
||||
let result_vec: Vec<Result<ReturnSuccess, ShellError>> =
|
||||
result.drain_vec().await;
|
||||
|
||||
Ok(futures::stream::iter(result_vec.into_iter().map(
|
||||
move |res| match res {
|
||||
Ok(ReturnSuccess::Value(Value { value, .. })) => Ok(
|
||||
ReturnSuccess::Action(CommandAction::EnterValueShell(Value {
|
||||
value,
|
||||
tag: contents_tag.clone(),
|
||||
tag: tag.clone(),
|
||||
})),
|
||||
),
|
||||
x => x,
|
||||
|
@ -185,13 +180,9 @@ async fn enter(
|
|||
)))
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let tagged_contents = contents.into_value(contents_tag);
|
||||
|
||||
Ok(OutputStream::one(ReturnSuccess::action(
|
||||
CommandAction::EnterValueShell(tagged_contents),
|
||||
)))
|
||||
}
|
||||
_ => Ok(OutputStream::one(ReturnSuccess::action(
|
||||
CommandAction::EnterValueShell(tagged_contents),
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use futures_codec::FramedRead;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue};
|
||||
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
|
||||
use nu_source::{AnchorLocation, Span, Tagged};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::PathBuf;
|
||||
extern crate encoding_rs;
|
||||
use crate::commands::constants::BAT_LANGUAGES;
|
||||
use encoding_rs::*;
|
||||
use futures::prelude::*;
|
||||
use log::debug;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
|
||||
pub struct Open;
|
||||
|
||||
|
@ -81,23 +83,25 @@ documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
|||
}
|
||||
}
|
||||
|
||||
pub fn get_encoding(opt: Option<String>) -> &'static Encoding {
|
||||
pub fn get_encoding(opt: Option<Tagged<String>>) -> Result<&'static Encoding, ShellError> {
|
||||
match opt {
|
||||
None => UTF_8,
|
||||
Some(label) => match Encoding::for_label((&label).as_bytes()) {
|
||||
None => {
|
||||
//print!("{} is not a known encoding label. Trying UTF-8.", label);
|
||||
//std::process::exit(-2);
|
||||
get_encoding(Some("utf-8".to_string()))
|
||||
}
|
||||
Some(encoding) => encoding,
|
||||
None => Ok(UTF_8),
|
||||
Some(label) => match Encoding::for_label((&label.item).as_bytes()) {
|
||||
None => Err(ShellError::labeled_error(
|
||||
format!(
|
||||
r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
|
||||
label.item
|
||||
),
|
||||
"invalid encoding",
|
||||
label.span(),
|
||||
)),
|
||||
Some(encoding) => Ok(encoding),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
||||
let cwd = PathBuf::from(args.shell_manager.path());
|
||||
let full_path = cwd;
|
||||
let registry = registry.clone();
|
||||
|
||||
let (
|
||||
|
@ -108,329 +112,135 @@ async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStr
|
|||
},
|
||||
_,
|
||||
) = args.process(®istry).await?;
|
||||
let enc = match encoding {
|
||||
Some(e) => e.to_string(),
|
||||
_ => "".to_string(),
|
||||
};
|
||||
let result = fetch(&full_path, &path.item, path.tag.span, enc).await;
|
||||
|
||||
let (file_extension, contents, contents_tag) = result?;
|
||||
// TODO: Remove once Streams are supported everywhere!
|
||||
// As a short term workaround for getting AutoConvert and Bat functionality (Those don't currently support Streams)
|
||||
|
||||
let file_extension = if raw.item {
|
||||
// Check if the extension has a "from *" command OR "bat" supports syntax highlighting
|
||||
// AND the user doesn't want the raw output
|
||||
// In these cases, we will collect the Stream
|
||||
let ext = if raw.item {
|
||||
None
|
||||
} else {
|
||||
// If the extension could not be determined via mimetype, try to use the path
|
||||
// extension. Some file types do not declare their mimetypes (such as bson files).
|
||||
file_extension.or_else(|| path.extension().map(|x| x.to_string_lossy().to_string()))
|
||||
path.extension()
|
||||
.map(|name| name.to_string_lossy().to_string())
|
||||
};
|
||||
|
||||
let tagged_contents = contents.into_value(&contents_tag);
|
||||
|
||||
if let Some(extension) = file_extension {
|
||||
Ok(OutputStream::one(ReturnSuccess::action(
|
||||
CommandAction::AutoConvert(tagged_contents, extension),
|
||||
)))
|
||||
} else {
|
||||
Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)))
|
||||
if let Some(ext) = ext {
|
||||
// Check if we have a conversion command
|
||||
if let Some(_command) = registry.get_command(&format!("from {}", ext)) {
|
||||
let (_, tagged_contents) = crate::commands::open::fetch(
|
||||
&cwd,
|
||||
&PathBuf::from(&path.item),
|
||||
path.tag.span,
|
||||
encoding,
|
||||
)
|
||||
.await?;
|
||||
return Ok(OutputStream::one(ReturnSuccess::action(
|
||||
CommandAction::AutoConvert(tagged_contents, ext),
|
||||
)));
|
||||
}
|
||||
// Check if bat does syntax highlighting
|
||||
if BAT_LANGUAGES.contains(&ext.as_ref()) {
|
||||
let (_, tagged_contents) = crate::commands::open::fetch(
|
||||
&cwd,
|
||||
&PathBuf::from(&path.item),
|
||||
path.tag.span,
|
||||
encoding,
|
||||
)
|
||||
.await?;
|
||||
return Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)));
|
||||
}
|
||||
}
|
||||
|
||||
// Normal Streaming operation
|
||||
let with_encoding = if encoding.is_none() {
|
||||
None
|
||||
} else {
|
||||
Some(get_encoding(encoding)?)
|
||||
};
|
||||
let f = File::open(&path).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
format!("Error opening file: {:?}", e),
|
||||
"Error opening file",
|
||||
path.span(),
|
||||
)
|
||||
})?;
|
||||
let async_reader = futures::io::AllowStdIo::new(f);
|
||||
let sob_stream = FramedRead::new(async_reader, MaybeTextCodec::new(with_encoding))
|
||||
.map_err(|e| ShellError::unexpected(format!("AsyncRead failed in open function: {:?}", e)))
|
||||
.into_stream();
|
||||
|
||||
let final_stream = sob_stream.map(|x| match x {
|
||||
Ok(StringOrBinary::String(s)) => {
|
||||
ReturnSuccess::value(UntaggedValue::string(s).into_untagged_value())
|
||||
}
|
||||
Ok(StringOrBinary::Binary(b)) => ReturnSuccess::value(
|
||||
UntaggedValue::binary(b.into_iter().collect()).into_untagged_value(),
|
||||
),
|
||||
Err(se) => Err(se),
|
||||
});
|
||||
|
||||
Ok(OutputStream::new(final_stream))
|
||||
}
|
||||
|
||||
// Note that we do not output a Stream in "fetch" since it is only used by "enter" command
|
||||
// Which we expect to use a concrete Value and not a Stream
|
||||
pub async fn fetch(
|
||||
cwd: &PathBuf,
|
||||
location: &PathBuf,
|
||||
span: Span,
|
||||
encoding: String,
|
||||
) -> Result<(Option<String>, UntaggedValue, Tag), ShellError> {
|
||||
encoding_choice: Option<Tagged<String>>,
|
||||
) -> Result<(Option<String>, Value), ShellError> {
|
||||
// TODO: I don't understand the point of this? Maybe for better error reporting
|
||||
let mut cwd = cwd.clone();
|
||||
let output_encoding: &Encoding = get_encoding(Some("utf-8".to_string()));
|
||||
let input_encoding: &Encoding = get_encoding(Some(encoding.clone()));
|
||||
let mut decoder = input_encoding.new_decoder();
|
||||
let mut encoder = output_encoding.new_encoder();
|
||||
let mut _file: File;
|
||||
let buf = Vec::new();
|
||||
let mut bufwriter = BufWriter::new(buf);
|
||||
|
||||
cwd.push(Path::new(location));
|
||||
if let Ok(cwd) = dunce::canonicalize(&cwd) {
|
||||
if !encoding.is_empty() {
|
||||
// use the encoding string
|
||||
match File::open(&Path::new(&cwd)) {
|
||||
Ok(mut _file) => {
|
||||
convert_via_utf8(
|
||||
&mut decoder,
|
||||
&mut encoder,
|
||||
&mut _file,
|
||||
&mut bufwriter,
|
||||
false,
|
||||
);
|
||||
//bufwriter.flush()?;
|
||||
Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(String::from_utf8_lossy(&bufwriter.buffer())),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
||||
},
|
||||
))
|
||||
}
|
||||
Err(_) => Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
span,
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
// Do the old stuff
|
||||
match std::fs::read(&cwd) {
|
||||
Ok(bytes) => match std::str::from_utf8(&bytes) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
||||
},
|
||||
)),
|
||||
Err(_) => {
|
||||
//Non utf8 data.
|
||||
match (bytes.get(0), bytes.get(1)) {
|
||||
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
|
||||
// Possibly UTF-16 little endian
|
||||
let utf16 = read_le_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
Err(_) => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
|
||||
// Possibly UTF-16 big endian
|
||||
let utf16 = read_be_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
Err(_) => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
_ => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(_) => Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
cwd.push(location);
|
||||
let nice_location = dunce::canonicalize(&cwd).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
format!("Cannot canonicalize file {:?} because {:?}", &cwd, e),
|
||||
"Cannot canonicalize",
|
||||
span,
|
||||
))
|
||||
}
|
||||
}
|
||||
)
|
||||
})?;
|
||||
|
||||
fn convert_via_utf8(
|
||||
decoder: &mut Decoder,
|
||||
encoder: &mut Encoder,
|
||||
read: &mut dyn Read,
|
||||
write: &mut dyn Write,
|
||||
last: bool,
|
||||
) {
|
||||
let mut input_buffer = [0u8; 2048];
|
||||
let mut intermediate_buffer_bytes = [0u8; 4096];
|
||||
// Is there a safe way to create a stack-allocated &mut str?
|
||||
let mut intermediate_buffer: &mut str =
|
||||
//unsafe { std::mem::transmute(&mut intermediate_buffer_bytes[..]) };
|
||||
std::str::from_utf8_mut(&mut intermediate_buffer_bytes[..]).expect("error with from_utf8_mut");
|
||||
let mut output_buffer = [0u8; 4096];
|
||||
let mut current_input_ended = false;
|
||||
while !current_input_ended {
|
||||
match read.read(&mut input_buffer) {
|
||||
Err(_) => {
|
||||
print!("Error reading input.");
|
||||
//std::process::exit(-5);
|
||||
}
|
||||
Ok(decoder_input_end) => {
|
||||
current_input_ended = decoder_input_end == 0;
|
||||
let input_ended = last && current_input_ended;
|
||||
let mut decoder_input_start = 0usize;
|
||||
loop {
|
||||
let (decoder_result, decoder_read, decoder_written, _) = decoder.decode_to_str(
|
||||
&input_buffer[decoder_input_start..decoder_input_end],
|
||||
&mut intermediate_buffer,
|
||||
input_ended,
|
||||
);
|
||||
decoder_input_start += decoder_read;
|
||||
// The extension may be used in AutoConvert later on
|
||||
let ext = location
|
||||
.extension()
|
||||
.map(|name| name.to_string_lossy().to_string());
|
||||
|
||||
let last_output = if input_ended {
|
||||
match decoder_result {
|
||||
CoderResult::InputEmpty => true,
|
||||
CoderResult::OutputFull => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
// The tag that will be used when returning a Value
|
||||
let file_tag = Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
nice_location.to_string_lossy().to_string(),
|
||||
)),
|
||||
};
|
||||
|
||||
// Regardless of whether the intermediate buffer got full
|
||||
// or the input buffer was exhausted, let's process what's
|
||||
// in the intermediate buffer.
|
||||
let res = std::fs::read(location)?;
|
||||
|
||||
if encoder.encoding() == UTF_8 {
|
||||
// If the target is UTF-8, optimize out the encoder.
|
||||
if write
|
||||
.write_all(&intermediate_buffer.as_bytes()[..decoder_written])
|
||||
.is_err()
|
||||
{
|
||||
print!("Error writing output.");
|
||||
//std::process::exit(-7);
|
||||
}
|
||||
} else {
|
||||
let mut encoder_input_start = 0usize;
|
||||
loop {
|
||||
let (encoder_result, encoder_read, encoder_written, _) = encoder
|
||||
.encode_from_utf8(
|
||||
&intermediate_buffer[encoder_input_start..decoder_written],
|
||||
&mut output_buffer,
|
||||
last_output,
|
||||
);
|
||||
encoder_input_start += encoder_read;
|
||||
if write.write_all(&output_buffer[..encoder_written]).is_err() {
|
||||
print!("Error writing output.");
|
||||
//std::process::exit(-6);
|
||||
}
|
||||
match encoder_result {
|
||||
CoderResult::InputEmpty => {
|
||||
break;
|
||||
}
|
||||
CoderResult::OutputFull => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now let's see if we should read again or process the
|
||||
// rest of the current input buffer.
|
||||
match decoder_result {
|
||||
CoderResult::InputEmpty => {
|
||||
break;
|
||||
}
|
||||
CoderResult::OutputFull => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
|
||||
if input.len() % 2 != 0 || input.len() < 2 {
|
||||
None
|
||||
// If no encoding is provided we try to guess the encoding to read the file with
|
||||
let encoding = if encoding_choice.is_none() {
|
||||
UTF_8
|
||||
} else {
|
||||
let mut result = vec![];
|
||||
let mut pos = 0;
|
||||
while pos < input.len() {
|
||||
result.push(u16::from_le_bytes([input[pos], input[pos + 1]]));
|
||||
pos += 2;
|
||||
}
|
||||
get_encoding(encoding_choice.clone())?
|
||||
};
|
||||
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
|
||||
if input.len() % 2 != 0 || input.len() < 2 {
|
||||
None
|
||||
// If the user specified an encoding, then do not do BOM sniffing
|
||||
let decoded_res = if encoding_choice.is_some() {
|
||||
let (cow_res, _replacements) = encoding.decode_with_bom_removal(&res);
|
||||
cow_res
|
||||
} else {
|
||||
let mut result = vec![];
|
||||
let mut pos = 0;
|
||||
while pos < input.len() {
|
||||
result.push(u16::from_be_bytes([input[pos], input[pos + 1]]));
|
||||
pos += 2;
|
||||
// Otherwise, use the default UTF-8 decoder with BOM sniffing
|
||||
let (cow_res, actual_encoding, replacements) = encoding.decode(&res);
|
||||
// If we had to use replacement characters then fallback to binary
|
||||
if replacements {
|
||||
return Ok((ext, UntaggedValue::binary(res).into_value(file_tag)));
|
||||
}
|
||||
|
||||
Some(result)
|
||||
}
|
||||
debug!("Decoded using {:?}", actual_encoding);
|
||||
cow_res
|
||||
};
|
||||
let v = UntaggedValue::string(decoded_res.to_string()).into_value(file_tag);
|
||||
Ok((ext, v))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -80,7 +80,6 @@ fn errors_if_file_not_found() {
|
|||
"enter i_dont_exist.csv"
|
||||
);
|
||||
|
||||
//assert!(actual.err.contains("File could not be opened"));
|
||||
assert!(actual.err.contains("file not found"));
|
||||
assert!(actual.err.contains("Cannot canonicalize"));
|
||||
})
|
||||
}
|
||||
|
|
|
@ -224,7 +224,11 @@ fn errors_if_file_not_found() {
|
|||
cwd: "tests/fixtures/formats",
|
||||
"open i_dont_exist.txt"
|
||||
);
|
||||
|
||||
//assert!(actual.err.contains("File could not be opened"));
|
||||
assert!(actual.err.contains("Cannot open"));
|
||||
let expected = "Cannot canonicalize";
|
||||
assert!(
|
||||
actual.err.contains(expected),
|
||||
"Error:\n{}\ndoes not contain{}",
|
||||
actual.err,
|
||||
expected
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue